feat: automatische Orientierungserkennung fuer umgedrehte Scans
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 23s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m50s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 15s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 23s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m50s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 15s
Tesseract OSD erkennt 0/90/180/270° Rotation und korrigiert automatisch vor dem Deskew. Löst das Problem mit Buchscannern, bei denen jede 2. Seite auf dem Kopf steht.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -224,6 +224,54 @@ def render_image_high_res(image_data: bytes) -> np.ndarray:
|
|||||||
return img_bgr
|
return img_bgr
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Stage 1b: Orientation Detection (0°/90°/180°/270°)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def detect_and_fix_orientation(img_bgr: np.ndarray) -> Tuple[np.ndarray, int]:
    """Detect page orientation via Tesseract OSD and rotate if needed.

    Handles upside-down scans (180°) common with book scanners where
    every other page is flipped due to the scanner hinge, as well as
    pages lying on their side (90°/270°).

    Detection is best-effort: if pytesseract is unavailable (the
    module-level name is ``None``), OSD raises, or the reported
    orientation confidence is below 1.0, the input image is returned
    unchanged with a rotation of 0.

    Args:
        img_bgr: Page image as a BGR array (it is converted with
            ``cv2.COLOR_BGR2GRAY`` before being handed to Tesseract).

    Returns:
        (corrected_image, rotation_degrees) — rotation is 0, 90, 180, or 270
        and is the clockwise rotation that was applied to the input.
    """
    if pytesseract is None:
        return img_bgr, 0

    try:
        # Tesseract OSD needs a grayscale or RGB image
        gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
        pil_img = Image.fromarray(gray)

        osd = pytesseract.image_to_osd(pil_img, output_type=pytesseract.Output.DICT)
        rotate = osd.get("rotate", 0)
        confidence = osd.get("orientation_conf", 0.0)

        logger.info(f"OSD: orientation={rotate}° confidence={confidence:.1f}")

        if rotate == 0 or confidence < 1.0:
            return img_bgr, 0

        # Tesseract's "Rotate" value is the CLOCKWISE rotation (in degrees)
        # that must be applied to make the text upright, so 90 maps to a
        # clockwise quarter turn and 270 to a counter-clockwise one.
        rotation_codes = {
            90: cv2.ROTATE_90_CLOCKWISE,
            180: cv2.ROTATE_180,
            270: cv2.ROTATE_90_COUNTERCLOCKWISE,
        }
        rotate_code = rotation_codes.get(rotate)
        if rotate_code is None:
            # Unexpected value from OSD: leave the image untouched.
            return img_bgr, 0

        corrected = cv2.rotate(img_bgr, rotate_code)
        logger.info(f"OSD: rotated {rotate}° to fix orientation")
        return corrected, rotate

    except Exception as e:
        # Deliberate best-effort: OSD fails on pages with too little text;
        # the pipeline should continue with the unrotated image.
        logger.warning(f"OSD orientation detection failed: {e}")
        return img_bgr, 0
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Stage 2: Deskew (Rotation Correction)
|
# Stage 2: Deskew (Rotation Correction)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -71,6 +71,7 @@ try:
|
|||||||
detect_row_geometry, build_cell_grid_v2,
|
detect_row_geometry, build_cell_grid_v2,
|
||||||
_cells_to_vocab_entries, _detect_sub_columns, _detect_header_footer_gaps,
|
_cells_to_vocab_entries, _detect_sub_columns, _detect_header_footer_gaps,
|
||||||
expand_narrow_columns, positional_column_regions, llm_review_entries,
|
expand_narrow_columns, positional_column_regions, llm_review_entries,
|
||||||
|
detect_and_fix_orientation,
|
||||||
_fix_phonetic_brackets,
|
_fix_phonetic_brackets,
|
||||||
render_pdf_high_res,
|
render_pdf_high_res,
|
||||||
PageRegion, RowGeometry,
|
PageRegion, RowGeometry,
|
||||||
@@ -1360,6 +1361,15 @@ async def _run_ocr_pipeline_for_page(
|
|||||||
img_h, img_w = img_bgr.shape[:2]
|
img_h, img_w = img_bgr.shape[:2]
|
||||||
logger.info(f"OCR Pipeline page {page_number + 1}: image {img_w}x{img_h}")
|
logger.info(f"OCR Pipeline page {page_number + 1}: image {img_w}x{img_h}")
|
||||||
|
|
||||||
|
# 1b. Orientation detection (fix upside-down scans)
|
||||||
|
t0 = _time.time()
|
||||||
|
img_bgr, rotation = detect_and_fix_orientation(img_bgr)
|
||||||
|
if rotation:
|
||||||
|
img_h, img_w = img_bgr.shape[:2]
|
||||||
|
logger.info(f" orientation: rotated {rotation}° ({_time.time() - t0:.1f}s)")
|
||||||
|
else:
|
||||||
|
logger.info(f" orientation: OK ({_time.time() - t0:.1f}s)")
|
||||||
|
|
||||||
# 2. Create pipeline session in DB (for debugging in admin UI)
|
# 2. Create pipeline session in DB (for debugging in admin UI)
|
||||||
pipeline_session_id = str(uuid.uuid4())
|
pipeline_session_id = str(uuid.uuid4())
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user