def detect_and_fix_orientation(
    img_bgr: np.ndarray,
    *,
    min_confidence: float = 1.0,
) -> Tuple[np.ndarray, int]:
    """Detect page orientation via Tesseract OSD and rotate the page upright.

    Handles upside-down scans (180°) common with book scanners where
    every other page is flipped due to the scanner hinge, as well as
    pages lying on their side (90°/270°).

    Detection is best-effort: if pytesseract is unavailable, OSD fails,
    or the reported confidence is below ``min_confidence``, the input
    image is returned unchanged with a rotation of 0.

    Args:
        img_bgr: Page image passed to ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
        min_confidence: Minimum OSD ``orientation_conf`` required before
            any rotation is applied.

    Returns:
        (corrected_image, rotation_degrees) — rotation is 0, 90, 180, or 270
        and is the value Tesseract reported in its ``rotate`` field.
    """
    if pytesseract is None:
        return img_bgr, 0

    try:
        # Tesseract's OSD "rotate" field is the CLOCKWISE rotation needed to
        # make the page upright, so 90 maps to a clockwise quarter turn and
        # 270 (three clockwise quarter turns) to a counter-clockwise one.
        rotation_codes = {
            90: cv2.ROTATE_90_CLOCKWISE,
            180: cv2.ROTATE_180,
            270: cv2.ROTATE_90_COUNTERCLOCKWISE,
        }

        # OSD is run on a grayscale copy; the original image is what gets rotated.
        gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
        osd = pytesseract.image_to_osd(
            Image.fromarray(gray),
            output_type=pytesseract.Output.DICT,
        )
        rotate = osd.get("rotate", 0)
        confidence = osd.get("orientation_conf", 0.0)

        logger.info(f"OSD: orientation={rotate}° confidence={confidence:.1f}")

        # A rotate value of 0 (or any unexpected value) has no table entry.
        rotate_code = rotation_codes.get(rotate)
        if rotate_code is None or confidence < min_confidence:
            return img_bgr, 0

        corrected = cv2.rotate(img_bgr, rotate_code)
        logger.info(f"OSD: rotated {rotate}° to fix orientation")
        return corrected, rotate

    except Exception as e:
        # Deliberate best-effort boundary: orientation detection must never
        # abort the OCR pipeline, so fall back to the unrotated image.
        logger.warning(f"OSD orientation detection failed: {e}")
        return img_bgr, 0
Correction) # ============================================================================= diff --git a/klausur-service/backend/vocab_worksheet_api.py b/klausur-service/backend/vocab_worksheet_api.py index a8e86f5..90a44bf 100644 --- a/klausur-service/backend/vocab_worksheet_api.py +++ b/klausur-service/backend/vocab_worksheet_api.py @@ -71,6 +71,7 @@ try: detect_row_geometry, build_cell_grid_v2, _cells_to_vocab_entries, _detect_sub_columns, _detect_header_footer_gaps, expand_narrow_columns, positional_column_regions, llm_review_entries, + detect_and_fix_orientation, _fix_phonetic_brackets, render_pdf_high_res, PageRegion, RowGeometry, @@ -1360,6 +1361,15 @@ async def _run_ocr_pipeline_for_page( img_h, img_w = img_bgr.shape[:2] logger.info(f"OCR Pipeline page {page_number + 1}: image {img_w}x{img_h}") + # 1b. Orientation detection (fix upside-down scans) + t0 = _time.time() + img_bgr, rotation = detect_and_fix_orientation(img_bgr) + if rotation: + img_h, img_w = img_bgr.shape[:2] + logger.info(f" orientation: rotated {rotation}° ({_time.time() - t0:.1f}s)") + else: + logger.info(f" orientation: OK ({_time.time() - t0:.1f}s)") + # 2. Create pipeline session in DB (for debugging in admin UI) pipeline_session_id = str(uuid.uuid4()) try: