diff --git a/klausur-service/backend/vocab_worksheet_api.py b/klausur-service/backend/vocab_worksheet_api.py index 01a6c2f..c8fe2d1 100644 --- a/klausur-service/backend/vocab_worksheet_api.py +++ b/klausur-service/backend/vocab_worksheet_api.py @@ -1290,6 +1290,9 @@ async def process_single_page( page_number: int, ipa_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"), syllable_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"), + enhance: bool = Query(True, description="Step 3: CLAHE + Denoise for degraded scans"), + max_cols: int = Query(3, description="Step 2: Max column count (0=unlimited)"), + min_conf: int = Query(0, description="Step 1: Min OCR confidence (0=auto from quality score)"), ): """ Process a SINGLE page of an uploaded PDF using the Kombi OCR pipeline. @@ -1300,6 +1303,9 @@ async def process_single_page( Query params: ipa_mode: "none" (default), "auto", "all", "en", "de" syllable_mode: "none" (default), "auto", "all", "en", "de" + enhance: true (default) — apply CLAHE/denoise for degraded scans + max_cols: 3 (default) — max column count (0=unlimited) + min_conf: 0 (default=auto) — min OCR confidence (0=from quality score) The frontend should call this sequentially for each page. Returns the vocabulary for just this one page. @@ -1332,6 +1338,9 @@ async def process_single_page( page_vocabulary, rotation_deg, quality_report = await _run_ocr_pipeline_for_page( img_bgr, page_number, session_id, ipa_mode=ipa_mode, syllable_mode=syllable_mode, + enable_enhance=enhance, + max_columns=max_cols if max_cols > 0 else None, + override_min_conf=min_conf if min_conf > 0 else None, ) except Exception as e: logger.error(f"OCR pipeline failed for page {page_number + 1}: {e}", exc_info=True) @@ -1395,11 +1404,17 @@ async def process_single_page( "rotation": rotation_deg, } - # Add scan quality report if available + # Add scan quality report + active steps info if quality_report: - result["scan_quality"] = quality_report.to_dict() + sq = quality_report.to_dict() + sq["active_steps"] = { + "step1_confidence": f"min_conf={min_ocr_conf}" if not override_min_conf else f"min_conf={override_min_conf} (override)", + "step2_max_columns": f"max_cols={max_columns}" if max_columns else "unlimited", + "step3_enhance": "on" if enable_enhance and quality_report.is_degraded else "off", + } + result["scan_quality"] = sq else: - quality_report = None # ensure variable exists for non-pipeline path + quality_report = None return result @@ -1411,6 +1426,9 @@ async def _run_ocr_pipeline_for_page( *, ipa_mode: str = "none", syllable_mode: str = "none", + enable_enhance: bool = True, + max_columns: Optional[int] = 3, + override_min_conf: Optional[int] = None, ) -> tuple: """Run the full Kombi OCR pipeline on a single page and return vocab entries. @@ -1488,11 +1506,14 @@ async def _run_ocr_pipeline_for_page( except Exception as e: logger.warning(f" scan quality: failed ({e})") - min_ocr_conf = scan_quality_report.recommended_min_conf if scan_quality_report else 40 + if override_min_conf: + min_ocr_conf = override_min_conf + else: + min_ocr_conf = scan_quality_report.recommended_min_conf if scan_quality_report else 40 # 5c. Image enhancement for degraded scans is_degraded = scan_quality_report.is_degraded if scan_quality_report else False - if is_degraded: + if is_degraded and enable_enhance: try: from ocr_image_enhance import enhance_for_ocr dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True) @@ -1547,8 +1568,8 @@ async def _run_ocr_pipeline_for_page( else: merged_words = tess_words # fallback to Tesseract only - # Build initial grid from merged words (limit to 3 columns for vocab tables) - cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h, max_columns=3) + # Build initial grid from merged words + cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h, max_columns=max_columns) for cell in cells: cell["ocr_engine"] = "rapid_kombi" diff --git a/studio-v2/app/vocab-worksheet/components/VocabularyTab.tsx b/studio-v2/app/vocab-worksheet/components/VocabularyTab.tsx index 82af7a8..61a06bd 100644 --- a/studio-v2/app/vocab-worksheet/components/VocabularyTab.tsx +++ b/studio-v2/app/vocab-worksheet/components/VocabularyTab.tsx @@ -120,6 +120,36 @@ export function VocabularyTab({ h }: { h: VocabWorksheetHook }) { + {/* OCR Quality Steps (A/B Testing) */} +