From 545c8676b0949b6bff8c4823db3a190d092c2bb5 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 23 Apr 2026 15:27:26 +0200 Subject: [PATCH] Add A/B testing toggles for OCR quality steps Each quality improvement step can now be toggled independently: - CLAHE checkbox (Step 3: image enhancement on/off) - MaxCols dropdown (Step 2: 0=unlimited, 2-5) - MinConf dropdown (Step 1: auto/20/30/40/50/60) Backend: Query params enhance, max_cols, min_conf on process-single-page. Response includes active_steps dict showing which steps are enabled. Frontend: Toggle controls in VocabularyTab above the table. This allows empirical A/B testing of each step on the same scan. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/vocab_worksheet_api.py | 35 +++++++++++++++---- .../components/VocabularyTab.tsx | 30 ++++++++++++++++ studio-v2/app/vocab-worksheet/types.ts | 8 +++++ .../app/vocab-worksheet/useVocabWorksheet.ts | 24 +++++++++++-- 4 files changed, 88 insertions(+), 9 deletions(-) diff --git a/klausur-service/backend/vocab_worksheet_api.py b/klausur-service/backend/vocab_worksheet_api.py index 01a6c2f..c8fe2d1 100644 --- a/klausur-service/backend/vocab_worksheet_api.py +++ b/klausur-service/backend/vocab_worksheet_api.py @@ -1290,6 +1290,9 @@ async def process_single_page( page_number: int, ipa_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"), syllable_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"), + enhance: bool = Query(True, description="Step 3: CLAHE + Denoise for degraded scans"), + max_cols: int = Query(3, description="Step 2: Max column count (0=unlimited)"), + min_conf: int = Query(0, description="Step 1: Min OCR confidence (0=auto from quality score)"), ): """ Process a SINGLE page of an uploaded PDF using the Kombi OCR pipeline. 
@@ -1300,6 +1303,9 @@ async def process_single_page( Query params: ipa_mode: "none" (default), "auto", "all", "en", "de" syllable_mode: "none" (default), "auto", "all", "en", "de" + enhance: true (default) — apply CLAHE/denoise for degraded scans + max_cols: 3 (default) — max column count (0=unlimited) + min_conf: 0 (default=auto) — min OCR confidence (0=from quality score) The frontend should call this sequentially for each page. Returns the vocabulary for just this one page. @@ -1332,6 +1338,9 @@ async def process_single_page( page_vocabulary, rotation_deg, quality_report = await _run_ocr_pipeline_for_page( img_bgr, page_number, session_id, ipa_mode=ipa_mode, syllable_mode=syllable_mode, + enable_enhance=enhance, + max_columns=max_cols if max_cols > 0 else None, + override_min_conf=min_conf if min_conf > 0 else None, ) except Exception as e: logger.error(f"OCR pipeline failed for page {page_number + 1}: {e}", exc_info=True) @@ -1395,11 +1404,17 @@ async def process_single_page( "rotation": rotation_deg, } - # Add scan quality report if available + # Add scan quality report + active steps info (built from this endpoint's own query params; the pipeline's locals such as min_ocr_conf/max_columns are not in scope here) if quality_report: - result["scan_quality"] = quality_report.to_dict() + sq = quality_report.to_dict() + sq["active_steps"] = { + "step1_confidence": f"min_conf={min_conf} (override)" if min_conf > 0 else f"min_conf={getattr(quality_report, 'recommended_min_conf', 'auto')}", + "step2_max_columns": f"max_cols={max_cols}" if max_cols > 0 else "unlimited", + "step3_enhance": "on" if enhance and quality_report.is_degraded else "off", + } + result["scan_quality"] = sq else: - quality_report = None # ensure variable exists for non-pipeline path + quality_report = None return result @@ -1411,6 +1426,9 @@ async def _run_ocr_pipeline_for_page( *, ipa_mode: str = "none", syllable_mode: str = "none", + enable_enhance: bool = True, + max_columns: Optional[int] = 3, + override_min_conf: Optional[int] = None, ) -> tuple: """Run the full Kombi OCR pipeline on a single page and return vocab entries. 
@@ -1488,11 +1506,14 @@ async def _run_ocr_pipeline_for_page( except Exception as e: logger.warning(f" scan quality: failed ({e})") - min_ocr_conf = scan_quality_report.recommended_min_conf if scan_quality_report else 40 + if override_min_conf: + min_ocr_conf = override_min_conf + else: + min_ocr_conf = scan_quality_report.recommended_min_conf if scan_quality_report else 40 # 5c. Image enhancement for degraded scans is_degraded = scan_quality_report.is_degraded if scan_quality_report else False - if is_degraded: + if is_degraded and enable_enhance: try: from ocr_image_enhance import enhance_for_ocr dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True) @@ -1547,8 +1568,8 @@ async def _run_ocr_pipeline_for_page( else: merged_words = tess_words # fallback to Tesseract only - # Build initial grid from merged words (limit to 3 columns for vocab tables) - cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h, max_columns=3) + # Build initial grid from merged words + cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h, max_columns=max_columns) for cell in cells: cell["ocr_engine"] = "rapid_kombi" diff --git a/studio-v2/app/vocab-worksheet/components/VocabularyTab.tsx b/studio-v2/app/vocab-worksheet/components/VocabularyTab.tsx index 82af7a8..61a06bd 100644 --- a/studio-v2/app/vocab-worksheet/components/VocabularyTab.tsx +++ b/studio-v2/app/vocab-worksheet/components/VocabularyTab.tsx @@ -120,6 +120,36 @@ export function VocabularyTab({ h }: { h: VocabWorksheetHook }) { + {/* OCR Quality Steps (A/B Testing) */} +
+ Steps: + + + +
+ {/* Error messages for failed pages */} {h.processingErrors.length > 0 && (
diff --git a/studio-v2/app/vocab-worksheet/types.ts b/studio-v2/app/vocab-worksheet/types.ts index d7c621b..68327e2 100644 --- a/studio-v2/app/vocab-worksheet/types.ts +++ b/studio-v2/app/vocab-worksheet/types.ts @@ -140,6 +140,14 @@ export interface VocabWorksheetHook { showSettings: boolean setShowSettings: (show: boolean) => void + // OCR Quality Steps (A/B testing) + ocrEnhance: boolean + setOcrEnhance: (v: boolean) => void + ocrMaxCols: number + setOcrMaxCols: (v: number) => void + ocrMinConf: number + setOcrMinConf: (v: number) => void + // QR showQRModal: boolean setShowQRModal: (show: boolean) => void diff --git a/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts b/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts index a286d81..2507d42 100644 --- a/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts +++ b/studio-v2/app/vocab-worksheet/useVocabWorksheet.ts @@ -90,6 +90,11 @@ export function useVocabWorksheet(): VocabWorksheetHook { const [ocrPrompts, setOcrPrompts] = useState(defaultOcrPrompts) const [showSettings, setShowSettings] = useState(false) + // OCR Quality Steps (toggle individually for A/B testing) + const [ocrEnhance, setOcrEnhance] = useState(true) // Step 3: CLAHE + denoise + const [ocrMaxCols, setOcrMaxCols] = useState(3) // Step 2: max columns (0=unlimited) + const [ocrMinConf, setOcrMinConf] = useState(0) // Step 1: 0=auto from quality score + // QR Code Upload const [showQRModal, setShowQRModal] = useState(false) const [uploadSessionId, setUploadSessionId] = useState('') @@ -359,7 +364,14 @@ export function useVocabWorksheet(): VocabWorksheetHook { const API_BASE = getApiBase() try { - const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}?ipa_mode=${ipa}&syllable_mode=${syllable}`, { + const params = new URLSearchParams({ + ipa_mode: ipa, + syllable_mode: syllable, + enhance: String(ocrEnhance), + max_cols: String(ocrMaxCols), + min_conf: String(ocrMinConf), + }) + const res = await 
fetch(`${API_BASE}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}?${params}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ ocr_prompts: ocrPrompts }), @@ -793,7 +805,14 @@ export function useVocabWorksheet(): VocabWorksheetHook { for (const pageIndex of pagesToReprocess) { setExtractionStatus(`Verarbeite Seite ${pageIndex + 1}...`) try { - const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/process-single-page/${pageIndex}?ipa_mode=${ipa}&syllable_mode=${syllable}`, { + const params = new URLSearchParams({ + ipa_mode: ipa, + syllable_mode: syllable, + enhance: String(ocrEnhance), + max_cols: String(ocrMaxCols), + min_conf: String(ocrMinConf), + }) + const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/process-single-page/${pageIndex}?${params}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ ocr_prompts: ocrPrompts }), @@ -849,6 +868,7 @@ export function useVocabWorksheet(): VocabWorksheetHook { processingErrors, successfulPages, failedPages, currentlyProcessingPage, // OCR settings ocrPrompts, showSettings, setShowSettings, + ocrEnhance, setOcrEnhance, ocrMaxCols, setOcrMaxCols, ocrMinConf, setOcrMinConf, // QR showQRModal, setShowQRModal, uploadSessionId, mobileUploadedFiles, selectedMobileFile, setSelectedMobileFile, setMobileUploadedFiles,