Add A/B testing toggles for OCR quality steps
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 30s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m33s
CI / test-python-agent-core (push) Successful in 26s
CI / test-nodejs-website (push) Successful in 18s

Each quality improvement step can now be toggled independently:
- CLAHE checkbox (Step 3: image enhancement on/off)
- MaxCols dropdown (Step 2: 0=unlimited, 2-5)
- MinConf dropdown (Step 1: auto/20/30/40/50/60)

Backend: New query params enhance, max_cols, and min_conf on the process-single-page endpoint.
When a scan quality report is available, the response's scan_quality entry includes an active_steps dict showing which steps are enabled.
Frontend: Toggle controls in VocabularyTab above the table.

This allows empirical A/B testing of each step on the same scan.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-23 15:27:26 +02:00
parent 2f34ee9ede
commit 545c8676b0
4 changed files with 88 additions and 9 deletions

View File

@@ -1290,6 +1290,9 @@ async def process_single_page(
page_number: int,
ipa_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"),
syllable_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"),
enhance: bool = Query(True, description="Step 3: CLAHE + Denoise for degraded scans"),
max_cols: int = Query(3, description="Step 2: Max column count (0=unlimited)"),
min_conf: int = Query(0, description="Step 1: Min OCR confidence (0=auto from quality score)"),
):
"""
Process a SINGLE page of an uploaded PDF using the Kombi OCR pipeline.
@@ -1300,6 +1303,9 @@ async def process_single_page(
Query params:
ipa_mode: "none" (default), "auto", "all", "en", "de"
syllable_mode: "none" (default), "auto", "all", "en", "de"
enhance: true (default) — apply CLAHE/denoise for degraded scans
max_cols: 3 (default) — max column count (0=unlimited)
min_conf: 0 (default=auto) — min OCR confidence (0=from quality score)
The frontend should call this sequentially for each page.
Returns the vocabulary for just this one page.
@@ -1332,6 +1338,9 @@ async def process_single_page(
page_vocabulary, rotation_deg, quality_report = await _run_ocr_pipeline_for_page(
img_bgr, page_number, session_id,
ipa_mode=ipa_mode, syllable_mode=syllable_mode,
enable_enhance=enhance,
max_columns=max_cols if max_cols > 0 else None,
override_min_conf=min_conf if min_conf > 0 else None,
)
except Exception as e:
logger.error(f"OCR pipeline failed for page {page_number + 1}: {e}", exc_info=True)
@@ -1395,11 +1404,17 @@ async def process_single_page(
"rotation": rotation_deg,
}
# Add scan quality report if available
# Add scan quality report + active steps info
if quality_report:
result["scan_quality"] = quality_report.to_dict()
sq = quality_report.to_dict()
sq["active_steps"] = {
"step1_confidence": f"min_conf={min_ocr_conf}" if not override_min_conf else f"min_conf={override_min_conf} (override)",
"step2_max_columns": f"max_cols={max_columns}" if max_columns else "unlimited",
"step3_enhance": "on" if enable_enhance and quality_report.is_degraded else "off",
}
result["scan_quality"] = sq
else:
quality_report = None # ensure variable exists for non-pipeline path
quality_report = None
return result
@@ -1411,6 +1426,9 @@ async def _run_ocr_pipeline_for_page(
*,
ipa_mode: str = "none",
syllable_mode: str = "none",
enable_enhance: bool = True,
max_columns: Optional[int] = 3,
override_min_conf: Optional[int] = None,
) -> tuple:
"""Run the full Kombi OCR pipeline on a single page and return vocab entries.
@@ -1488,11 +1506,14 @@ async def _run_ocr_pipeline_for_page(
except Exception as e:
logger.warning(f" scan quality: failed ({e})")
min_ocr_conf = scan_quality_report.recommended_min_conf if scan_quality_report else 40
if override_min_conf:
min_ocr_conf = override_min_conf
else:
min_ocr_conf = scan_quality_report.recommended_min_conf if scan_quality_report else 40
# 5c. Image enhancement for degraded scans
is_degraded = scan_quality_report.is_degraded if scan_quality_report else False
if is_degraded:
if is_degraded and enable_enhance:
try:
from ocr_image_enhance import enhance_for_ocr
dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True)
@@ -1547,8 +1568,8 @@ async def _run_ocr_pipeline_for_page(
else:
merged_words = tess_words # fallback to Tesseract only
# Build initial grid from merged words (limit to 3 columns for vocab tables)
cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h, max_columns=3)
# Build initial grid from merged words
cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h, max_columns=max_columns)
for cell in cells:
cell["ocr_engine"] = "rapid_kombi"