Add A/B testing toggles for OCR quality steps
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 30s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m33s
CI / test-python-agent-core (push) Successful in 26s
CI / test-nodejs-website (push) Successful in 18s

Each quality improvement step can now be toggled independently:
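- CLAHE checkbox (Step 3: CLAHE + denoise image enhancement for degraded scans, on/off; default on)
- MaxCols dropdown (Step 2: max column count; 0=unlimited, or 2-5; default 3)
- MinConf dropdown (Step 1: min OCR confidence; auto=derived from the scan quality score, or 20/30/40/50/60; default auto)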

Backend: New query params enhance, max_cols, and min_conf on the process-single-page endpoint.
When a scan quality report is available, the response's scan_quality includes an active_steps dict showing which steps are enabled.
Frontend: Toggle controls in VocabularyTab above the table.

This allows empirical A/B testing of each step on the same scan.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-23 15:27:26 +02:00
parent 2f34ee9ede
commit 545c8676b0
4 changed files with 88 additions and 9 deletions

View File

@@ -1290,6 +1290,9 @@ async def process_single_page(
page_number: int, page_number: int,
ipa_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"), ipa_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"),
syllable_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"), syllable_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"),
enhance: bool = Query(True, description="Step 3: CLAHE + Denoise for degraded scans"),
max_cols: int = Query(3, description="Step 2: Max column count (0=unlimited)"),
min_conf: int = Query(0, description="Step 1: Min OCR confidence (0=auto from quality score)"),
): ):
""" """
Process a SINGLE page of an uploaded PDF using the Kombi OCR pipeline. Process a SINGLE page of an uploaded PDF using the Kombi OCR pipeline.
@@ -1300,6 +1303,9 @@ async def process_single_page(
Query params: Query params:
ipa_mode: "none" (default), "auto", "all", "en", "de" ipa_mode: "none" (default), "auto", "all", "en", "de"
syllable_mode: "none" (default), "auto", "all", "en", "de" syllable_mode: "none" (default), "auto", "all", "en", "de"
enhance: true (default) — apply CLAHE/denoise for degraded scans
max_cols: 3 (default) — max column count (0=unlimited)
min_conf: 0 (default=auto) — min OCR confidence (0=from quality score)
The frontend should call this sequentially for each page. The frontend should call this sequentially for each page.
Returns the vocabulary for just this one page. Returns the vocabulary for just this one page.
@@ -1332,6 +1338,9 @@ async def process_single_page(
page_vocabulary, rotation_deg, quality_report = await _run_ocr_pipeline_for_page( page_vocabulary, rotation_deg, quality_report = await _run_ocr_pipeline_for_page(
img_bgr, page_number, session_id, img_bgr, page_number, session_id,
ipa_mode=ipa_mode, syllable_mode=syllable_mode, ipa_mode=ipa_mode, syllable_mode=syllable_mode,
enable_enhance=enhance,
max_columns=max_cols if max_cols > 0 else None,
override_min_conf=min_conf if min_conf > 0 else None,
) )
except Exception as e: except Exception as e:
logger.error(f"OCR pipeline failed for page {page_number + 1}: {e}", exc_info=True) logger.error(f"OCR pipeline failed for page {page_number + 1}: {e}", exc_info=True)
@@ -1395,11 +1404,17 @@ async def process_single_page(
"rotation": rotation_deg, "rotation": rotation_deg,
} }
# Add scan quality report if available # Add scan quality report + active steps info
if quality_report: if quality_report:
result["scan_quality"] = quality_report.to_dict() sq = quality_report.to_dict()
sq["active_steps"] = {
"step1_confidence": f"min_conf={min_ocr_conf}" if not override_min_conf else f"min_conf={override_min_conf} (override)",
"step2_max_columns": f"max_cols={max_columns}" if max_columns else "unlimited",
"step3_enhance": "on" if enable_enhance and quality_report.is_degraded else "off",
}
result["scan_quality"] = sq
else: else:
quality_report = None # ensure variable exists for non-pipeline path quality_report = None
return result return result
@@ -1411,6 +1426,9 @@ async def _run_ocr_pipeline_for_page(
*, *,
ipa_mode: str = "none", ipa_mode: str = "none",
syllable_mode: str = "none", syllable_mode: str = "none",
enable_enhance: bool = True,
max_columns: Optional[int] = 3,
override_min_conf: Optional[int] = None,
) -> tuple: ) -> tuple:
"""Run the full Kombi OCR pipeline on a single page and return vocab entries. """Run the full Kombi OCR pipeline on a single page and return vocab entries.
@@ -1488,11 +1506,14 @@ async def _run_ocr_pipeline_for_page(
except Exception as e: except Exception as e:
logger.warning(f" scan quality: failed ({e})") logger.warning(f" scan quality: failed ({e})")
if override_min_conf:
min_ocr_conf = override_min_conf
else:
min_ocr_conf = scan_quality_report.recommended_min_conf if scan_quality_report else 40 min_ocr_conf = scan_quality_report.recommended_min_conf if scan_quality_report else 40
# 5c. Image enhancement for degraded scans # 5c. Image enhancement for degraded scans
is_degraded = scan_quality_report.is_degraded if scan_quality_report else False is_degraded = scan_quality_report.is_degraded if scan_quality_report else False
if is_degraded: if is_degraded and enable_enhance:
try: try:
from ocr_image_enhance import enhance_for_ocr from ocr_image_enhance import enhance_for_ocr
dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True) dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True)
@@ -1547,8 +1568,8 @@ async def _run_ocr_pipeline_for_page(
else: else:
merged_words = tess_words # fallback to Tesseract only merged_words = tess_words # fallback to Tesseract only
# Build initial grid from merged words (limit to 3 columns for vocab tables) # Build initial grid from merged words
cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h, max_columns=3) cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h, max_columns=max_columns)
for cell in cells: for cell in cells:
cell["ocr_engine"] = "rapid_kombi" cell["ocr_engine"] = "rapid_kombi"

View File

@@ -120,6 +120,36 @@ export function VocabularyTab({ h }: { h: VocabWorksheetHook }) {
</div> </div>
</div> </div>
{/* OCR Quality Steps (A/B Testing) */}
<div className={`flex items-center gap-3 mb-3 flex-shrink-0 flex-wrap ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
<span className="text-xs font-medium">Steps:</span>
<label className="flex items-center gap-1 text-xs cursor-pointer">
<input type="checkbox" checked={h.ocrEnhance} onChange={(e) => h.setOcrEnhance(e.target.checked)} className="rounded" />
CLAHE
</label>
<label className="flex items-center gap-1 text-xs">
<span>MaxCols:</span>
<select value={h.ocrMaxCols} onChange={(e) => h.setOcrMaxCols(Number(e.target.value))} className={`px-1 py-0.5 text-xs rounded border ${isDark ? 'border-white/20 bg-white/10 text-white' : 'border-gray-200 bg-white text-gray-600'}`}>
<option value={0}>unbegrenzt</option>
<option value={2}>2</option>
<option value={3}>3</option>
<option value={4}>4</option>
<option value={5}>5</option>
</select>
</label>
<label className="flex items-center gap-1 text-xs">
<span>MinConf:</span>
<select value={h.ocrMinConf} onChange={(e) => h.setOcrMinConf(Number(e.target.value))} className={`px-1 py-0.5 text-xs rounded border ${isDark ? 'border-white/20 bg-white/10 text-white' : 'border-gray-200 bg-white text-gray-600'}`}>
<option value={0}>auto</option>
<option value={20}>20</option>
<option value={30}>30</option>
<option value={40}>40</option>
<option value={50}>50</option>
<option value={60}>60</option>
</select>
</label>
</div>
{/* Error messages for failed pages */} {/* Error messages for failed pages */}
{h.processingErrors.length > 0 && ( {h.processingErrors.length > 0 && (
<div className={`rounded-xl p-3 mb-3 flex-shrink-0 ${isDark ? 'bg-orange-500/20 text-orange-200 border border-orange-500/30' : 'bg-orange-100 text-orange-700 border border-orange-200'}`}> <div className={`rounded-xl p-3 mb-3 flex-shrink-0 ${isDark ? 'bg-orange-500/20 text-orange-200 border border-orange-500/30' : 'bg-orange-100 text-orange-700 border border-orange-200'}`}>

View File

@@ -140,6 +140,14 @@ export interface VocabWorksheetHook {
showSettings: boolean showSettings: boolean
setShowSettings: (show: boolean) => void setShowSettings: (show: boolean) => void
// OCR Quality Steps (A/B testing)
ocrEnhance: boolean
setOcrEnhance: (v: boolean) => void
ocrMaxCols: number
setOcrMaxCols: (v: number) => void
ocrMinConf: number
setOcrMinConf: (v: number) => void
// QR // QR
showQRModal: boolean showQRModal: boolean
setShowQRModal: (show: boolean) => void setShowQRModal: (show: boolean) => void

View File

@@ -90,6 +90,11 @@ export function useVocabWorksheet(): VocabWorksheetHook {
const [ocrPrompts, setOcrPrompts] = useState<OcrPrompts>(defaultOcrPrompts) const [ocrPrompts, setOcrPrompts] = useState<OcrPrompts>(defaultOcrPrompts)
const [showSettings, setShowSettings] = useState(false) const [showSettings, setShowSettings] = useState(false)
// OCR Quality Steps (toggle individually for A/B testing)
const [ocrEnhance, setOcrEnhance] = useState(true) // Step 3: CLAHE + denoise
const [ocrMaxCols, setOcrMaxCols] = useState(3) // Step 2: max columns (0=unlimited)
const [ocrMinConf, setOcrMinConf] = useState(0) // Step 1: 0=auto from quality score
// QR Code Upload // QR Code Upload
const [showQRModal, setShowQRModal] = useState(false) const [showQRModal, setShowQRModal] = useState(false)
const [uploadSessionId, setUploadSessionId] = useState('') const [uploadSessionId, setUploadSessionId] = useState('')
@@ -359,7 +364,14 @@ export function useVocabWorksheet(): VocabWorksheetHook {
const API_BASE = getApiBase() const API_BASE = getApiBase()
try { try {
const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}?ipa_mode=${ipa}&syllable_mode=${syllable}`, { const params = new URLSearchParams({
ipa_mode: ipa,
syllable_mode: syllable,
enhance: String(ocrEnhance),
max_cols: String(ocrMaxCols),
min_conf: String(ocrMinConf),
})
const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}?${params}`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ ocr_prompts: ocrPrompts }), body: JSON.stringify({ ocr_prompts: ocrPrompts }),
@@ -793,7 +805,14 @@ export function useVocabWorksheet(): VocabWorksheetHook {
for (const pageIndex of pagesToReprocess) { for (const pageIndex of pagesToReprocess) {
setExtractionStatus(`Verarbeite Seite ${pageIndex + 1}...`) setExtractionStatus(`Verarbeite Seite ${pageIndex + 1}...`)
try { try {
const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/process-single-page/${pageIndex}?ipa_mode=${ipa}&syllable_mode=${syllable}`, { const params = new URLSearchParams({
ipa_mode: ipa,
syllable_mode: syllable,
enhance: String(ocrEnhance),
max_cols: String(ocrMaxCols),
min_conf: String(ocrMinConf),
})
const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/process-single-page/${pageIndex}?${params}`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ ocr_prompts: ocrPrompts }), body: JSON.stringify({ ocr_prompts: ocrPrompts }),
@@ -849,6 +868,7 @@ export function useVocabWorksheet(): VocabWorksheetHook {
processingErrors, successfulPages, failedPages, currentlyProcessingPage, processingErrors, successfulPages, failedPages, currentlyProcessingPage,
// OCR settings // OCR settings
ocrPrompts, showSettings, setShowSettings, ocrPrompts, showSettings, setShowSettings,
ocrEnhance, setOcrEnhance, ocrMaxCols, setOcrMaxCols, ocrMinConf, setOcrMinConf,
// QR // QR
showQRModal, setShowQRModal, uploadSessionId, showQRModal, setShowQRModal, uploadSessionId,
mobileUploadedFiles, selectedMobileFile, setSelectedMobileFile, setMobileUploadedFiles, mobileUploadedFiles, selectedMobileFile, setSelectedMobileFile, setMobileUploadedFiles,