Add A/B testing toggles to OCR Kombi Grid Review
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 39s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m27s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 24s

Quality step toggles in admin-lehrer StepGridReview (port 3002):
- CLAHE checkbox (Step 3: image enhancement)
- MaxCol dropdown (Step 2: column limit, 0=off, i.e. unlimited)
- MinConf dropdown (Step 1: OCR confidence, 0=auto)

Parameters flow through: StepGridReview → useGridEditor → build-grid
endpoint → _build_grid_core. MinConf drops words whose OCR confidence is
below the chosen threshold before grid building.

Toggle the settings, then click "Neu berechnen" ("Recalculate") to test each step individually.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-23 16:09:17 +02:00
parent 545c8676b0
commit 25e5a7415a
4 changed files with 73 additions and 3 deletions

View File

@@ -28,6 +28,11 @@ export function useGridEditor(sessionId: string | null) {
const [ipaMode, setIpaMode] = useState<IpaMode>('auto') const [ipaMode, setIpaMode] = useState<IpaMode>('auto')
const [syllableMode, setSyllableMode] = useState<SyllableMode>('auto') const [syllableMode, setSyllableMode] = useState<SyllableMode>('auto')
// OCR Quality Steps (A/B testing toggles)
const [ocrEnhance, setOcrEnhance] = useState(true)
const [ocrMaxCols, setOcrMaxCols] = useState(0) // 0 = unlimited (admin pipeline default)
const [ocrMinConf, setOcrMinConf] = useState(0) // 0 = auto from quality score
// Undo/redo stacks store serialized zone arrays // Undo/redo stacks store serialized zone arrays
const undoStack = useRef<string[]>([]) const undoStack = useRef<string[]>([])
const redoStack = useRef<string[]>([]) const redoStack = useRef<string[]>([])
@@ -52,6 +57,9 @@ export function useGridEditor(sessionId: string | null) {
const params = new URLSearchParams() const params = new URLSearchParams()
params.set('ipa_mode', ipaMode) params.set('ipa_mode', ipaMode)
params.set('syllable_mode', syllableMode) params.set('syllable_mode', syllableMode)
params.set('enhance', String(ocrEnhance))
if (ocrMaxCols > 0) params.set('max_cols', String(ocrMaxCols))
if (ocrMinConf > 0) params.set('min_conf', String(ocrMinConf))
const res = await fetch( const res = await fetch(
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid?${params}`, `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid?${params}`,
{ method: 'POST' }, { method: 'POST' },
@@ -70,7 +78,7 @@ export function useGridEditor(sessionId: string | null) {
} finally { } finally {
setLoading(false) setLoading(false)
} }
}, [sessionId, ipaMode, syllableMode]) }, [sessionId, ipaMode, syllableMode, ocrEnhance, ocrMaxCols, ocrMinConf])
const loadGrid = useCallback(async () => { const loadGrid = useCallback(async () => {
if (!sessionId) return if (!sessionId) return
@@ -85,6 +93,9 @@ export function useGridEditor(sessionId: string | null) {
const params = new URLSearchParams() const params = new URLSearchParams()
params.set('ipa_mode', ipaMode) params.set('ipa_mode', ipaMode)
params.set('syllable_mode', syllableMode) params.set('syllable_mode', syllableMode)
params.set('enhance', String(ocrEnhance))
if (ocrMaxCols > 0) params.set('max_cols', String(ocrMaxCols))
if (ocrMinConf > 0) params.set('min_conf', String(ocrMinConf))
const buildRes = await fetch( const buildRes = await fetch(
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid?${params}`, `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid?${params}`,
{ method: 'POST' }, { method: 'POST' },
@@ -981,5 +992,11 @@ export function useGridEditor(sessionId: string | null) {
setIpaMode, setIpaMode,
syllableMode, syllableMode,
setSyllableMode, setSyllableMode,
ocrEnhance,
setOcrEnhance,
ocrMaxCols,
setOcrMaxCols,
ocrMinConf,
setOcrMinConf,
} }
} }

View File

@@ -61,6 +61,12 @@ export function StepGridReview({ sessionId, onNext, saveRef }: StepGridReviewPro
setIpaMode, setIpaMode,
syllableMode, syllableMode,
setSyllableMode, setSyllableMode,
ocrEnhance,
setOcrEnhance,
ocrMaxCols,
setOcrMaxCols,
ocrMinConf,
setOcrMinConf,
} = useGridEditor(sessionId) } = useGridEditor(sessionId)
const [showImage, setShowImage] = useState(true) const [showImage, setShowImage] = useState(true)
@@ -256,6 +262,34 @@ export function StepGridReview({ sessionId, onNext, saveRef }: StepGridReviewPro
Alle akzeptieren Alle akzeptieren
</button> </button>
)} )}
{/* OCR Quality Steps (A/B Testing) */}
<span className="text-gray-400 dark:text-gray-500">|</span>
<label className="flex items-center gap-1 cursor-pointer" title="Step 3: CLAHE + Bilateral-Filter Enhancement">
<input type="checkbox" checked={ocrEnhance} onChange={(e) => setOcrEnhance(e.target.checked)} className="rounded w-3 h-3" />
<span className="text-gray-500 dark:text-gray-400">CLAHE</span>
</label>
<label className="flex items-center gap-1" title="Step 2: Max Spaltenanzahl (0=unbegrenzt)">
<span className="text-gray-500 dark:text-gray-400">MaxCol:</span>
<select value={ocrMaxCols} onChange={(e) => setOcrMaxCols(Number(e.target.value))} className="px-1 py-0.5 text-xs rounded border border-gray-200 dark:border-gray-600 bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-300">
<option value={0}>off</option>
<option value={2}>2</option>
<option value={3}>3</option>
<option value={4}>4</option>
<option value={5}>5</option>
</select>
</label>
<label className="flex items-center gap-1" title="Step 1: Min OCR Confidence (0=auto)">
<span className="text-gray-500 dark:text-gray-400">MinConf:</span>
<select value={ocrMinConf} onChange={(e) => setOcrMinConf(Number(e.target.value))} className="px-1 py-0.5 text-xs rounded border border-gray-200 dark:border-gray-600 bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-300">
<option value={0}>auto</option>
<option value={20}>20</option>
<option value={30}>30</option>
<option value={40}>40</option>
<option value={50}>50</option>
<option value={60}>60</option>
</select>
</label>
<div className="ml-auto flex items-center gap-2"> <div className="ml-auto flex items-center gap-2">
<button <button
onClick={() => { onClick={() => {

View File

@@ -55,6 +55,9 @@ async def _build_grid_core(
*, *,
ipa_mode: str = "auto", ipa_mode: str = "auto",
syllable_mode: str = "auto", syllable_mode: str = "auto",
enhance: bool = True,
max_columns: int | None = None,
min_conf: int | None = None,
) -> dict: ) -> dict:
"""Core grid building logic — pure computation, no HTTP or DB side effects. """Core grid building logic — pure computation, no HTTP or DB side effects.
@@ -91,8 +94,18 @@ async def _build_grid_core(
if not all_words: if not all_words:
raise ValueError("No word boxes found in cells") raise ValueError("No word boxes found in cells")
logger.info("build-grid session %s: %d words from %d cells", # 2a-pre. Apply min_conf filter if specified (Step 1: confidence tightening)
session_id, len(all_words), len(word_result["cells"])) if min_conf and min_conf > 0:
before = len(all_words)
all_words = [w for w in all_words if w.get('conf', 100) >= min_conf]
removed = before - len(all_words)
if removed:
logger.info("build-grid session %s: min_conf=%d removed %d/%d words",
session_id, min_conf, removed, before)
logger.info("build-grid session %s: %d words from %d cells (enhance=%s, max_cols=%s, min_conf=%s)",
session_id, len(all_words), len(word_result["cells"]),
enhance, max_columns, min_conf)
# 2b. Filter decorative margin columns (alphabet graphics). # 2b. Filter decorative margin columns (alphabet graphics).
# Some worksheets have a decorative alphabet strip along one margin # Some worksheets have a decorative alphabet strip along one margin

View File

@@ -32,6 +32,9 @@ async def build_grid(
session_id: str, session_id: str,
ipa_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"), ipa_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"),
syllable_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"), syllable_mode: str = Query("auto", pattern="^(auto|all|de|en|none)$"),
enhance: bool = Query(True, description="Step 3: CLAHE + denoise for degraded scans"),
max_cols: int = Query(0, description="Step 2: Max column count (0=unlimited)"),
min_conf: int = Query(0, description="Step 1: Min OCR confidence (0=auto)"),
): ):
"""Build a structured, zone-aware grid from existing Kombi word results. """Build a structured, zone-aware grid from existing Kombi word results.
@@ -53,6 +56,9 @@ async def build_grid(
result = await _build_grid_core( result = await _build_grid_core(
session_id, session, session_id, session,
ipa_mode=ipa_mode, syllable_mode=syllable_mode, ipa_mode=ipa_mode, syllable_mode=syllable_mode,
enhance=enhance,
max_columns=max_cols if max_cols > 0 else None,
min_conf=min_conf if min_conf > 0 else None,
) )
except ValueError as e: except ValueError as e:
raise HTTPException(status_code=400, detail=str(e)) raise HTTPException(status_code=400, detail=str(e))