Add A/B testing toggles for OCR quality steps
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 30s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m33s
CI / test-python-agent-core (push) Successful in 26s
CI / test-nodejs-website (push) Successful in 18s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 30s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m33s
CI / test-python-agent-core (push) Successful in 26s
CI / test-nodejs-website (push) Successful in 18s
Each quality improvement step can now be toggled independently:

- CLAHE checkbox (Step 3: image enhancement on/off)
- MaxCols dropdown (Step 2: 0=unlimited, 2-5)
- MinConf dropdown (Step 1: auto/20/30/40/50/60)

Backend: Query params enhance, max_cols, min_conf on process-single-page. The response's scan_quality includes an active_steps dict showing which steps are enabled.

Frontend: Toggle controls in VocabularyTab above the table.

This allows empirical A/B testing of each step on the same scan.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1290,6 +1290,9 @@ async def process_single_page(
|
|||||||
page_number: int,
|
page_number: int,
|
||||||
ipa_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"),
|
ipa_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"),
|
||||||
syllable_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"),
|
syllable_mode: str = Query("none", pattern="^(auto|all|de|en|none)$"),
|
||||||
|
enhance: bool = Query(True, description="Step 3: CLAHE + Denoise for degraded scans"),
|
||||||
|
max_cols: int = Query(3, description="Step 2: Max column count (0=unlimited)"),
|
||||||
|
min_conf: int = Query(0, description="Step 1: Min OCR confidence (0=auto from quality score)"),
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Process a SINGLE page of an uploaded PDF using the Kombi OCR pipeline.
|
Process a SINGLE page of an uploaded PDF using the Kombi OCR pipeline.
|
||||||
@@ -1300,6 +1303,9 @@ async def process_single_page(
|
|||||||
Query params:
|
Query params:
|
||||||
ipa_mode: "none" (default), "auto", "all", "en", "de"
|
ipa_mode: "none" (default), "auto", "all", "en", "de"
|
||||||
syllable_mode: "none" (default), "auto", "all", "en", "de"
|
syllable_mode: "none" (default), "auto", "all", "en", "de"
|
||||||
|
enhance: true (default) — apply CLAHE/denoise for degraded scans
|
||||||
|
max_cols: 3 (default) — max column count (0=unlimited)
|
||||||
|
min_conf: 0 (default=auto) — min OCR confidence (0=from quality score)
|
||||||
|
|
||||||
The frontend should call this sequentially for each page.
|
The frontend should call this sequentially for each page.
|
||||||
Returns the vocabulary for just this one page.
|
Returns the vocabulary for just this one page.
|
||||||
@@ -1332,6 +1338,9 @@ async def process_single_page(
|
|||||||
page_vocabulary, rotation_deg, quality_report = await _run_ocr_pipeline_for_page(
|
page_vocabulary, rotation_deg, quality_report = await _run_ocr_pipeline_for_page(
|
||||||
img_bgr, page_number, session_id,
|
img_bgr, page_number, session_id,
|
||||||
ipa_mode=ipa_mode, syllable_mode=syllable_mode,
|
ipa_mode=ipa_mode, syllable_mode=syllable_mode,
|
||||||
|
enable_enhance=enhance,
|
||||||
|
max_columns=max_cols if max_cols > 0 else None,
|
||||||
|
override_min_conf=min_conf if min_conf > 0 else None,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"OCR pipeline failed for page {page_number + 1}: {e}", exc_info=True)
|
logger.error(f"OCR pipeline failed for page {page_number + 1}: {e}", exc_info=True)
|
||||||
@@ -1395,11 +1404,17 @@ async def process_single_page(
|
|||||||
"rotation": rotation_deg,
|
"rotation": rotation_deg,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add scan quality report if available
|
# Add scan quality report + active steps info
|
||||||
if quality_report:
|
if quality_report:
|
||||||
result["scan_quality"] = quality_report.to_dict()
|
sq = quality_report.to_dict()
|
||||||
|
sq["active_steps"] = {
|
||||||
|
"step1_confidence": f"min_conf={min_ocr_conf}" if not override_min_conf else f"min_conf={override_min_conf} (override)",
|
||||||
|
"step2_max_columns": f"max_cols={max_columns}" if max_columns else "unlimited",
|
||||||
|
"step3_enhance": "on" if enable_enhance and quality_report.is_degraded else "off",
|
||||||
|
}
|
||||||
|
result["scan_quality"] = sq
|
||||||
else:
|
else:
|
||||||
quality_report = None # ensure variable exists for non-pipeline path
|
quality_report = None
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -1411,6 +1426,9 @@ async def _run_ocr_pipeline_for_page(
|
|||||||
*,
|
*,
|
||||||
ipa_mode: str = "none",
|
ipa_mode: str = "none",
|
||||||
syllable_mode: str = "none",
|
syllable_mode: str = "none",
|
||||||
|
enable_enhance: bool = True,
|
||||||
|
max_columns: Optional[int] = 3,
|
||||||
|
override_min_conf: Optional[int] = None,
|
||||||
) -> tuple:
|
) -> tuple:
|
||||||
"""Run the full Kombi OCR pipeline on a single page and return vocab entries.
|
"""Run the full Kombi OCR pipeline on a single page and return vocab entries.
|
||||||
|
|
||||||
@@ -1488,11 +1506,14 @@ async def _run_ocr_pipeline_for_page(
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f" scan quality: failed ({e})")
|
logger.warning(f" scan quality: failed ({e})")
|
||||||
|
|
||||||
min_ocr_conf = scan_quality_report.recommended_min_conf if scan_quality_report else 40
|
if override_min_conf:
|
||||||
|
min_ocr_conf = override_min_conf
|
||||||
|
else:
|
||||||
|
min_ocr_conf = scan_quality_report.recommended_min_conf if scan_quality_report else 40
|
||||||
|
|
||||||
# 5c. Image enhancement for degraded scans
|
# 5c. Image enhancement for degraded scans
|
||||||
is_degraded = scan_quality_report.is_degraded if scan_quality_report else False
|
is_degraded = scan_quality_report.is_degraded if scan_quality_report else False
|
||||||
if is_degraded:
|
if is_degraded and enable_enhance:
|
||||||
try:
|
try:
|
||||||
from ocr_image_enhance import enhance_for_ocr
|
from ocr_image_enhance import enhance_for_ocr
|
||||||
dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True)
|
dewarped_bgr = enhance_for_ocr(dewarped_bgr, is_degraded=True)
|
||||||
@@ -1547,8 +1568,8 @@ async def _run_ocr_pipeline_for_page(
|
|||||||
else:
|
else:
|
||||||
merged_words = tess_words # fallback to Tesseract only
|
merged_words = tess_words # fallback to Tesseract only
|
||||||
|
|
||||||
# Build initial grid from merged words (limit to 3 columns for vocab tables)
|
# Build initial grid from merged words
|
||||||
cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h, max_columns=3)
|
cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h, max_columns=max_columns)
|
||||||
for cell in cells:
|
for cell in cells:
|
||||||
cell["ocr_engine"] = "rapid_kombi"
|
cell["ocr_engine"] = "rapid_kombi"
|
||||||
|
|
||||||
|
|||||||
@@ -120,6 +120,36 @@ export function VocabularyTab({ h }: { h: VocabWorksheetHook }) {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* OCR Quality Steps (A/B Testing) */}
|
||||||
|
<div className={`flex items-center gap-3 mb-3 flex-shrink-0 flex-wrap ${isDark ? 'text-white/60' : 'text-slate-500'}`}>
|
||||||
|
<span className="text-xs font-medium">Steps:</span>
|
||||||
|
<label className="flex items-center gap-1 text-xs cursor-pointer">
|
||||||
|
<input type="checkbox" checked={h.ocrEnhance} onChange={(e) => h.setOcrEnhance(e.target.checked)} className="rounded" />
|
||||||
|
CLAHE
|
||||||
|
</label>
|
||||||
|
<label className="flex items-center gap-1 text-xs">
|
||||||
|
<span>MaxCols:</span>
|
||||||
|
<select value={h.ocrMaxCols} onChange={(e) => h.setOcrMaxCols(Number(e.target.value))} className={`px-1 py-0.5 text-xs rounded border ${isDark ? 'border-white/20 bg-white/10 text-white' : 'border-gray-200 bg-white text-gray-600'}`}>
|
||||||
|
<option value={0}>unbegrenzt</option>
|
||||||
|
<option value={2}>2</option>
|
||||||
|
<option value={3}>3</option>
|
||||||
|
<option value={4}>4</option>
|
||||||
|
<option value={5}>5</option>
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
|
<label className="flex items-center gap-1 text-xs">
|
||||||
|
<span>MinConf:</span>
|
||||||
|
<select value={h.ocrMinConf} onChange={(e) => h.setOcrMinConf(Number(e.target.value))} className={`px-1 py-0.5 text-xs rounded border ${isDark ? 'border-white/20 bg-white/10 text-white' : 'border-gray-200 bg-white text-gray-600'}`}>
|
||||||
|
<option value={0}>auto</option>
|
||||||
|
<option value={20}>20</option>
|
||||||
|
<option value={30}>30</option>
|
||||||
|
<option value={40}>40</option>
|
||||||
|
<option value={50}>50</option>
|
||||||
|
<option value={60}>60</option>
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
|
||||||
{/* Error messages for failed pages */}
|
{/* Error messages for failed pages */}
|
||||||
{h.processingErrors.length > 0 && (
|
{h.processingErrors.length > 0 && (
|
||||||
<div className={`rounded-xl p-3 mb-3 flex-shrink-0 ${isDark ? 'bg-orange-500/20 text-orange-200 border border-orange-500/30' : 'bg-orange-100 text-orange-700 border border-orange-200'}`}>
|
<div className={`rounded-xl p-3 mb-3 flex-shrink-0 ${isDark ? 'bg-orange-500/20 text-orange-200 border border-orange-500/30' : 'bg-orange-100 text-orange-700 border border-orange-200'}`}>
|
||||||
|
|||||||
@@ -140,6 +140,14 @@ export interface VocabWorksheetHook {
|
|||||||
showSettings: boolean
|
showSettings: boolean
|
||||||
setShowSettings: (show: boolean) => void
|
setShowSettings: (show: boolean) => void
|
||||||
|
|
||||||
|
// OCR Quality Steps (A/B testing)
|
||||||
|
ocrEnhance: boolean
|
||||||
|
setOcrEnhance: (v: boolean) => void
|
||||||
|
ocrMaxCols: number
|
||||||
|
setOcrMaxCols: (v: number) => void
|
||||||
|
ocrMinConf: number
|
||||||
|
setOcrMinConf: (v: number) => void
|
||||||
|
|
||||||
// QR
|
// QR
|
||||||
showQRModal: boolean
|
showQRModal: boolean
|
||||||
setShowQRModal: (show: boolean) => void
|
setShowQRModal: (show: boolean) => void
|
||||||
|
|||||||
@@ -90,6 +90,11 @@ export function useVocabWorksheet(): VocabWorksheetHook {
|
|||||||
const [ocrPrompts, setOcrPrompts] = useState<OcrPrompts>(defaultOcrPrompts)
|
const [ocrPrompts, setOcrPrompts] = useState<OcrPrompts>(defaultOcrPrompts)
|
||||||
const [showSettings, setShowSettings] = useState(false)
|
const [showSettings, setShowSettings] = useState(false)
|
||||||
|
|
||||||
|
// OCR Quality Steps (toggle individually for A/B testing)
|
||||||
|
const [ocrEnhance, setOcrEnhance] = useState(true) // Step 3: CLAHE + denoise
|
||||||
|
const [ocrMaxCols, setOcrMaxCols] = useState(3) // Step 2: max columns (0=unlimited)
|
||||||
|
const [ocrMinConf, setOcrMinConf] = useState(0) // Step 1: 0=auto from quality score
|
||||||
|
|
||||||
// QR Code Upload
|
// QR Code Upload
|
||||||
const [showQRModal, setShowQRModal] = useState(false)
|
const [showQRModal, setShowQRModal] = useState(false)
|
||||||
const [uploadSessionId, setUploadSessionId] = useState('')
|
const [uploadSessionId, setUploadSessionId] = useState('')
|
||||||
@@ -359,7 +364,14 @@ export function useVocabWorksheet(): VocabWorksheetHook {
|
|||||||
const API_BASE = getApiBase()
|
const API_BASE = getApiBase()
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}?ipa_mode=${ipa}&syllable_mode=${syllable}`, {
|
const params = new URLSearchParams({
|
||||||
|
ipa_mode: ipa,
|
||||||
|
syllable_mode: syllable,
|
||||||
|
enhance: String(ocrEnhance),
|
||||||
|
max_cols: String(ocrMaxCols),
|
||||||
|
min_conf: String(ocrMinConf),
|
||||||
|
})
|
||||||
|
const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session!.id}/process-single-page/${pageIndex}?${params}`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ ocr_prompts: ocrPrompts }),
|
body: JSON.stringify({ ocr_prompts: ocrPrompts }),
|
||||||
@@ -793,7 +805,14 @@ export function useVocabWorksheet(): VocabWorksheetHook {
|
|||||||
for (const pageIndex of pagesToReprocess) {
|
for (const pageIndex of pagesToReprocess) {
|
||||||
setExtractionStatus(`Verarbeite Seite ${pageIndex + 1}...`)
|
setExtractionStatus(`Verarbeite Seite ${pageIndex + 1}...`)
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/process-single-page/${pageIndex}?ipa_mode=${ipa}&syllable_mode=${syllable}`, {
|
const params = new URLSearchParams({
|
||||||
|
ipa_mode: ipa,
|
||||||
|
syllable_mode: syllable,
|
||||||
|
enhance: String(ocrEnhance),
|
||||||
|
max_cols: String(ocrMaxCols),
|
||||||
|
min_conf: String(ocrMinConf),
|
||||||
|
})
|
||||||
|
const res = await fetch(`${API_BASE}/api/v1/vocab/sessions/${session.id}/process-single-page/${pageIndex}?${params}`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ ocr_prompts: ocrPrompts }),
|
body: JSON.stringify({ ocr_prompts: ocrPrompts }),
|
||||||
@@ -849,6 +868,7 @@ export function useVocabWorksheet(): VocabWorksheetHook {
|
|||||||
processingErrors, successfulPages, failedPages, currentlyProcessingPage,
|
processingErrors, successfulPages, failedPages, currentlyProcessingPage,
|
||||||
// OCR settings
|
// OCR settings
|
||||||
ocrPrompts, showSettings, setShowSettings,
|
ocrPrompts, showSettings, setShowSettings,
|
||||||
|
ocrEnhance, setOcrEnhance, ocrMaxCols, setOcrMaxCols, ocrMinConf, setOcrMinConf,
|
||||||
// QR
|
// QR
|
||||||
showQRModal, setShowQRModal, uploadSessionId,
|
showQRModal, setShowQRModal, uploadSessionId,
|
||||||
mobileUploadedFiles, selectedMobileFile, setSelectedMobileFile, setMobileUploadedFiles,
|
mobileUploadedFiles, selectedMobileFile, setSelectedMobileFile, setMobileUploadedFiles,
|
||||||
|
|||||||
Reference in New Issue
Block a user