Add Vision-LLM OCR Fusion (Step 4) for degraded scans
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 42s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m43s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 27s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 42s
CI / test-go-edu-search (push) Successful in 29s
CI / test-python-klausur (push) Failing after 2m43s
CI / test-python-agent-core (push) Successful in 20s
CI / test-nodejs-website (push) Successful in 27s
New module vision_ocr_fusion.py: Sends the scan image, the OCR word coordinates, and the document type to Qwen2.5-VL 32B. The LLM reads the image visually while using the OCR positions as structural hints.

Key features:
- Document-type-aware prompts (Vokabelseite, Woerterbuch, etc.)
- OCR words grouped into lines with x/y coordinates in the prompt
- Low-confidence words marked with (?) for LLM attention
- Continuation-row merging instructions in the prompt
- JSON response parsing with markdown code block handling
- Fallback to the original OCR on any error

Frontend (admin-lehrer Grid Review):
- "Vision-LLM" checkbox toggle
- "Typ" dropdown (Vokabelseite, Woerterbuch, etc.)
- Steps 1-3 defaults set to inactive

Activate: Check "Vision-LLM", select the document type, click "OCR neu + Grid".

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -28,10 +28,14 @@ export function useGridEditor(sessionId: string | null) {
|
||||
const [ipaMode, setIpaMode] = useState<IpaMode>('auto')
|
||||
const [syllableMode, setSyllableMode] = useState<SyllableMode>('auto')
|
||||
|
||||
// OCR Quality Steps (A/B testing toggles)
|
||||
const [ocrEnhance, setOcrEnhance] = useState(true)
|
||||
const [ocrMaxCols, setOcrMaxCols] = useState(0) // 0 = unlimited (admin pipeline default)
|
||||
const [ocrMinConf, setOcrMinConf] = useState(0) // 0 = auto from quality score
|
||||
// OCR Quality Steps (A/B testing toggles — defaults off for now)
|
||||
const [ocrEnhance, setOcrEnhance] = useState(false)
|
||||
const [ocrMaxCols, setOcrMaxCols] = useState(0)
|
||||
const [ocrMinConf, setOcrMinConf] = useState(0)
|
||||
|
||||
// Vision-LLM Fusion (Step 4)
|
||||
const [visionFusion, setVisionFusion] = useState(false)
|
||||
const [documentCategory, setDocumentCategory] = useState('vokabelseite')
|
||||
|
||||
// Undo/redo stacks store serialized zone arrays
|
||||
const undoStack = useRef<string[]>([])
|
||||
@@ -92,6 +96,8 @@ export function useGridEditor(sessionId: string | null) {
|
||||
params.set('enhance', String(ocrEnhance))
|
||||
if (ocrMaxCols > 0) params.set('max_cols', String(ocrMaxCols))
|
||||
if (ocrMinConf > 0) params.set('min_conf', String(ocrMinConf))
|
||||
params.set('vision_fusion', String(visionFusion))
|
||||
if (documentCategory) params.set('doc_category', documentCategory)
|
||||
const res = await fetch(
|
||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/rerun-ocr-and-build-grid?${params}`,
|
||||
{ method: 'POST' },
|
||||
@@ -110,7 +116,7 @@ export function useGridEditor(sessionId: string | null) {
|
||||
} finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}, [sessionId, ipaMode, syllableMode, ocrEnhance, ocrMaxCols, ocrMinConf])
|
||||
}, [sessionId, ipaMode, syllableMode, ocrEnhance, ocrMaxCols, ocrMinConf, visionFusion, documentCategory])
|
||||
|
||||
const loadGrid = useCallback(async () => {
|
||||
if (!sessionId) return
|
||||
@@ -1030,6 +1036,10 @@ export function useGridEditor(sessionId: string | null) {
|
||||
setOcrMaxCols,
|
||||
ocrMinConf,
|
||||
setOcrMinConf,
|
||||
visionFusion,
|
||||
setVisionFusion,
|
||||
documentCategory,
|
||||
setDocumentCategory,
|
||||
rerunOcr,
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user