diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx index c9fd289..a99dc89 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx +++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx @@ -11,12 +11,13 @@ import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection' import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition' import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction' import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep' -import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types' +import { KombiCompareStep } from '@/components/ocr-overlay/KombiCompareStep' +import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, KOMBI_COMPARE_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types' const KLAUSUR_API = '/klausur-api' export default function OcrOverlayPage() { - const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi'>('pipeline') + const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi' | 'kombi-compare'>('pipeline') const [currentStep, setCurrentStep] = useState(0) const [sessionId, setSessionId] = useState(null) const [sessionName, setSessionName] = useState('') @@ -63,14 +64,15 @@ export default function OcrOverlayPage() { setSessionName(data.name || data.filename || '') setActiveCategory(data.document_category || undefined) - // Check if this session was processed with paddle_direct or kombi + // Check if this session was processed with paddle_direct, kombi, or rapid_kombi const ocrEngine = data.word_result?.ocr_engine const isPaddleDirect = ocrEngine === 'paddle_direct' const isKombi = ocrEngine === 'kombi' + const isRapidKombi = ocrEngine === 'rapid_kombi' - if (isPaddleDirect || isKombi) { - const m = isKombi ? 'kombi' : 'paddle-direct' - const baseSteps = isKombi ? KOMBI_STEPS : PADDLE_DIRECT_STEPS + if (isPaddleDirect || isKombi || isRapidKombi) { + const m = isKombi ? 'kombi' : isPaddleDirect ? 'paddle-direct' : 'kombi-compare' + const baseSteps = isKombi ? KOMBI_STEPS : isRapidKombi ? KOMBI_COMPARE_STEPS : PADDLE_DIRECT_STEPS setMode(m) setSteps( baseSteps.map((s, i) => ({ @@ -105,7 +107,7 @@ export default function OcrOverlayPage() { if (sessionId === sid) { setSessionId(null) setCurrentStep(0) - const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS + const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'kombi-compare' ? KOMBI_COMPARE_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) } } catch (e) { @@ -162,7 +164,7 @@ export default function OcrOverlayPage() { const handleNext = () => { if (currentStep >= steps.length - 1) { // Last step completed — return to session list - const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS + const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'kombi-compare' ? KOMBI_COMPARE_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) setCurrentStep(0) setSessionId(null) @@ -191,7 +193,7 @@ export default function OcrOverlayPage() { setSessionId(null) setSessionName('') setCurrentStep(0) - const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS + const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'kombi-compare' ? KOMBI_COMPARE_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) } @@ -230,7 +232,7 @@ export default function OcrOverlayPage() { }, [sessionId, goToStep]) const renderStep = () => { - if (mode === 'paddle-direct' || mode === 'kombi') { + if (mode === 'paddle-direct' || mode === 'kombi' || mode === 'kombi-compare') { switch (currentStep) { case 0: return @@ -241,6 +243,9 @@ export default function OcrOverlayPage() { case 3: return case 4: + if (mode === 'kombi-compare') { + return + } return mode === 'kombi' ? ( Kombi (5 Schritte) + void +} + +export function KombiCompareStep({ sessionId, onNext }: KombiCompareStepProps) { + const [phase, setPhase] = useState('idle') + const [error, setError] = useState('') + const [paddleResult, setPaddleResult] = useState(null) + const [rapidResult, setRapidResult] = useState(null) + const [paddleStatus, setPaddleStatus] = useState<'pending' | 'running' | 'done' | 'error'>('pending') + const [rapidStatus, setRapidStatus] = useState<'pending' | 'running' | 'done' | 'error'>('pending') + + const runBothEngines = async () => { + if (!sessionId) return + setPhase('running') + setError('') + setPaddleStatus('running') + setRapidStatus('running') + setPaddleResult(null) + setRapidResult(null) + + const fetchEngine = async ( + endpoint: string, + setResult: (r: KombiResult) => void, + setStatus: (s: 'pending' | 'running' | 'done' | 'error') => void, + ) => { + try { + const res = await fetch( + `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/${endpoint}`, + { method: 'POST' }, + ) + if (!res.ok) { + const body = await res.json().catch(() => ({})) + throw new Error(body.detail || `HTTP ${res.status}`) + } + const data = await res.json() + setResult(data) + setStatus('done') + } catch (e: unknown) { + setStatus('error') + throw e + } + } + + try { + await Promise.all([ + fetchEngine('paddle-kombi', setPaddleResult, setPaddleStatus), + fetchEngine('rapid-kombi', setRapidResult, setRapidStatus), + ]) + setPhase('compare') + } catch (e: unknown) { + // At least one failed — still show compare if the other succeeded + setError(e instanceof Error ? e.message : String(e)) + setPhase('compare') + } + } + + if (phase === 'idle') { + return ( +
+
⚖️
+

+ Kombi-Vergleich +

+

+ Beide Kombi-Modi (Paddle + Tesseract vs. RapidOCR + Tesseract) laufen parallel. + Die Ergebnisse werden nebeneinander angezeigt, damit die Qualitaet direkt verglichen werden kann. +

+ +
+ ) + } + + if (phase === 'running' && !paddleResult && !rapidResult) { + return ( +
+
+ + +
+
+ ) + } + + // compare phase + return ( +
+ {error && ( +
+ {error} +
+ )} + +
+

+ Side-by-Side Vergleich +

+ +
+ +
+ {/* Left: Paddle-Kombi */} +
+
+ + 🔀 Paddle + Tesseract + + {paddleStatus === 'error' && ( + Fehler + )} +
+ {paddleResult ? ( + <> + {}} + wordResultOverride={paddleResult} + /> + + + ) : ( +
+ {paddleStatus === 'running' ? 'Laeuft...' : 'Fehlgeschlagen'} +
+ )} +
+ + {/* Right: Rapid-Kombi */} +
+
+ + ⚡ RapidOCR + Tesseract + + {rapidStatus === 'error' && ( + Fehler + )} +
+ {rapidResult ? ( + <> + {}} + wordResultOverride={rapidResult} + /> + + + ) : ( +
+ {rapidStatus === 'running' ? 'Laeuft...' : 'Fehlgeschlagen'} +
+ )} +
+
+ +
+ +
+
+ ) +} + +function EngineStatusCard({ label, status }: { label: string; status: string }) { + return ( +
+ {status === 'running' && ( +
+ )} + {status === 'done' && } + {status === 'error' && } + {status === 'pending' && } + {label} +
+ ) +} + +function StatsBar({ result, engine }: { result: KombiResult; engine: string }) { + const nonEmpty = result.summary?.non_empty_cells ?? 0 + const totalCells = result.summary?.total_cells ?? 0 + const merged = result.summary?.merged_words ?? 0 + const duration = result.duration_seconds ?? 0 + + return ( +
+ {engine} + {merged} Woerter + {nonEmpty}/{totalCells} Zellen + {duration.toFixed(2)}s +
+ ) +} diff --git a/admin-lehrer/components/ocr-overlay/OverlayReconstruction.tsx b/admin-lehrer/components/ocr-overlay/OverlayReconstruction.tsx index 5790852..a6b7620 100644 --- a/admin-lehrer/components/ocr-overlay/OverlayReconstruction.tsx +++ b/admin-lehrer/components/ocr-overlay/OverlayReconstruction.tsx @@ -10,6 +10,8 @@ const KLAUSUR_API = '/klausur-api' interface OverlayReconstructionProps { sessionId: string | null onNext: () => void + /** When set, use this data directly instead of fetching from the session API. */ + wordResultOverride?: { cells: GridCell[]; image_width: number; image_height: number; [key: string]: unknown } } interface EditableCell { @@ -24,7 +26,7 @@ interface EditableCell { type UndoAction = { cellId: string; oldText: string; newText: string } -export function OverlayReconstruction({ sessionId, onNext }: OverlayReconstructionProps) { +export function OverlayReconstruction({ sessionId, onNext, wordResultOverride }: OverlayReconstructionProps) { const [status, setStatus] = useState<'loading' | 'ready' | 'saving' | 'saved' | 'error'>('loading') const [error, setError] = useState('') const [cells, setCells] = useState([]) @@ -78,10 +80,39 @@ export function OverlayReconstruction({ sessionId, onNext }: OverlayReconstructi // Load session data useEffect(() => { + if (wordResultOverride) { + applyWordResult(wordResultOverride) + return + } if (!sessionId) return loadSessionData() // eslint-disable-next-line react-hooks/exhaustive-deps - }, [sessionId]) + }, [sessionId, wordResultOverride]) + + const applyWordResult = (wordResult: { cells: GridCell[]; image_width: number; image_height: number; [key: string]: unknown }) => { + const rawGridCells: GridCell[] = wordResult.cells || [] + setGridCells(rawGridCells) + + const editableCells: EditableCell[] = rawGridCells.map(c => ({ + cellId: c.cell_id, + text: c.text, + originalText: c.text, + bboxPct: c.bbox_pct, + colType: c.col_type, + rowIndex: c.row_index, + colIndex: c.col_index, + })) + setCells(editableCells) + setEditedTexts(new Map()) + setUndoStack([]) + setRedoStack([]) + + if (wordResult.image_width && wordResult.image_height) { + setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height }) + } + + setStatus('ready') + } const loadSessionData = async () => { if (!sessionId) return @@ -98,33 +129,11 @@ export function OverlayReconstruction({ sessionId, onNext }: OverlayReconstructi return } - const rawGridCells: GridCell[] = wordResult.cells || [] - setGridCells(rawGridCells) - - const editableCells: EditableCell[] = rawGridCells.map(c => ({ - cellId: c.cell_id, - text: c.text, - originalText: c.text, - bboxPct: c.bbox_pct, - colType: c.col_type, - rowIndex: c.row_index, - colIndex: c.col_index, - })) - setCells(editableCells) - setEditedTexts(new Map()) - setUndoStack([]) - setRedoStack([]) + applyWordResult(wordResult as unknown as { cells: GridCell[]; image_width: number; image_height: number }) // Load rows const rowResult: RowResult | undefined = data.row_result if (rowResult?.rows) setRows(rowResult.rows) - - // Store image dimensions - if (wordResult.image_width && wordResult.image_height) { - setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height }) - } - - setStatus('ready') } catch (e: unknown) { setError(e instanceof Error ? e.message : String(e)) setStatus('error') diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index 4914718..a677326 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -2976,6 +2976,141 @@ async def paddle_kombi(session_id: str): return {"session_id": session_id, **word_result} +@router.post("/sessions/{session_id}/rapid-kombi") +async def rapid_kombi(session_id: str): + """Run RapidOCR + Tesseract on the preprocessed image and merge results. + + Same merge logic as paddle-kombi, but uses local RapidOCR (ONNX Runtime) + instead of remote PaddleOCR service. + """ + img_png = await get_session_image(session_id, "cropped") + if not img_png: + img_png = await get_session_image(session_id, "dewarped") + if not img_png: + img_png = await get_session_image(session_id, "original") + if not img_png: + raise HTTPException(status_code=404, detail="No image found for this session") + + img_arr = np.frombuffer(img_png, dtype=np.uint8) + img_bgr = cv2.imdecode(img_arr, cv2.IMREAD_COLOR) + if img_bgr is None: + raise HTTPException(status_code=400, detail="Failed to decode image") + + img_h, img_w = img_bgr.shape[:2] + + from cv_ocr_engines import ocr_region_rapid + from cv_vocab_types import PageRegion + + t0 = time.time() + + # --- RapidOCR (local, synchronous) --- + full_region = PageRegion( + type="full_page", x=0, y=0, width=img_w, height=img_h, + ) + rapid_words = ocr_region_rapid(img_bgr, full_region) + if not rapid_words: + rapid_words = [] + + # --- Tesseract --- + from PIL import Image + import pytesseract + + pil_img = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)) + data = pytesseract.image_to_data( + pil_img, lang="eng+deu", + config="--psm 6 --oem 3", + output_type=pytesseract.Output.DICT, + ) + tess_words = [] + for i in range(len(data["text"])): + text = str(data["text"][i]).strip() + conf_raw = str(data["conf"][i]) + conf = int(conf_raw) if conf_raw.lstrip("-").isdigit() else -1 + if not text or conf < 20: + continue + tess_words.append({ + "text": text, + "left": data["left"][i], + "top": data["top"][i], + "width": data["width"][i], + "height": data["height"][i], + "conf": conf, + }) + + # --- Split multi-word RapidOCR boxes into individual words --- + rapid_words_split = _split_paddle_multi_words(rapid_words) + logger.info( + "rapid_kombi: split %d rapid boxes → %d individual words", + len(rapid_words), len(rapid_words_split), + ) + + # --- Merge --- + if not rapid_words_split and not tess_words: + raise HTTPException(status_code=400, detail="Both OCR engines returned no words") + + merged_words = _merge_paddle_tesseract(rapid_words_split, tess_words) + + cells, columns_meta = build_grid_from_words(merged_words, img_w, img_h) + duration = time.time() - t0 + + for cell in cells: + cell["ocr_engine"] = "rapid_kombi" + + n_rows = len(set(c["row_index"] for c in cells)) if cells else 0 + n_cols = len(columns_meta) + col_types = {c.get("type") for c in columns_meta} + is_vocab = bool(col_types & {"column_en", "column_de"}) + + word_result = { + "cells": cells, + "grid_shape": {"rows": n_rows, "cols": n_cols, "total_cells": len(cells)}, + "columns_used": columns_meta, + "layout": "vocab" if is_vocab else "generic", + "image_width": img_w, + "image_height": img_h, + "duration_seconds": round(duration, 2), + "ocr_engine": "rapid_kombi", + "grid_method": "rapid_kombi", + "raw_rapid_words": rapid_words, + "raw_rapid_words_split": rapid_words_split, + "raw_tesseract_words": tess_words, + "summary": { + "total_cells": len(cells), + "non_empty_cells": sum(1 for c in cells if c.get("text")), + "low_confidence": sum(1 for c in cells if 0 < c.get("confidence", 0) < 50), + "rapid_words": len(rapid_words), + "rapid_words_split": len(rapid_words_split), + "tesseract_words": len(tess_words), + "merged_words": len(merged_words), + }, + } + + await update_session_db( + session_id, + word_result=word_result, + cropped_png=img_png, + current_step=8, + ) + + logger.info( + "rapid_kombi session %s: %d cells (%d rows, %d cols) in %.2fs " + "[rapid=%d, tess=%d, merged=%d]", + session_id, len(cells), n_rows, n_cols, duration, + len(rapid_words), len(tess_words), len(merged_words), + ) + + await _append_pipeline_log(session_id, "rapid_kombi", { + "total_cells": len(cells), + "non_empty_cells": word_result["summary"]["non_empty_cells"], + "rapid_words": len(rapid_words), + "tesseract_words": len(tess_words), + "merged_words": len(merged_words), + "ocr_engine": "rapid_kombi", + }, duration_ms=int(duration * 1000)) + + return {"session_id": session_id, **word_result} + + class WordGroundTruthRequest(BaseModel): is_correct: bool corrected_entries: Optional[List[Dict[str, Any]]] = None diff --git a/klausur-service/backend/tests/test_paddle_kombi.py b/klausur-service/backend/tests/test_paddle_kombi.py index 65371ce..a68aabc 100644 --- a/klausur-service/backend/tests/test_paddle_kombi.py +++ b/klausur-service/backend/tests/test_paddle_kombi.py @@ -449,6 +449,67 @@ class TestSpatialOverlapDedup: assert len(merged) == 2 +class TestRapidOcrMergeCompatibility: + """Test that _merge_paddle_tesseract works with RapidOCR word format. + + RapidOCR words include an extra 'region_type' key that PaddleOCR words + don't have. The merge logic must tolerate this extra field. + """ + + def _rapid_word(self, text, left, top, width=60, height=20, conf=80, region_type="full_page"): + """Create a word dict in RapidOCR format (has region_type).""" + return { + "text": text, + "left": left, + "top": top, + "width": width, + "height": height, + "conf": conf, + "region_type": region_type, + } + + def test_rapid_words_merge_with_tesseract(self): + """RapidOCR words (with region_type) merge correctly with Tesseract words.""" + rapid = [ + self._rapid_word("apple", 50, 10, 70, 20, conf=90), + self._rapid_word("Apfel", 300, 10, 60, 20, conf=85), + ] + tess = [ + _word("apple", 52, 11, 68, 19, conf=75), + _word("Apfel", 298, 12, 62, 18, conf=70), + ] + merged = _merge_paddle_tesseract(rapid, tess) + assert len(merged) == 2 + texts = sorted(w["text"] for w in merged) + assert texts == ["Apfel", "apple"] + + def test_rapid_words_split_then_merge(self): + """Split + merge works with RapidOCR multi-word boxes.""" + rapid_raw = [ + self._rapid_word("More than 200", 944, 287, 160, 29, conf=96), + ] + tess = [ + _word("More", 948, 292, 60, 20, conf=90), + _word("than", 1017, 291, 49, 21, conf=96), + _word("200", 1076, 292, 43, 20, conf=93), + ] + rapid_split = _split_paddle_multi_words(rapid_raw) + assert len(rapid_split) == 3 + merged = _merge_paddle_tesseract(rapid_split, tess) + texts = [w["text"] for w in merged] + assert texts.count("More") == 1 + assert texts.count("than") == 1 + assert texts.count("200") == 1 + + def test_region_type_preserved_in_unmatched(self): + """Unmatched RapidOCR words keep their region_type field.""" + rapid = [self._rapid_word("unique", 500, 10, 80, 20, conf=90)] + tess = [] # No Tesseract words + merged = _merge_paddle_tesseract(rapid, tess) + assert len(merged) == 1 + assert merged[0]["text"] == "unique" + + class TestSplitThenMerge: """Test the full pipeline: split multi-word Paddle boxes, then merge."""