From 27b895a848d7e0240c795a5093892be93518585a Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sun, 1 Mar 2026 17:22:56 +0100 Subject: [PATCH] feat(ocr-pipeline): generic cell-grid with optional vocab mapping Extract build_cell_grid() as layout-agnostic foundation from build_word_grid(). Step 5 now produces a generic cell grid (columns x rows) and auto-detects whether vocab layout is present. Frontend dynamically switches between vocab table (EN/DE/Example) and generic cell table based on layout type. Co-Authored-By: Claude Opus 4.6 --- .../app/(admin)/ai/ocr-pipeline/types.ts | 46 +- .../ocr-pipeline/StepWordRecognition.tsx | 582 +++++++++++++----- klausur-service/backend/cv_vocab_pipeline.py | 262 ++++++-- klausur-service/backend/ocr_pipeline_api.py | 213 ++++--- 4 files changed, 802 insertions(+), 301 deletions(-) diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts index 3da5c8e..3cec591 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts +++ b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts @@ -29,7 +29,7 @@ export interface SessionInfo { dewarp_result?: DewarpResult column_result?: ColumnResult row_result?: RowResult - word_result?: WordResult + word_result?: GridResult } export interface DeskewResult { @@ -124,6 +124,49 @@ export interface WordBbox { h: number } +export interface GridCell { + cell_id: string // "R03_C1" + row_index: number + col_index: number + col_type: string + text: string + confidence: number + bbox_px: WordBbox + bbox_pct: WordBbox + ocr_engine?: string + status?: 'pending' | 'confirmed' | 'edited' | 'skipped' +} + +export interface ColumnMeta { + index: number + type: string + x: number + width: number +} + +export interface GridResult { + cells: GridCell[] + grid_shape: { rows: number; cols: number; total_cells: number } + columns_used: ColumnMeta[] + layout: 'vocab' | 'generic' + image_width: number + image_height: number + duration_seconds: number + ocr_engine?: string + vocab_entries?: WordEntry[] // Only when layout='vocab' + entries?: WordEntry[] // Backwards compat alias for vocab_entries + entry_count?: number + summary: { + total_cells: number + non_empty_cells: number + low_confidence: number + // Only when layout='vocab': + total_entries?: number + with_english?: number + with_german?: number + } +} + export interface WordEntry { row_index: number english: string @@ -137,6 +180,7 @@ export interface WordEntry { status?: 'pending' | 'confirmed' | 'edited' | 'skipped' } +/** @deprecated Use GridResult instead */ export interface WordResult { entries: WordEntry[] entry_count: number diff --git a/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx b/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx index 5b90ade..09f8169 100644 --- a/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepWordRecognition.tsx @@ -1,13 +1,13 @@ 'use client' import { useCallback, useEffect, useRef, useState } from 'react' -import type { WordResult, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types' +import type { GridResult, GridCell, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-pipeline/types' const KLAUSUR_API = '/klausur-api' /** Render text with \n as line breaks */ function MultilineText({ text }: { text: string }) { - if (!text) return + if (!text) return const lines = text.split('\n') if (lines.length === 1) return <>{text} return <>{lines.map((line, i) => ( @@ -15,6 +15,31 @@ function MultilineText({ text }: { text: string }) { ))} } +/** Column type → human-readable header */ +function colTypeLabel(colType: string): string { + const labels: Record = { + column_en: 'English', + column_de: 'Deutsch', + column_example: 'Example', + column_text: 'Text', + column_marker: 'Marker', + page_ref: 'Seite', + } + return labels[colType] || colType.replace('column_', '') +} + +/** Column type → color class */ +function colTypeColor(colType: string): string { + const colors: Record = { + column_en: 'text-blue-600 dark:text-blue-400', + column_de: 'text-green-600 dark:text-green-400', + column_example: 'text-orange-600 dark:text-orange-400', + column_text: 'text-purple-600 dark:text-purple-400', + column_marker: 'text-gray-500 dark:text-gray-400', + } + return colors[colType] || 'text-gray-600 dark:text-gray-400' +} + interface StepWordRecognitionProps { sessionId: string | null onNext: () => void @@ -22,7 +47,7 @@ interface StepWordRecognitionProps { } export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRecognitionProps) { - const [wordResult, setWordResult] = useState(null) + const [gridResult, setGridResult] = useState(null) const [detecting, setDetecting] = useState(false) const [error, setError] = useState(null) const [gtNotes, setGtNotes] = useState('') @@ -31,6 +56,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec // Step-through labeling state const [activeIndex, setActiveIndex] = useState(0) const [editedEntries, setEditedEntries] = useState([]) + const [editedCells, setEditedCells] = useState([]) const [mode, setMode] = useState<'overview' | 'labeling'>('overview') const [ocrEngine, setOcrEngine] = useState<'auto' | 'tesseract' | 'rapid'>('auto') const [usedEngine, setUsedEngine] = useState('') @@ -38,6 +64,8 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec const enRef = useRef(null) + const isVocab = gridResult?.layout === 'vocab' + useEffect(() => { if (!sessionId) return @@ -47,9 +75,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec if (res.ok) { const info = await res.json() if (info.word_result) { - setWordResult(info.word_result) - setUsedEngine(info.word_result.ocr_engine || '') - initEntries(info.word_result.entries) + applyGridResult(info.word_result) return } } @@ -63,6 +89,17 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec // eslint-disable-next-line react-hooks/exhaustive-deps }, [sessionId]) + const applyGridResult = (data: GridResult) => { + setGridResult(data) + setUsedEngine(data.ocr_engine || '') + if (data.layout === 'vocab' && data.entries) { + initEntries(data.entries) + } + if (data.cells) { + setEditedCells(data.cells.map(c => ({ ...c, status: c.status || 'pending' }))) + } + } + const initEntries = (entries: WordEntry[]) => { setEditedEntries(entries.map(e => ({ ...e, status: e.status || 'pending' }))) setActiveIndex(0) @@ -82,21 +119,20 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec throw new Error(err.detail || 'Worterkennung fehlgeschlagen') } const data = await res.json() - setWordResult(data) - setUsedEngine(data.ocr_engine || eng) - initEntries(data.entries) + applyGridResult(data) } catch (e) { setError(e instanceof Error ? e.message : 'Unbekannter Fehler') } finally { setDetecting(false) } - }, [sessionId, ocrEngine]) + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [sessionId, ocrEngine, pronunciation]) const handleGroundTruth = useCallback(async (isCorrect: boolean) => { if (!sessionId) return const gt: WordGroundTruth = { is_correct: isCorrect, - corrected_entries: isCorrect ? undefined : editedEntries, + corrected_entries: isCorrect ? undefined : (isVocab ? editedEntries : undefined), notes: gtNotes || undefined, } try { @@ -109,35 +145,68 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec } catch (e) { console.error('Ground truth save failed:', e) } - }, [sessionId, gtNotes, editedEntries]) + }, [sessionId, gtNotes, editedEntries, isVocab]) - // Step-through: update entry field + // Vocab mode: update entry field const updateEntry = (index: number, field: 'english' | 'german' | 'example', value: string) => { setEditedEntries(prev => prev.map((e, i) => i === index ? { ...e, [field]: value, status: 'edited' as const } : e )) } + // Generic mode: update cell text + const updateCell = (cellId: string, value: string) => { + setEditedCells(prev => prev.map(c => + c.cell_id === cellId ? { ...c, text: value, status: 'edited' as const } : c + )) + } + // Step-through: confirm current entry const confirmEntry = () => { - setEditedEntries(prev => prev.map((e, i) => - i === activeIndex ? { ...e, status: e.status === 'edited' ? 'edited' : 'confirmed' } : e - )) - if (activeIndex < editedEntries.length - 1) { + if (isVocab) { + setEditedEntries(prev => prev.map((e, i) => + i === activeIndex ? { ...e, status: e.status === 'edited' ? 'edited' : 'confirmed' } : e + )) + } else { + // Generic: confirm all cells in this row + const rowCells = getRowCells(activeIndex) + const cellIds = new Set(rowCells.map(c => c.cell_id)) + setEditedCells(prev => prev.map(c => + cellIds.has(c.cell_id) ? { ...c, status: c.status === 'edited' ? 'edited' : 'confirmed' } : c + )) + } + const maxIdx = isVocab ? editedEntries.length - 1 : getUniqueRowCount() - 1 + if (activeIndex < maxIdx) { setActiveIndex(activeIndex + 1) } } // Step-through: skip current entry const skipEntry = () => { - setEditedEntries(prev => prev.map((e, i) => - i === activeIndex ? { ...e, status: 'skipped' as const } : e - )) - if (activeIndex < editedEntries.length - 1) { + if (isVocab) { + setEditedEntries(prev => prev.map((e, i) => + i === activeIndex ? { ...e, status: 'skipped' as const } : e + )) + } + const maxIdx = isVocab ? editedEntries.length - 1 : getUniqueRowCount() - 1 + if (activeIndex < maxIdx) { setActiveIndex(activeIndex + 1) } } + // Helper: get unique row indices from cells + const getUniqueRowCount = () => { + if (!editedCells.length) return 0 + return new Set(editedCells.map(c => c.row_index)).size + } + + // Helper: get cells for a given row index (by position in sorted unique rows) + const getRowCells = (rowPosition: number) => { + const uniqueRows = [...new Set(editedCells.map(c => c.row_index))].sort((a, b) => a - b) + const rowIdx = uniqueRows[rowPosition] + return editedCells.filter(c => c.row_index === rowIdx) + } + // Focus english input when active entry changes in labeling mode useEffect(() => { if (mode === 'labeling' && enRef.current) { @@ -152,8 +221,6 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault() confirmEntry() - } else if (e.key === 'Tab' && !e.shiftKey) { - // Let Tab move between fields naturally unless on last field } else if (e.key === 'ArrowDown' && e.ctrlKey) { e.preventDefault() skipEntry() @@ -165,7 +232,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec window.addEventListener('keydown', handler) return () => window.removeEventListener('keydown', handler) // eslint-disable-next-line react-hooks/exhaustive-deps - }, [mode, activeIndex, editedEntries]) + }, [mode, activeIndex, editedEntries, editedCells]) if (!sessionId) { return ( @@ -200,9 +267,24 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec return map[status || 'pending'] || map.pending } - const summary = wordResult?.summary - const confirmedCount = editedEntries.filter(e => e.status === 'confirmed' || e.status === 'edited').length - const totalCount = editedEntries.length + const summary = gridResult?.summary + const columnsUsed = gridResult?.columns_used || [] + const gridShape = gridResult?.grid_shape + + // Counts for labeling progress + const confirmedCount = isVocab + ? editedEntries.filter(e => e.status === 'confirmed' || e.status === 'edited').length + : editedCells.filter(c => c.status === 'confirmed' || c.status === 'edited').length + const totalCount = isVocab ? editedEntries.length : getUniqueRowCount() + + // Group cells by row for generic table display + const cellsByRow: Map = new Map() + for (const cell of editedCells) { + const existing = cellsByRow.get(cell.row_index) || [] + existing.push(cell) + cellsByRow.set(cell.row_index, existing) + } + const sortedRowIndices = [...cellsByRow.keys()].sort((a, b) => a - b) return (
@@ -214,9 +296,26 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
)} - {/* Mode toggle */} - {wordResult && ( + {/* Layout badge + Mode toggle */} + {gridResult && (
+ {/* Layout badge */} + + {isVocab ? 'Vokabel-Layout' : 'Generisch'} + + + {gridShape && ( + + {gridShape.rows}×{gridShape.cols} = {gridShape.total_cells} Zellen + + )} + +
+
)} - {/* Overview mode: side-by-side images + entry list */} + {/* Overview mode */} {mode === 'overview' && ( <> {/* Images: overlay vs clean */} @@ -250,7 +349,7 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec Mit Grid-Overlay
- {wordResult ? ( + {gridResult ? ( // eslint-disable-next-line @next/next/no-img-element {/* Result summary */} - {wordResult && summary && ( + {gridResult && summary && (

- Ergebnis: {summary.total_entries} Eintraege erkannt + {isVocab + ? `Ergebnis: ${summary.total_entries ?? 0} Vokabel-Eintraege erkannt` + : `Ergebnis: ${summary.non_empty_cells}/${summary.total_cells} Zellen mit Text` + }

- {wordResult.duration_seconds}s + {gridResult.duration_seconds}s
{/* Summary badges */}
- - EN: {summary.with_english} - - - DE: {summary.with_german} - + {isVocab ? ( + <> + + EN: {summary.with_english ?? 0} + + + DE: {summary.with_german ?? 0} + + + ) : ( + <> + + Zellen: {summary.non_empty_cells}/{summary.total_cells} + + {columnsUsed.map((col, i) => ( + + C{col.index}: {colTypeLabel(col.type)} + + ))} + + )} {summary.low_confidence > 0 && ( Unsicher: {summary.low_confidence} @@ -306,57 +423,110 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec )}
- {/* Entry table */} + {/* Entry/Cell table */}
- - - - - - - - - - - - {editedEntries.map((entry, idx) => ( - { setActiveIndex(idx); setMode('labeling') }} - > - - - - - + {isVocab ? ( + /* Vocab table: EN/DE/Example columns */ +
#EnglishDeutschExampleConf
{idx + 1} - - - - - - - {entry.confidence}% -
+ + + + + + + - ))} - -
#EnglishDeutschExampleConf
+ + + {editedEntries.map((entry, idx) => ( + { setActiveIndex(idx); setMode('labeling') }} + > + {idx + 1} + + + + + + + + + + + {entry.confidence}% + + + ))} + + + ) : ( + /* Generic table: dynamic columns from columns_used */ + + + + + {columnsUsed.map((col, i) => ( + + ))} + + + + + {sortedRowIndices.map((rowIdx, posIdx) => { + const rowCells = cellsByRow.get(rowIdx) || [] + const avgConf = rowCells.length + ? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length) + : 0 + return ( + { setActiveIndex(posIdx); setMode('labeling') }} + > + + {columnsUsed.map((col) => { + const cell = rowCells.find(c => c.col_index === col.index) + return ( + + ) + })} + + + ) + })} + +
Zeile + {colTypeLabel(col.type)} + Conf
+ R{String(rowIdx).padStart(2, '0')} + + + + {avgConf}% +
+ )}
)} )} - {/* Labeling mode: image crop + editable fields */} - {mode === 'labeling' && editedEntries.length > 0 && ( + {/* Labeling mode */} + {mode === 'labeling' && (isVocab ? editedEntries.length > 0 : editedCells.length > 0) && (
{/* Left 2/3: Image with highlighted active row */}
- Eintrag {activeIndex + 1} von {editedEntries.length} + {isVocab + ? `Eintrag ${activeIndex + 1} von ${editedEntries.length}` + : `Zeile ${activeIndex + 1} von ${getUniqueRowCount()}` + }
{/* eslint-disable-next-line @next/next/no-img-element */} @@ -365,8 +535,8 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec alt="Wort-Overlay" className="w-full h-auto" /> - {/* Highlight overlay for active entry bbox */} - {editedEntries[activeIndex]?.bbox && ( + {/* Highlight overlay for active row/entry */} + {isVocab && editedEntries[activeIndex]?.bbox && (
)} + {!isVocab && (() => { + const rowCells = getRowCells(activeIndex) + return rowCells.map(cell => ( +
+ )) + })()}
- {/* Right 1/3: Editable entry fields */} + {/* Right 1/3: Editable fields */}
{/* Navigation */}
@@ -391,10 +576,15 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec > Zurueck - {activeIndex + 1} / {editedEntries.length} + + {activeIndex + 1} / {isVocab ? editedEntries.length : getUniqueRowCount()} +