'use client'

import { useCallback, useEffect, useRef, useState } from 'react'
import type { GridResult, GridCell, WordEntry, WordGroundTruth } from '@/app/(admin)/ai/ocr-kombi/types'
import { WordRecognitionOverview } from './WordRecognitionOverview'
import { WordRecognitionLabeling } from './WordRecognitionLabeling'
import { WordRecognitionControls } from './WordRecognitionControls'

/** Path prefix prepended to every request URL built in this file. */
const KLAUSUR_API = '/klausur-api'

/** Props accepted by {@link StepWordRecognition}. */
interface StepWordRecognitionProps {
  /** Session to run word recognition for; while null no request is made and a placeholder is rendered. */
  sessionId: string | null
  /** NOTE(review): not referenced in the code visible here -- presumably wired into the rendered controls; confirm. */
  onNext: () => void
  /** NOTE(review): not referenced in the code visible here -- presumably wired into the rendered controls; confirm. */
  goToStep: (step: number) => void
  /** Skip _heal_row_gaps in cell grid (better overlay positioning) */
  skipHealGaps?: boolean
}

/**
 * Step component that runs word recognition for a session and lets the user
 * review the recognised grid cells.
 *
 * Whenever `sessionId` changes it POSTs to the session's `/words` endpoint and
 * either reads a plain JSON GridResult or consumes a streamed body made of
 * `data: {...}` events (`meta`, `columns`, `cell`, `complete`). In "labeling"
 * mode rows are stepped through with the keyboard: Enter confirms the active
 * row, Ctrl+ArrowDown skips it, Ctrl+ArrowUp moves back one row.
 *
 * NOTE(review): in this view the JSX element markup and several generic type
 * arguments (on useState / useRef / Map) appear to be missing, so the return
 * bodies below contain only their text and expression fragments. Confirm
 * against the repository copy before relying on the rendered structure.
 */
export function StepWordRecognition({ sessionId, onNext, goToStep, skipHealGaps = false }: StepWordRecognitionProps) {
  // Latest grid result; during streaming this holds a partial result built from the "meta" event.
  const [gridResult, setGridResult] = useState(null)
  // True from the start of runAutoDetection until its finally block runs.
  const [detecting, setDetecting] = useState(false)
  // Message shown in the error area at the bottom of the component.
  const [error, setError] = useState(null)
  // Free-text notes sent along with the ground-truth verdict.
  const [gtNotes, setGtNotes] = useState('')
  // Set to true after the ground-truth request in handleGroundTruth has been awaited.
  const [gtSaved, setGtSaved] = useState(false)

  // Step-through labeling state
  // activeIndex is a position in the sorted list of unique row indices, not a raw row_index.
  const [activeIndex, setActiveIndex] = useState(0)
  const [editedEntries, setEditedEntries] = useState([])
  const [editedCells, setEditedCells] = useState([])
  const [mode, setMode] = useState<'overview' | 'labeling'>('overview')
  // Engine requested for the next detection run vs. the engine reported back by the backend.
  const [ocrEngine, setOcrEngine] = useState<'auto' | 'tesseract' | 'rapid' | 'paddle'>('auto')
  const [usedEngine, setUsedEngine] = useState('')
  const [pronunciation, setPronunciation] = useState<'british' | 'american'>('british')
  const [gridMethod, setGridMethod] = useState<'v2' | 'words_first'>('v2')

  // Streaming progress state
  const [streamProgress, setStreamProgress] = useState<{ current: number; total: number } | null>(null)

  // enRef is focused when the active row changes in labeling mode; tableEndRef is the auto-scroll target.
  const enRef = useRef(null)
  const tableEndRef = useRef(null)

  const isVocab = gridResult?.layout === 'vocab'

  // NOTE(review): this effect has no cleanup and the request is not aborted, so
  // if sessionId changes while a run is still in flight the older run can still
  // write its results into state -- confirm whether that can happen in practice.
  useEffect(() => {
    if (!sessionId) return
    // Always run fresh detection -- word-lookup is fast (~0.03s)
    // and avoids stale cached results from previous pipeline versions.
    runAutoDetection()
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId])

  /**
   * Stores a GridResult and derives the editable copies from it: records the
   * reported engine, seeds editedEntries when the layout is 'vocab' and entries
   * are present, and seeds editedCells with a default status of 'pending'.
   */
  const applyGridResult = (data: GridResult) => {
    setGridResult(data)
    setUsedEngine(data.ocr_engine || '')
    if (data.layout === 'vocab' && data.entries) {
      initEntries(data.entries)
    }
    if (data.cells) {
      setEditedCells(data.cells.map(c => ({ ...c, status: c.status || 'pending' })))
    }
  }

  /** Replaces editedEntries (missing status defaults to 'pending') and resets the active row to 0. */
  const initEntries = (entries: WordEntry[]) => {
    setEditedEntries(entries.map(e => ({ ...e, status: e.status || 'pending' })))
    setActiveIndex(0)
  }

  /**
   * Clears the current results and requests word recognition for the session.
   *
   * @param engine Optional engine override; when omitted the `ocrEngine` state is used.
   *
   * Failures are not thrown to the caller: they are caught and stored in `error`.
   *
   * NOTE(review): `skipHealGaps` is read when the URL is built but is not in
   * the dependency list below, so a memoized instance of this callback keeps
   * using the value from the render in which it was created.
   */
  const runAutoDetection = useCallback(async (engine?: string) => {
    if (!sessionId) return
    const eng = engine || ocrEngine
    setDetecting(true)
    setError(null)
    setStreamProgress(null)
    setEditedCells([])
    setEditedEntries([])
    setGridResult(null)

    try {
      // PP-OCRv5 forces words_first on the backend, so align frontend accordingly
      const effectiveGridMethod = eng === 'paddle' ? 'words_first' : gridMethod
      // Only the 'v2' grid method is requested as a stream; everything else is read as one JSON body.
      const useStream = effectiveGridMethod === 'v2'

      // Retry once if initial request fails (e.g. after container restart,
      // session cache may not be warm yet when navigating via wizard)
      let res: Response | null = null
      for (let attempt = 0; attempt < 2; attempt++) {
        res = await fetch(
          `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?stream=${useStream ? 'true' : 'false'}&engine=${eng}&pronunciation=${pronunciation}${skipHealGaps ? '&skip_heal_gaps=true' : ''}&grid_method=${effectiveGridMethod}`,
          { method: 'POST' },
        )
        if (res.ok) break
        // Only a 400 or 404 on the first attempt is retried; any other status falls through to the error path.
        if (attempt === 0 && (res.status === 400 || res.status === 404)) {
          // Wait briefly for cache to warm up, then retry
          await new Promise(r => setTimeout(r, 2000))
          continue
        }
        break
      }

      if (!res || !res.ok) {
        // Prefer the `detail` field of a JSON error body; fall back to the status text if the body is not JSON.
        const err = await res?.json().catch(() => ({ detail: res?.statusText })) || { detail: 'Worterkennung fehlgeschlagen' }
        throw new Error(err.detail || 'Worterkennung fehlgeschlagen')
      }

      // words_first / pp-ocrv5 returns plain JSON (no streaming)
      if (!useStream) {
        const data = await res.json() as GridResult
        applyGridResult(data)
        return
      }

      // NOTE(review): res.body is asserted non-null here without a guard.
      const reader = res.body!.getReader()
      const decoder = new TextDecoder()
      let buffer = ''
      // Values collected from the individual events; combined into the final GridResult on "complete".
      let streamLayout: string | null = null
      let streamColumnsUsed: GridResult['columns_used'] = []
      let streamGridShape: GridResult['grid_shape'] | null = null
      let streamCells: GridCell[] = []

      // NOTE(review): whatever is left in `buffer` when the stream reports done is
      // never parsed, and if no "complete" event arrives the partial grid result
      // from "meta" stays in place.
      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        buffer += decoder.decode(value, { stream: true })

        // Parse SSE events (separated by \n\n)
        while (buffer.includes('\n\n')) {
          const idx = buffer.indexOf('\n\n')
          const chunk = buffer.slice(0, idx).trim()
          buffer = buffer.slice(idx + 2)

          // Chunks that are not `data: ` lines, or whose payload is not valid JSON, are skipped silently.
          if (!chunk.startsWith('data: ')) continue
          const dataStr = chunk.slice(6) // strip "data: "

          let event: any
          try {
            event = JSON.parse(dataStr)
          } catch {
            continue
          }

          if (event.type === 'meta') {
            streamLayout = event.layout || 'generic'
            streamGridShape = event.grid_shape || null
            // Show partial grid result so UI renders structure
            setGridResult(prev => ({
              ...prev,
              layout: event.layout || 'generic',
              grid_shape: event.grid_shape,
              columns_used: [],
              cells: [],
              summary: { total_cells: event.grid_shape?.total_cells || 0, non_empty_cells: 0, low_confidence: 0 },
              duration_seconds: 0,
              ocr_engine: '',
            } as GridResult))
          }

          if (event.type === 'columns') {
            streamColumnsUsed = event.columns_used || []
            setGridResult(prev => prev ? { ...prev, columns_used: streamColumnsUsed } : prev)
          }

          if (event.type === 'cell') {
            const cell: GridCell = { ...event.cell, status: 'pending' }
            // A new array is built for every cell so each setEditedCells call receives a fresh reference.
            streamCells = [...streamCells, cell]
            setEditedCells(streamCells)
            setStreamProgress(event.progress)
            // Auto-scroll table to bottom
            setTimeout(() => tableEndRef.current?.scrollIntoView({ behavior: 'smooth', block: 'nearest' }), 16)
          }

          if (event.type === 'complete') {
            // Build final GridResult
            // Image dimensions are not taken from any event handled here and are set to 0.
            const finalResult: GridResult = {
              cells: streamCells,
              grid_shape: streamGridShape || { rows: 0, cols: 0, total_cells: streamCells.length },
              columns_used: streamColumnsUsed,
              layout: streamLayout || 'generic',
              image_width: 0,
              image_height: 0,
              duration_seconds: event.duration_seconds || 0,
              ocr_engine: event.ocr_engine || '',
              summary: event.summary || {},
            }

            // If vocab: apply post-processed entries from complete event
            if (event.vocab_entries) {
              finalResult.entries = event.vocab_entries
              finalResult.vocab_entries = event.vocab_entries
              finalResult.entry_count = event.vocab_entries.length
            }

            applyGridResult(finalResult)
            // Repeats the value applyGridResult has just set from finalResult.ocr_engine.
            setUsedEngine(event.ocr_engine || '')
            setStreamProgress(null)
          }
        }
      }
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
      setDetecting(false)
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sessionId, ocrEngine, pronunciation, gridMethod])

  /**
   * POSTs the user's verdict for the session's word recognition result.
   *
   * @param isCorrect Whether the result was judged correct. When false and the
   *   layout is 'vocab', the edited entries are sent as `corrected_entries`;
   *   edited cells are never included in the payload.
   *
   * NOTE(review): the response status is not inspected. fetch only rejects on
   * network-level failure, so `gtSaved` is set to true for any response that
   * arrives, including error statuses. A rejected request is only logged to the
   * console and is not surfaced through `error`.
   */
  const handleGroundTruth = useCallback(async (isCorrect: boolean) => {
    if (!sessionId) return

    const gt: WordGroundTruth = {
      is_correct: isCorrect,
      corrected_entries: isCorrect ? undefined : (isVocab ? editedEntries : undefined),
      notes: gtNotes || undefined,
    }

    try {
      await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/ground-truth/words`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(gt),
      })
      setGtSaved(true)
    } catch (e) {
      console.error('Ground truth save failed:', e)
    }
  }, [sessionId, gtNotes, editedEntries, isVocab])

  // Generic mode: update cell text
  // The matching cell gets the new text and is marked 'edited'; all other cells are returned unchanged.
  const updateCell = (cellId: string, value: string) => {
    setEditedCells(prev => prev.map(c =>
      c.cell_id === cellId ? { ...c, text: value, status: 'edited' as const } : c
    ))
  }

  // Helper: get unique row indices from cells
  // Returns the number of distinct row_index values (0 when there are no cells).
  const getUniqueRowCount = () => {
    if (!editedCells.length) return 0
    return new Set(editedCells.map(c => c.row_index)).size
  }

  // Helper: get cells for a given row index (by position in sorted unique rows)
  // An out-of-range position yields an undefined row index, which is then used as the filter value.
  const getRowCells = (rowPosition: number) => {
    const uniqueRows = [...new Set(editedCells.map(c => c.row_index))].sort((a, b) => a - b)
    const rowIdx = uniqueRows[rowPosition]
    return editedCells.filter(c => c.row_index === rowIdx)
  }

  // Step-through: confirm current row (always cell-based)
  // Cells already marked 'edited' keep that status; every other cell in the row becomes 'confirmed'.
  // The active index advances by one unless the last row is already active.
  const confirmEntry = () => {
    const rowCells = getRowCells(activeIndex)
    const cellIds = new Set(rowCells.map(c => c.cell_id))
    setEditedCells(prev => prev.map(c =>
      cellIds.has(c.cell_id) ? { ...c, status: c.status === 'edited' ? 'edited' : 'confirmed' } : c
    ))
    const maxIdx = getUniqueRowCount() - 1
    if (activeIndex < maxIdx) {
      setActiveIndex(activeIndex + 1)
    }
  }

  // Step-through: skip current row
  // Every cell in the row is marked 'skipped' (including previously edited ones), then the index advances as in confirmEntry.
  const skipEntry = () => {
    const rowCells = getRowCells(activeIndex)
    const cellIds = new Set(rowCells.map(c => c.cell_id))
    setEditedCells(prev => prev.map(c =>
      cellIds.has(c.cell_id) ? { ...c, status: 'skipped' as const } : c
    ))
    const maxIdx = getUniqueRowCount() - 1
    if (activeIndex < maxIdx) {
      setActiveIndex(activeIndex + 1)
    }
  }

  // Focus english input when active entry changes in labeling mode
  useEffect(() => {
    if (mode === 'labeling' && enRef.current) {
      enRef.current.focus()
    }
  }, [activeIndex, mode])

  // Keyboard shortcuts in labeling mode
  // The listener is attached to window and re-registered whenever the listed state changes,
  // so confirmEntry / skipEntry are called with closures from the latest of those renders.
  // NOTE(review): the handler does not check the event target, so Enter is
  // preventDefault()-ed for whatever element has focus while labeling mode is active.
  useEffect(() => {
    if (mode !== 'labeling') return

    const handler = (e: KeyboardEvent) => {
      if (e.key === 'Enter' && !e.shiftKey) {
        e.preventDefault()
        confirmEntry()
      } else if (e.key === 'ArrowDown' && e.ctrlKey) {
        e.preventDefault()
        skipEntry()
      } else if (e.key === 'ArrowUp' && e.ctrlKey) {
        e.preventDefault()
        if (activeIndex > 0) setActiveIndex(activeIndex - 1)
      }
    }

    window.addEventListener('keydown', handler)
    return () => window.removeEventListener('keydown', handler)
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [mode, activeIndex, editedEntries, editedCells])

  // Placeholder shown while no session is selected.
  if (!sessionId) {
    return (
🔤

Schritt 5: Worterkennung

Bitte zuerst Schritte 1-4 abschliessen.

    )
  }

  const columnsUsed = gridResult?.columns_used || []
  const gridShape = gridResult?.grid_shape

  // Counts for labeling progress (always cell-based)
  // A row counts as confirmed as soon as at least one of its cells is 'confirmed' or 'edited'.
  const confirmedRowIds = new Set(
    editedCells.filter(c => c.status === 'confirmed' || c.status === 'edited').map(c => c.row_index)
  )
  const confirmedCount = confirmedRowIds.size
  const totalCount = getUniqueRowCount()

  // Group cells by row (shared between overview and labeling)
  const cellsByRow: Map = new Map()
  for (const cell of editedCells) {
    const existing = cellsByRow.get(cell.row_index) || []
    existing.push(cell)
    cellsByRow.set(cell.row_index, existing)
  }
  const sortedRowIndices = [...cellsByRow.keys()].sort((a, b) => a - b)

  return (
{/* Loading with streaming progress */}
{detecting && (
{streamProgress ? `Zelle ${streamProgress.current}/${streamProgress.total} erkannt...` : 'Worterkennung startet...'}
{streamProgress && streamProgress.total > 0 && (
)}
)}
{/* Layout badge + Mode toggle */}
{gridResult && (
{/* Layout badge */} {isVocab ? 'Vokabel-Layout' : 'Generisch'} {gridShape && ( {gridShape.rows}×{gridShape.cols} = {gridShape.total_cells} Zellen )}
)}
{/* Overview mode */}
{mode === 'overview' && ( )}
{/* Labeling mode */}
{mode === 'labeling' && ( )}
{/* Controls */}
{gridResult && ( )}
{error && (
{error}
)}
  )
}