feat(ocr-pipeline): add SSE streaming for word recognition (Step 5)
Cells now appear one-by-one in the UI as they are OCR'd, with a live progress bar, instead of waiting for the full result. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -62,7 +62,11 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
const [usedEngine, setUsedEngine] = useState<string>('')
|
||||
const [pronunciation, setPronunciation] = useState<'british' | 'american'>('british')
|
||||
|
||||
// Streaming progress state
|
||||
const [streamProgress, setStreamProgress] = useState<{ current: number; total: number } | null>(null)
|
||||
|
||||
const enRef = useRef<HTMLInputElement>(null)
|
||||
const tableEndRef = useRef<HTMLDivElement>(null)
|
||||
|
||||
const isVocab = gridResult?.layout === 'vocab'
|
||||
|
||||
@@ -110,16 +114,107 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
const eng = engine || ocrEngine
|
||||
setDetecting(true)
|
||||
setError(null)
|
||||
setStreamProgress(null)
|
||||
setEditedCells([])
|
||||
setEditedEntries([])
|
||||
setGridResult(null)
|
||||
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?engine=${eng}&pronunciation=${pronunciation}`, {
|
||||
method: 'POST',
|
||||
})
|
||||
const res = await fetch(
|
||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?stream=true&engine=${eng}&pronunciation=${pronunciation}`,
|
||||
{ method: 'POST' },
|
||||
)
|
||||
if (!res.ok) {
|
||||
const err = await res.json().catch(() => ({ detail: res.statusText }))
|
||||
throw new Error(err.detail || 'Worterkennung fehlgeschlagen')
|
||||
}
|
||||
const data = await res.json()
|
||||
applyGridResult(data)
|
||||
|
||||
const reader = res.body!.getReader()
|
||||
const decoder = new TextDecoder()
|
||||
let buffer = ''
|
||||
let streamLayout: string | null = null
|
||||
let streamColumnsUsed: GridResult['columns_used'] = []
|
||||
let streamGridShape: GridResult['grid_shape'] | null = null
|
||||
let streamCells: GridCell[] = []
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read()
|
||||
if (done) break
|
||||
buffer += decoder.decode(value, { stream: true })
|
||||
|
||||
// Parse SSE events (separated by \n\n)
|
||||
while (buffer.includes('\n\n')) {
|
||||
const idx = buffer.indexOf('\n\n')
|
||||
const chunk = buffer.slice(0, idx).trim()
|
||||
buffer = buffer.slice(idx + 2)
|
||||
|
||||
if (!chunk.startsWith('data: ')) continue
|
||||
const dataStr = chunk.slice(6) // strip "data: "
|
||||
|
||||
let event: any
|
||||
try {
|
||||
event = JSON.parse(dataStr)
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
|
||||
if (event.type === 'meta') {
|
||||
streamLayout = event.layout || 'generic'
|
||||
streamGridShape = event.grid_shape || null
|
||||
// Show partial grid result so UI renders structure
|
||||
setGridResult(prev => ({
|
||||
...prev,
|
||||
layout: event.layout || 'generic',
|
||||
grid_shape: event.grid_shape,
|
||||
columns_used: [],
|
||||
cells: [],
|
||||
summary: { total_cells: event.grid_shape?.total_cells || 0, non_empty_cells: 0, low_confidence: 0 },
|
||||
duration_seconds: 0,
|
||||
ocr_engine: '',
|
||||
} as GridResult))
|
||||
}
|
||||
|
||||
if (event.type === 'columns') {
|
||||
streamColumnsUsed = event.columns_used || []
|
||||
setGridResult(prev => prev ? { ...prev, columns_used: streamColumnsUsed } : prev)
|
||||
}
|
||||
|
||||
if (event.type === 'cell') {
|
||||
const cell: GridCell = { ...event.cell, status: 'pending' }
|
||||
streamCells = [...streamCells, cell]
|
||||
setEditedCells(streamCells)
|
||||
setStreamProgress(event.progress)
|
||||
// Auto-scroll table to bottom
|
||||
setTimeout(() => tableEndRef.current?.scrollIntoView({ behavior: 'smooth', block: 'nearest' }), 16)
|
||||
}
|
||||
|
||||
if (event.type === 'complete') {
|
||||
// Build final GridResult
|
||||
const finalResult: GridResult = {
|
||||
cells: streamCells,
|
||||
grid_shape: streamGridShape || { rows: 0, cols: 0, total_cells: streamCells.length },
|
||||
columns_used: streamColumnsUsed,
|
||||
layout: streamLayout || 'generic',
|
||||
image_width: 0,
|
||||
image_height: 0,
|
||||
duration_seconds: event.duration_seconds || 0,
|
||||
ocr_engine: event.ocr_engine || '',
|
||||
summary: event.summary || {},
|
||||
}
|
||||
|
||||
// If vocab: apply post-processed entries from complete event
|
||||
if (event.vocab_entries) {
|
||||
finalResult.entries = event.vocab_entries
|
||||
finalResult.vocab_entries = event.vocab_entries
|
||||
finalResult.entry_count = event.vocab_entries.length
|
||||
}
|
||||
|
||||
applyGridResult(finalResult)
|
||||
setUsedEngine(event.ocr_engine || '')
|
||||
setStreamProgress(null)
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
|
||||
} finally {
|
||||
@@ -288,11 +383,23 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Loading */}
|
||||
{/* Loading with streaming progress */}
|
||||
{detecting && (
|
||||
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
|
||||
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
|
||||
Worterkennung laeuft...
|
||||
<div className="space-y-1">
|
||||
<div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
|
||||
<div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
|
||||
{streamProgress
|
||||
? `Zelle ${streamProgress.current}/${streamProgress.total} erkannt...`
|
||||
: 'Worterkennung startet...'}
|
||||
</div>
|
||||
{streamProgress && streamProgress.total > 0 && (
|
||||
<div className="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-1.5">
|
||||
<div
|
||||
className="bg-teal-500 h-1.5 rounded-full transition-all duration-150"
|
||||
style={{ width: `${(streamProgress.current / streamProgress.total) * 100}%` }}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -378,8 +485,8 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Result summary */}
|
||||
{gridResult && summary && (
|
||||
{/* Result summary (only after streaming completes) */}
|
||||
{gridResult && summary && !detecting && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<div className="flex items-center justify-between">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
@@ -511,6 +618,67 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
|
||||
</tbody>
|
||||
</table>
|
||||
)}
|
||||
<div ref={tableEndRef} />
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Streaming cell table (shown while detecting, before complete) */}
|
||||
{detecting && editedCells.length > 0 && !gridResult?.summary?.non_empty_cells && (
|
||||
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Live: {editedCells.length} Zellen erkannt...
|
||||
</h4>
|
||||
<div className="max-h-80 overflow-y-auto">
|
||||
<table className="w-full text-xs">
|
||||
<thead className="sticky top-0 bg-white dark:bg-gray-800">
|
||||
<tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
|
||||
<th className="py-1 pr-2 w-12">Zelle</th>
|
||||
{columnsUsed.map((col, i) => (
|
||||
<th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
|
||||
{colTypeLabel(col.type)}
|
||||
</th>
|
||||
))}
|
||||
<th className="py-1 w-12 text-right">Conf</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{(() => {
|
||||
const liveByRow: Map<number, GridCell[]> = new Map()
|
||||
for (const cell of editedCells) {
|
||||
const existing = liveByRow.get(cell.row_index) || []
|
||||
existing.push(cell)
|
||||
liveByRow.set(cell.row_index, existing)
|
||||
}
|
||||
const liveSorted = [...liveByRow.keys()].sort((a, b) => a - b)
|
||||
return liveSorted.map(rowIdx => {
|
||||
const rowCells = liveByRow.get(rowIdx) || []
|
||||
const avgConf = rowCells.length
|
||||
? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
|
||||
: 0
|
||||
return (
|
||||
<tr key={rowIdx} className="border-b dark:border-gray-700/50 animate-fade-in">
|
||||
<td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
|
||||
R{String(rowIdx).padStart(2, '0')}
|
||||
</td>
|
||||
{columnsUsed.map((col) => {
|
||||
const cell = rowCells.find(c => c.col_index === col.index)
|
||||
return (
|
||||
<td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300">
|
||||
<MultilineText text={cell?.text || ''} />
|
||||
</td>
|
||||
)
|
||||
})}
|
||||
<td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
|
||||
{avgConf}%
|
||||
</td>
|
||||
</tr>
|
||||
)
|
||||
})
|
||||
})()}
|
||||
</tbody>
|
||||
</table>
|
||||
<div ref={tableEndRef} />
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
Reference in New Issue
Block a user