feat(ocr-pipeline): add SSE streaming for word recognition (Step 5)

Cells now appear one-by-one in the UI as they are OCR'd, with a live progress bar, instead of only after the full result is available.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 17:54:20 +01:00
parent a666e883da
commit 7f27783008
3 changed files with 506 additions and 93 deletions
@@ -62,7 +62,11 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
  const [usedEngine, setUsedEngine] = useState<string>('')
  const [pronunciation, setPronunciation] = useState<'british' | 'american'>('british')

+  // Streaming progress state
+  const [streamProgress, setStreamProgress] = useState<{ current: number; total: number } | null>(null)
+
  const enRef = useRef<HTMLInputElement>(null)
+  const tableEndRef = useRef<HTMLDivElement>(null)

  const isVocab = gridResult?.layout === 'vocab'

@@ -110,16 +114,107 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
    const eng = engine || ocrEngine
    setDetecting(true)
    setError(null)
+    setStreamProgress(null)
+    setEditedCells([])
+    setEditedEntries([])
+    setGridResult(null)
+
    try {
-      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?engine=${eng}&pronunciation=${pronunciation}`, {
-        method: 'POST',
-      })
+      const res = await fetch(
+        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/words?stream=true&engine=${eng}&pronunciation=${pronunciation}`,
+        { method: 'POST' },
+      )
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        throw new Error(err.detail || 'Worterkennung fehlgeschlagen')
      }
-      const data = await res.json()
-      applyGridResult(data)
+
+      const reader = res.body!.getReader()
+      const decoder = new TextDecoder()
+      let buffer = ''
+      let streamLayout: string | null = null
+      let streamColumnsUsed: GridResult['columns_used'] = []
+      let streamGridShape: GridResult['grid_shape'] | null = null
+      let streamCells: GridCell[] = []
+
+      while (true) {
+        const { done, value } = await reader.read()
+        if (done) break
+        buffer += decoder.decode(value, { stream: true })
+
+        // Parse SSE events (separated by \n\n)
+        while (buffer.includes('\n\n')) {
+          const idx = buffer.indexOf('\n\n')
+          const chunk = buffer.slice(0, idx).trim()
+          buffer = buffer.slice(idx + 2)
+
+          if (!chunk.startsWith('data: ')) continue
+          const dataStr = chunk.slice(6) // strip "data: "
+
+          let event: any
+          try {
+            event = JSON.parse(dataStr)
+          } catch {
+            continue
+          }
+
+          if (event.type === 'meta') {
+            streamLayout = event.layout || 'generic'
+            streamGridShape = event.grid_shape || null
+            // Show partial grid result so UI renders structure
+            setGridResult(prev => ({
+              ...prev,
+              layout: event.layout || 'generic',
+              grid_shape: event.grid_shape,
+              columns_used: [],
+              cells: [],
+              summary: { total_cells: event.grid_shape?.total_cells || 0, non_empty_cells: 0, low_confidence: 0 },
+              duration_seconds: 0,
+              ocr_engine: '',
+            } as GridResult))
+          }
+
+          if (event.type === 'columns') {
+            streamColumnsUsed = event.columns_used || []
+            setGridResult(prev => prev ? { ...prev, columns_used: streamColumnsUsed } : prev)
+          }
+
+          if (event.type === 'cell') {
+            const cell: GridCell = { ...event.cell, status: 'pending' }
+            streamCells = [...streamCells, cell]
+            setEditedCells(streamCells)
+            setStreamProgress(event.progress)
+            // Auto-scroll table to bottom
+            setTimeout(() => tableEndRef.current?.scrollIntoView({ behavior: 'smooth', block: 'nearest' }), 16)
+          }
+
+          if (event.type === 'complete') {
+            // Build final GridResult
+            const finalResult: GridResult = {
+              cells: streamCells,
+              grid_shape: streamGridShape || { rows: 0, cols: 0, total_cells: streamCells.length },
+              columns_used: streamColumnsUsed,
+              layout: streamLayout || 'generic',
+              image_width: 0,
+              image_height: 0,
+              duration_seconds: event.duration_seconds || 0,
+              ocr_engine: event.ocr_engine || '',
+              summary: event.summary || {},
+            }
+
+            // If vocab: apply post-processed entries from complete event
+            if (event.vocab_entries) {
+              finalResult.entries = event.vocab_entries
+              finalResult.vocab_entries = event.vocab_entries
+              finalResult.entry_count = event.vocab_entries.length
+            }
+
+            applyGridResult(finalResult)
+            setUsedEngine(event.ocr_engine || '')
+            setStreamProgress(null)
+          }
+        }
+      }
    } catch (e) {
      setError(e instanceof Error ? e.message : 'Unbekannter Fehler')
    } finally {
@@ -288,11 +383,23 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec

  return (
    <div className="space-y-4">
-      {/* Loading */}
+      {/* Loading with streaming progress */}
      {detecting && (
-        <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
-          <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
-          Worterkennung laeuft...
+        <div className="space-y-1">
+          <div className="flex items-center gap-2 text-teal-600 dark:text-teal-400 text-sm">
+            <div className="animate-spin w-4 h-4 border-2 border-teal-500 border-t-transparent rounded-full" />
+            {streamProgress
+              ? `Zelle ${streamProgress.current}/${streamProgress.total} erkannt...`
+              : 'Worterkennung startet...'}
+          </div>
+          {streamProgress && streamProgress.total > 0 && (
+            <div className="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-1.5">
+              <div
+                className="bg-teal-500 h-1.5 rounded-full transition-all duration-150"
+                style={{ width: `${(streamProgress.current / streamProgress.total) * 100}%` }}
+              />
+            </div>
+          )}
        </div>
      )}

@@ -378,8 +485,8 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
            </div>
          </div>

-          {/* Result summary */}
-          {gridResult && summary && (
+          {/* Result summary (only after streaming completes) */}
+          {gridResult && summary && !detecting && (
            <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
              <div className="flex items-center justify-between">
                <h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
@@ -511,6 +618,67 @@ export function StepWordRecognition({ sessionId, onNext, goToStep }: StepWordRec
                    </tbody>
                  </table>
                )}
+                <div ref={tableEndRef} />
+              </div>
+            </div>
+          )}
+
+          {/* Streaming cell table (shown while detecting, before complete) */}
+          {detecting && editedCells.length > 0 && !gridResult?.summary?.non_empty_cells && (
+            <div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-3">
+              <h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
+                Live: {editedCells.length} Zellen erkannt...
+              </h4>
+              <div className="max-h-80 overflow-y-auto">
+                <table className="w-full text-xs">
+                  <thead className="sticky top-0 bg-white dark:bg-gray-800">
+                    <tr className="text-left text-gray-500 dark:text-gray-400 border-b dark:border-gray-700">
+                      <th className="py-1 pr-2 w-12">Zelle</th>
+                      {columnsUsed.map((col, i) => (
+                        <th key={i} className={`py-1 pr-2 ${colTypeColor(col.type)}`}>
+                          {colTypeLabel(col.type)}
+                        </th>
+                      ))}
+                      <th className="py-1 w-12 text-right">Conf</th>
+                    </tr>
+                  </thead>
+                  <tbody>
+                    {(() => {
+                      const liveByRow: Map<number, GridCell[]> = new Map()
+                      for (const cell of editedCells) {
+                        const existing = liveByRow.get(cell.row_index) || []
+                        existing.push(cell)
+                        liveByRow.set(cell.row_index, existing)
+                      }
+                      const liveSorted = [...liveByRow.keys()].sort((a, b) => a - b)
+                      return liveSorted.map(rowIdx => {
+                        const rowCells = liveByRow.get(rowIdx) || []
+                        const avgConf = rowCells.length
+                          ? Math.round(rowCells.reduce((s, c) => s + c.confidence, 0) / rowCells.length)
+                          : 0
+                        return (
+                          <tr key={rowIdx} className="border-b dark:border-gray-700/50 animate-fade-in">
+                            <td className="py-1 pr-2 text-gray-400 font-mono text-[10px]">
+                              R{String(rowIdx).padStart(2, '0')}
+                            </td>
+                            {columnsUsed.map((col) => {
+                              const cell = rowCells.find(c => c.col_index === col.index)
+                              return (
+                                <td key={col.index} className="py-1 pr-2 font-mono text-gray-700 dark:text-gray-300">
+                                  <MultilineText text={cell?.text || ''} />
+                                </td>
+                              )
+                            })}
+                            <td className={`py-1 text-right font-mono ${confColor(avgConf)}`}>
+                              {avgConf}%
+                            </td>
+                          </tr>
+                        )
+                      })
+                    })()}
+                  </tbody>
+                </table>
+                <div ref={tableEndRef} />
              </div>
            </div>
          )}