Files
breakpilot-lehrer/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx
Benjamin Admin 2a493890b6 feat(ocr-pipeline): add SSE streaming and phonetic filter to LLM review
- Stream LLM review results batch-by-batch (8 entries per batch) via SSE
- Frontend shows live progress bar, batch log, and corrections appearing
- Skip entries with IPA phonetic transcriptions (already dictionary-corrected)
- Refactor llm_review_entries into reusable helpers for both streaming and non-streaming paths

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 11:46:06 +01:00

402 lines
16 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'use client'
import { useCallback, useRef, useState } from 'react'
// Path prefix prepended to the request URLs built in this file.
const KLAUSUR_API = '/klausur-api'
/**
 * One correction proposed by the LLM review, as rendered in the diff tables.
 */
interface LlmChange {
  /** Row number; displayed as `R<row_index>` in the tables. */
  row_index: number;
  /** Column the correction refers to; used to look up the display label. */
  field: 'english' | 'german' | 'example';
  /** Text before the correction (rendered struck through). */
  old: string;
  /** Proposed replacement text. */
  new: string;
}
/**
 * Props accepted by the `StepLlmReview` component.
 */
interface StepLlmReviewProps {
  /** Session whose entries are reviewed; `null` renders a "select a session" hint. */
  sessionId: string | null;
  /** Callback invoked by the continue / skip / reject-all buttons. */
  onNext: () => void;
}
/**
 * Summary numbers taken from the stream's `meta` event.
 */
interface ReviewMeta {
  /** Total number of entries reported by the backend. */
  total_entries: number;
  /** Number of entries that will be sent through the review. */
  to_review: number;
  /** Number of entries skipped (shown to the user as skipped due to phonetics). */
  skipped: number;
  /** Name of the model, displayed next to the progress header and in the summaries. */
  model: string;
}
/**
 * Progress counters carried by each `batch` event of the stream.
 */
interface StreamProgress {
  /** Number of entries reviewed so far. */
  current: number;
  /** Number of entries the progress bar counts towards. */
  total: number;
}
/** Short display labels for the `field` value of a change; unknown fields fall back to the raw name at the call sites. */
const FIELD_LABELS: Record<string, string> = { english: 'EN', german: 'DE', example: 'Beispiel' }
/**
 * JSON payloads carried in the `data:` lines of the LLM review event stream,
 * discriminated by `type`. Payloads with any other `type` are ignored.
 */
type LlmReviewStreamEvent =
  | { type: 'meta'; total_entries: number; to_review: number; skipped: number; model: string }
  | {
      type: 'batch'
      batch_index: number
      entries_reviewed?: number[]
      changes?: LlmChange[]
      progress?: StreamProgress
      duration_ms: number
    }
  | { type: 'complete'; duration_ms: number }
  | { type: 'error'; detail?: string }

/**
 * Pipeline step that runs the LLM review for a session and lets the user
 * accept or reject the proposed corrections.
 *
 * The component is a small state machine driven by `status`:
 * - `idle`    – start screen with a button that triggers the review.
 * - `running` – the review response is consumed as an event stream; progress,
 *               a batch log and the corrections found so far are shown live.
 * - `done`    – either a "nothing to correct" screen or a diff table with one
 *               checkbox per correction (all pre-selected).
 * - `error`   – the review failed; the user can retry or skip.
 * - `applied` – the selected corrections were posted to the apply endpoint.
 *
 * @param sessionId Session to review; when `null` only a hint is rendered.
 * @param onNext    Invoked when the user continues, skips, or rejects all changes.
 */
export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
  const [status, setStatus] = useState<'idle' | 'running' | 'done' | 'error' | 'applied'>('idle')
  const [meta, setMeta] = useState<ReviewMeta | null>(null)
  const [changes, setChanges] = useState<LlmChange[]>([])
  const [progress, setProgress] = useState<StreamProgress | null>(null)
  const [batchLog, setBatchLog] = useState<string[]>([])
  const [totalDuration, setTotalDuration] = useState(0)
  const [error, setError] = useState('')
  // Indices into `changes` that the user wants to apply.
  const [accepted, setAccepted] = useState<Set<number>>(new Set())
  const [applying, setApplying] = useState(false)
  // Sentinel below the live table, used to keep the newest rows in view.
  const tableEndRef = useRef<HTMLDivElement>(null)

  // Starts the review and consumes the streamed response event by event.
  const runReview = useCallback(async () => {
    if (!sessionId) return
    setStatus('running')
    setError('')
    setChanges([])
    setBatchLog([])
    setProgress(null)
    setMeta(null)
    setTotalDuration(0)
    // Drop selections left over from a previous run.
    setAccepted(new Set())

    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review?stream=true`,
        { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({}) },
      )
      if (!res.ok) {
        const data = await res.json().catch(() => ({}))
        throw new Error(data.detail || `HTTP ${res.status}`)
      }
      if (!res.body) {
        throw new Error('Antwort enthaelt keinen Stream')
      }

      const reader = res.body.getReader()
      const decoder = new TextDecoder()
      let buffer = ''
      let allChanges: LlmChange[] = []
      // Tracked locally: the `status` state captured by this callback is stale
      // while the stream is being read and must not be consulted here.
      let completed = false

      // Handles one event block (the text between two blank lines).
      const handleEvent = (rawEvent: string): void => {
        // Only `data:` lines carry the payload; other fields and comments are skipped.
        const payload = rawEvent
          .split('\n')
          .filter(line => line.startsWith('data:'))
          .map(line => line.slice(5).replace(/^ /, ''))
          .join('\n')
        if (!payload) return

        let event: LlmReviewStreamEvent | null
        try {
          event = JSON.parse(payload) as LlmReviewStreamEvent | null
        } catch {
          // Malformed payloads are ignored rather than aborting the whole review.
          return
        }
        if (event === null || typeof event !== 'object') return

        switch (event.type) {
          case 'meta': {
            setMeta({
              total_entries: event.total_entries,
              to_review: event.to_review,
              skipped: event.skipped,
              model: event.model,
            })
            setBatchLog([`${event.total_entries} Eintraege, ${event.skipped} uebersprungen (Lautschrift), ${event.to_review} zu pruefen`])
            break
          }
          case 'batch': {
            const batchChanges: LlmChange[] = event.changes || []
            allChanges = [...allChanges, ...batchChanges]
            setChanges(allChanges)
            setProgress(event.progress ?? null)
            const rows = (event.entries_reviewed || []).map((r: number) => `R${r}`).join(', ')
            // Separate the row list from the count so "R1, R2" + "3" cannot read as "R23".
            const rowsPrefix = rows ? `${rows} → ` : ''
            const batchNumber = event.batch_index + 1
            const durationMs = event.duration_ms
            setBatchLog(prev => [...prev,
              `Batch ${batchNumber}: ${rowsPrefix}${batchChanges.length} Korrektur${batchChanges.length !== 1 ? 'en' : ''} (${durationMs}ms)`
            ])
            // Scroll after the next paint so the freshly rendered rows exist.
            setTimeout(() => tableEndRef.current?.scrollIntoView({ behavior: 'smooth', block: 'nearest' }), 16)
            break
          }
          case 'complete': {
            completed = true
            setTotalDuration(event.duration_ms)
            // Pre-select every correction.
            setAccepted(new Set(allChanges.map((_, i) => i)))
            setStatus('done')
            break
          }
          case 'error':
            throw new Error(event.detail || 'Unbekannter Fehler')
          default:
            break
        }
      }

      try {
        while (true) {
          const { done, value } = await reader.read()
          // At end of stream flush whatever the decoder still holds.
          buffer += done ? decoder.decode() : decoder.decode(value, { stream: true })
          // Accept CRLF-delimited streams as well as LF-delimited ones.
          buffer = buffer.replace(/\r\n/g, '\n')
          let boundary = buffer.indexOf('\n\n')
          while (boundary !== -1) {
            handleEvent(buffer.slice(0, boundary))
            buffer = buffer.slice(boundary + 2)
            boundary = buffer.indexOf('\n\n')
          }
          if (done) break
        }
        // A final event that was not terminated by a blank line.
        if (buffer.trim()) handleEvent(buffer)
      } finally {
        // Release the connection, in particular when an error event aborted the loop.
        void reader.cancel().catch(() => undefined)
      }

      // The stream ended without a `complete` event (e.g. 0 entries to review):
      // finish with whatever was received instead of staying in "running".
      if (!completed) {
        setAccepted(new Set(allChanges.map((_, i) => i)))
        setStatus('done')
      }
    } catch (e: unknown) {
      const msg = e instanceof Error ? e.message : String(e)
      setError(msg)
      setStatus('error')
    }
  }, [sessionId])

  // Flips the selection of a single correction.
  const toggleChange = (index: number) => {
    setAccepted(prev => {
      const next = new Set(prev)
      if (next.has(index)) next.delete(index)
      else next.add(index)
      return next
    })
  }

  // Selects everything, or clears the selection when everything is already selected.
  const toggleAll = () => {
    if (accepted.size === changes.length) {
      setAccepted(new Set())
    } else {
      setAccepted(new Set(changes.map((_, i) => i)))
    }
  }

  // Posts the selected indices to the apply endpoint.
  const applyChanges = useCallback(async () => {
    if (!sessionId) return
    setApplying(true)
    // Clear the message of a previous failed attempt.
    setError('')
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review/apply`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ accepted_indices: Array.from(accepted) }),
      })
      if (!res.ok) {
        const data = await res.json().catch(() => ({}))
        throw new Error(data.detail || `HTTP ${res.status}`)
      }
      setStatus('applied')
    } catch (e: unknown) {
      // Status stays "done"; the message is rendered above the action buttons.
      setError(e instanceof Error ? e.message : String(e))
    } finally {
      setApplying(false)
    }
  }, [sessionId, accepted])

  if (!sessionId) {
    return <div className="text-center py-12 text-gray-400">Bitte zuerst eine Session auswaehlen.</div>
  }

  // --- Idle ---
  if (status === 'idle') {
    return (
      <div className="flex flex-col items-center justify-center py-12 text-center">
        <div className="text-5xl mb-4">🤖</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 6: LLM-Korrektur
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-lg mb-2">
          Ein lokales Sprachmodell prueft die OCR-Ergebnisse auf typische Erkennungsfehler.
          Eintraege mit Lautschrift werden automatisch uebersprungen.
        </p>
        <p className="text-xs text-gray-400 dark:text-gray-500 mb-6">
          Modell: <span className="font-mono">qwen3:30b-a3b</span> via Ollama (lokal)
        </p>
        <button onClick={runReview}
          className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
          LLM-Korrektur starten
        </button>
      </div>
    )
  }

  // --- Running (with live progress) ---
  if (status === 'running') {
    // Guard against a zero total (would yield NaN) and clamp to a valid width.
    const pct = progress && progress.total > 0
      ? Math.min(100, Math.round((progress.current / progress.total) * 100))
      : 0
    return (
      <div className="space-y-4">
        <div className="flex items-center gap-3">
          <div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
          <h3 className="text-base font-medium text-gray-700 dark:text-gray-300">
            LLM-Korrektur laeuft...
          </h3>
          {meta && (
            <span className="text-xs text-gray-400 font-mono">{meta.model}</span>
          )}
        </div>
        {/* Progress bar */}
        {progress && (
          <div className="space-y-1">
            <div className="flex justify-between text-xs text-gray-400">
              <span>{progress.current} / {progress.total} Eintraege geprueft</span>
              <span>{pct}%</span>
            </div>
            <div className="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-2">
              <div className="bg-teal-500 h-2 rounded-full transition-all duration-500" style={{ width: `${pct}%` }} />
            </div>
          </div>
        )}
        {/* Live batch log */}
        <div className="bg-gray-50 dark:bg-gray-800/50 rounded-lg p-3 max-h-40 overflow-y-auto">
          {batchLog.map((line, i) => (
            <div key={i} className="text-xs text-gray-500 dark:text-gray-400 font-mono py-0.5">{line}</div>
          ))}
        </div>
        {/* Live changes appearing */}
        {changes.length > 0 && (
          <div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
            <table className="w-full text-sm">
              <thead>
                <tr className="bg-gray-50 dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700">
                  <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Zeile</th>
                  <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Feld</th>
                  <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Vorher</th>
                  <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Nachher</th>
                </tr>
              </thead>
              <tbody>
                {changes.map((change, idx) => (
                  <tr key={idx} className="border-b border-gray-100 dark:border-gray-700/50 bg-teal-50/50 dark:bg-teal-900/10">
                    <td className="px-3 py-1.5 text-gray-500 dark:text-gray-400 font-mono text-xs">R{change.row_index}</td>
                    <td className="px-3 py-1.5">
                      <span className="text-xs px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400">
                        {FIELD_LABELS[change.field] || change.field}
                      </span>
                    </td>
                    <td className="px-3 py-1.5"><span className="line-through text-red-500 dark:text-red-400">{change.old}</span></td>
                    <td className="px-3 py-1.5"><span className="text-green-600 dark:text-green-400 font-medium">{change.new}</span></td>
                  </tr>
                ))}
              </tbody>
            </table>
            <div ref={tableEndRef} />
          </div>
        )}
      </div>
    )
  }

  // --- Error ---
  if (status === 'error') {
    return (
      <div className="flex flex-col items-center justify-center py-12 text-center">
        <div className="text-5xl mb-4"></div>
        <h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">Fehler bei LLM-Korrektur</h3>
        <p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">{error}</p>
        <div className="flex gap-3">
          <button onClick={runReview}
            className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm">
            Erneut versuchen
          </button>
          <button onClick={onNext}
            className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm">
            Ueberspringen
          </button>
        </div>
      </div>
    )
  }

  // --- Applied ---
  if (status === 'applied') {
    return (
      <div className="flex flex-col items-center justify-center py-12 text-center">
        <div className="text-5xl mb-4"></div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">Korrekturen uebernommen</h3>
        <p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
          {accepted.size} von {changes.length} Korrekturen wurden angewendet.
        </p>
        <button onClick={onNext}
          className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
          Weiter
        </button>
      </div>
    )
  }

  // --- Done: nothing to correct ---
  if (changes.length === 0) {
    return (
      <div className="flex flex-col items-center justify-center py-12 text-center">
        <div className="text-5xl mb-4">👍</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">Keine Korrekturen noetig</h3>
        <p className="text-sm text-gray-500 dark:text-gray-400 mb-1">Das LLM hat keine OCR-Fehler gefunden.</p>
        {meta && (
          <p className="text-xs text-gray-400 dark:text-gray-500 mb-6">
            {meta.to_review} geprueft, {meta.skipped} uebersprungen · {totalDuration}ms · {meta.model}
          </p>
        )}
        <button onClick={onNext}
          className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
          Weiter
        </button>
      </div>
    )
  }

  // --- Done: diff table with checkboxes ---
  return (
    <div className="space-y-4">
      {/* Header */}
      <div className="flex items-center justify-between">
        <div>
          <h3 className="text-base font-medium text-gray-700 dark:text-gray-300">LLM-Korrekturvorschlaege</h3>
          <p className="text-xs text-gray-400 mt-0.5">
            {changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden
            {meta && <> · {meta.skipped} uebersprungen (Lautschrift)</>}
            {' '}· {totalDuration}ms · {meta?.model}
          </p>
        </div>
        <button onClick={toggleAll}
          className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400">
          {accepted.size === changes.length ? 'Keine' : 'Alle'} auswaehlen
        </button>
      </div>
      {/* Diff table */}
      <div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
        <table className="w-full text-sm">
          <thead>
            <tr className="bg-gray-50 dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700">
              <th className="w-10 px-3 py-2 text-center">
                <input type="checkbox" checked={accepted.size === changes.length} onChange={toggleAll}
                  className="rounded border-gray-300 dark:border-gray-600" />
              </th>
              <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Zeile</th>
              <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Feld</th>
              <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Vorher</th>
              <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Nachher</th>
            </tr>
          </thead>
          <tbody>
            {changes.map((change, idx) => (
              <tr key={idx} className={`border-b border-gray-100 dark:border-gray-700/50 ${
                accepted.has(idx) ? 'bg-teal-50/50 dark:bg-teal-900/10' : 'bg-white dark:bg-gray-800/50'
              }`}>
                <td className="px-3 py-2 text-center">
                  <input type="checkbox" checked={accepted.has(idx)} onChange={() => toggleChange(idx)}
                    className="rounded border-gray-300 dark:border-gray-600" />
                </td>
                <td className="px-3 py-2 text-gray-500 dark:text-gray-400 font-mono text-xs">R{change.row_index}</td>
                <td className="px-3 py-2">
                  <span className="text-xs px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400">
                    {FIELD_LABELS[change.field] || change.field}
                  </span>
                </td>
                <td className="px-3 py-2"><span className="line-through text-red-500 dark:text-red-400">{change.old}</span></td>
                <td className="px-3 py-2"><span className="text-green-600 dark:text-green-400 font-medium">{change.new}</span></td>
              </tr>
            ))}
          </tbody>
        </table>
      </div>
      {/* A failed apply keeps this view; surface its message here. */}
      {error && (
        <p className="text-sm text-red-600 dark:text-red-400">{error}</p>
      )}
      {/* Actions */}
      <div className="flex items-center justify-between pt-2">
        <p className="text-xs text-gray-400">{accepted.size} von {changes.length} ausgewaehlt</p>
        <div className="flex gap-3">
          <button onClick={onNext}
            className="px-4 py-2 text-sm border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400">
            Alle ablehnen
          </button>
          <button onClick={applyChanges} disabled={applying || accepted.size === 0}
            className="px-5 py-2 text-sm bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors font-medium">
            {applying ? 'Wird uebernommen...' : `${accepted.size} Korrektur${accepted.size !== 1 ? 'en' : ''} uebernehmen`}
          </button>
        </div>
      </div>
    </div>
  )
}