feat(ocr-pipeline): add LLM-based OCR correction step (Step 6)

Replace the placeholder "Koordinaten" step with an LLM review step that
sends vocab entries to qwen3:30b-a3b via Ollama for OCR error correction
(e.g. "8en" → "Ben"). Teachers can review, accept/reject individual
corrections in a diff table before applying them.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-02 11:13:17 +01:00
parent e9f368d3ec
commit 938d1d69cf
5 changed files with 586 additions and 5 deletions

View File

@@ -0,0 +1,345 @@
'use client'
import { useCallback, useState } from 'react'
/** Base path that the fetch URLs in this component are prefixed with. */
const KLAUSUR_API = '/klausur-api'
/**
 * One correction proposed by the LLM review for a single field of a single row.
 */
interface LlmChange {
  /** Index of the affected row; rendered in the diff table as `R<row_index>`. */
  row_index: number
  /** Field of the row that the correction targets. */
  field: 'english' | 'german' | 'example'
  /** Text before the correction (rendered struck through in the "Vorher" column). */
  old: string
  /** Proposed replacement text (rendered in the "Nachher" column). */
  new: string
}
/**
 * Response body of the `llm-review` endpoint as consumed by this component.
 */
interface LlmReviewResult {
  /** Proposed corrections; the array position of each entry is the index sent back on apply. */
  changes: LlmChange[]
  /** Model identifier, displayed verbatim next to the duration. */
  model_used: string
  /** Review duration; displayed with an "ms" suffix. */
  duration_ms: number
  /** Number of entries that were checked; shown when no corrections were found. */
  total_entries: number
  /** Not read by this component; presumably equals `changes.length` (confirm against the backend). */
  corrections_found: number
}
/**
 * Props accepted by the LLM review step component.
 */
interface StepLlmReviewProps {
  /** Session to review; when `null` the component only renders a "select a session" hint. */
  sessionId: string | null
  /** Invoked when the user continues, skips after an error, or rejects all corrections. */
  onNext: () => void
}
/**
 * Short display labels for the `field` value of a change, used in the diff
 * table's "Feld" column. Fields without an entry are shown by their raw name.
 */
const FIELD_LABELS: Record<string, string> = {
  english: 'EN',
  german: 'DE',
  example: 'Beispiel',
}
/**
 * Builds a readable error message from a non-OK API response.
 *
 * Uses the `detail` member of the JSON body when it is truthy; string details
 * are returned as-is, any other value is JSON-serialised so that structured
 * details do not degrade to "[object Object]". Falls back to `HTTP <status>`
 * when the body is not JSON or carries no usable detail.
 */
async function readErrorDetail(res: Response): Promise<string> {
  const body: unknown = await res.json().catch(() => null)
  const detail =
    typeof body === 'object' && body !== null ? (body as { detail?: unknown }).detail : undefined
  if (detail) {
    return typeof detail === 'string' ? detail : JSON.stringify(detail)
  }
  return `HTTP ${res.status}`
}

/**
 * Pipeline step that asks the backend for LLM-proposed OCR corrections and
 * lets the user pick which ones to apply.
 *
 * The component moves through these states:
 * - `idle`: intro text and a start button.
 * - `running`: spinner while the `llm-review` request is pending.
 * - `error`: the review request failed; offers retry or skip.
 * - `done`: either a "nothing to correct" message or a diff table with one
 *   checkbox per proposed change (all accepted by default).
 * - `applied`: the `llm-review/apply` request succeeded.
 *
 * A failed apply request keeps the component in `done` and shows the error
 * above the table so the user can retry.
 *
 * @param sessionId Session whose entries are reviewed; `null` renders a hint only.
 * @param onNext Called when the user proceeds to the next step.
 */
export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
  const [status, setStatus] = useState<'idle' | 'running' | 'done' | 'error' | 'applied'>('idle')
  const [result, setResult] = useState<LlmReviewResult | null>(null)
  const [error, setError] = useState<string>('')
  // Positions (into result.changes) of the corrections the user wants to apply.
  const [accepted, setAccepted] = useState<Set<number>>(new Set())
  const [applying, setApplying] = useState(false)

  const runReview = useCallback(async () => {
    if (!sessionId) return
    setStatus('running')
    setError('')
    setResult(null)
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${encodeURIComponent(sessionId)}/llm-review`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({}),
        },
      )
      if (!res.ok) {
        throw new Error(await readErrorDetail(res))
      }
      const data: LlmReviewResult = await res.json()
      // Tolerate a response without a `changes` array instead of failing in `.map`.
      const changes = Array.isArray(data.changes) ? data.changes : []
      setResult({ ...data, changes })
      // Accept all changes by default
      setAccepted(new Set(changes.map((_, i) => i)))
      setStatus('done')
    } catch (e: unknown) {
      const msg = e instanceof Error ? e.message : String(e)
      setError(msg)
      setStatus('error')
    }
  }, [sessionId])

  const toggleChange = (index: number) => {
    setAccepted((prev) => {
      const next = new Set(prev)
      if (next.has(index)) next.delete(index)
      else next.add(index)
      return next
    })
  }

  const toggleAll = () => {
    if (!result) return
    if (accepted.size === result.changes.length) {
      setAccepted(new Set())
    } else {
      setAccepted(new Set(result.changes.map((_, i) => i)))
    }
  }

  const applyChanges = useCallback(async () => {
    if (!sessionId || !result) return
    setApplying(true)
    // Drop the message of a previous failed attempt before retrying.
    setError('')
    try {
      const res = await fetch(
        `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${encodeURIComponent(sessionId)}/llm-review/apply`,
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          // Sorted so the payload does not depend on the order of user toggles.
          body: JSON.stringify({ accepted_indices: Array.from(accepted).sort((a, b) => a - b) }),
        },
      )
      if (!res.ok) {
        throw new Error(await readErrorDetail(res))
      }
      setStatus('applied')
    } catch (e: unknown) {
      // Status stays 'done'; the message is rendered above the diff table.
      const msg = e instanceof Error ? e.message : String(e)
      setError(msg)
    } finally {
      setApplying(false)
    }
  }, [sessionId, result, accepted])

  if (!sessionId) {
    return (
      <div className="text-center py-12 text-gray-400">
        Bitte zuerst eine Session auswaehlen.
      </div>
    )
  }

  // --- Idle state ---
  if (status === 'idle') {
    return (
      <div className="flex flex-col items-center justify-center py-12 text-center">
        <div className="text-5xl mb-4">🤖</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Schritt 6: LLM-Korrektur
        </h3>
        <p className="text-gray-500 dark:text-gray-400 max-w-lg mb-2">
          Ein lokales Sprachmodell prueft die OCR-Ergebnisse auf typische Erkennungsfehler
          (z.B. &quot;8en&quot; statt &quot;Ben&quot;) und schlaegt Korrekturen vor.
        </p>
        <p className="text-xs text-gray-400 dark:text-gray-500 mb-6">
          Modell: <span className="font-mono">qwen3:30b-a3b</span> via Ollama (lokal)
        </p>
        <button
          onClick={runReview}
          className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium"
        >
          LLM-Korrektur starten
        </button>
      </div>
    )
  }

  // --- Running state ---
  if (status === 'running') {
    return (
      <div className="flex flex-col items-center justify-center py-16 text-center">
        <div className="animate-spin rounded-full h-10 w-10 border-b-2 border-teal-500 mb-4" />
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-1">
          Korrektur laeuft...
        </h3>
        <p className="text-sm text-gray-400">
          <span className="font-mono">qwen3:30b-a3b</span> prueft die Vokabeleintraege
        </p>
      </div>
    )
  }

  // --- Error state ---
  if (status === 'error') {
    return (
      <div className="flex flex-col items-center justify-center py-12 text-center">
        <div className="text-5xl mb-4"></div>
        <h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">
          Fehler bei LLM-Korrektur
        </h3>
        <p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">
          {error}
        </p>
        <div className="flex gap-3">
          <button
            onClick={runReview}
            className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm"
          >
            Erneut versuchen
          </button>
          <button
            onClick={onNext}
            className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm"
          >
            Ueberspringen
          </button>
        </div>
      </div>
    )
  }

  // --- Applied state ---
  if (status === 'applied') {
    return (
      <div className="flex flex-col items-center justify-center py-12 text-center">
        <div className="text-5xl mb-4"></div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Korrekturen uebernommen
        </h3>
        <p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
          {accepted.size} von {result?.changes.length ?? 0} Korrekturen wurden angewendet.
        </p>
        <button
          onClick={onNext}
          className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium"
        >
          Weiter
        </button>
      </div>
    )
  }

  // --- Done state: show diff table ---
  const changes = result?.changes ?? []

  if (changes.length === 0) {
    return (
      <div className="flex flex-col items-center justify-center py-12 text-center">
        <div className="text-5xl mb-4">👍</div>
        <h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
          Keine Korrekturen noetig
        </h3>
        <p className="text-sm text-gray-500 dark:text-gray-400 mb-1">
          Das LLM hat keine OCR-Fehler gefunden.
        </p>
        <p className="text-xs text-gray-400 dark:text-gray-500 mb-6">
          {result?.total_entries} Eintraege geprueft in {result?.duration_ms}ms
          ({result?.model_used})
        </p>
        <button
          onClick={onNext}
          className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium"
        >
          Weiter
        </button>
      </div>
    )
  }

  return (
    <div className="space-y-4">
      {/* Header */}
      <div className="flex items-center justify-between">
        <div>
          <h3 className="text-base font-medium text-gray-700 dark:text-gray-300">
            LLM-Korrekturvorschlaege
          </h3>
          <p className="text-xs text-gray-400 mt-0.5">
            {changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden
            · {result?.duration_ms}ms · {result?.model_used}
          </p>
        </div>
        <div className="flex items-center gap-2">
          <button
            onClick={toggleAll}
            disabled={applying}
            className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors text-gray-600 dark:text-gray-400"
          >
            {accepted.size === changes.length ? 'Keine' : 'Alle'} auswaehlen
          </button>
        </div>
      </div>

      {/* Apply failure: status stays 'done', so the message has to be shown here */}
      {error && (
        <div
          role="alert"
          className="text-sm px-3 py-2 rounded-lg border border-red-200 dark:border-red-800 bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400"
        >
          Fehler beim Uebernehmen der Korrekturen: {error}
        </div>
      )}

      {/* Diff table */}
      <div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
        <table className="w-full text-sm">
          <thead>
            <tr className="bg-gray-50 dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700">
              <th className="w-10 px-3 py-2 text-center">
                <input
                  type="checkbox"
                  checked={accepted.size === changes.length}
                  onChange={toggleAll}
                  disabled={applying}
                  className="rounded border-gray-300 dark:border-gray-600"
                />
              </th>
              <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Zeile</th>
              <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Feld</th>
              <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Vorher</th>
              <th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Nachher</th>
            </tr>
          </thead>
          <tbody>
            {changes.map((change, idx) => (
              <tr
                key={idx}
                className={`border-b border-gray-100 dark:border-gray-700/50 ${
                  accepted.has(idx)
                    ? 'bg-teal-50/50 dark:bg-teal-900/10'
                    : 'bg-white dark:bg-gray-800/50'
                }`}
              >
                <td className="px-3 py-2 text-center">
                  <input
                    type="checkbox"
                    checked={accepted.has(idx)}
                    onChange={() => toggleChange(idx)}
                    disabled={applying}
                    className="rounded border-gray-300 dark:border-gray-600"
                  />
                </td>
                <td className="px-3 py-2 text-gray-500 dark:text-gray-400 font-mono text-xs">
                  R{change.row_index}
                </td>
                <td className="px-3 py-2">
                  <span className="text-xs px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400">
                    {FIELD_LABELS[change.field] || change.field}
                  </span>
                </td>
                <td className="px-3 py-2">
                  <span className="line-through text-red-500 dark:text-red-400">
                    {change.old}
                  </span>
                </td>
                <td className="px-3 py-2">
                  <span className="text-green-600 dark:text-green-400 font-medium">
                    {change.new}
                  </span>
                </td>
              </tr>
            ))}
          </tbody>
        </table>
      </div>

      {/* Actions */}
      <div className="flex items-center justify-between pt-2">
        <p className="text-xs text-gray-400">
          {accepted.size} von {changes.length} ausgewaehlt
        </p>
        <div className="flex gap-3">
          <button
            onClick={onNext}
            disabled={applying}
            className="px-4 py-2 text-sm border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors text-gray-600 dark:text-gray-400"
          >
            Alle ablehnen
          </button>
          <button
            onClick={applyChanges}
            disabled={applying || accepted.size === 0}
            className="px-5 py-2 text-sm bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors font-medium"
          >
            {applying ? 'Wird uebernommen...' : `${accepted.size} Korrektur${accepted.size !== 1 ? 'en' : ''} uebernehmen`}
          </button>
        </div>
      </div>
    </div>
  )
}