feat(ocr-pipeline): add LLM-based OCR correction step (Step 6)
Replace the placeholder "Koordinaten" step with an LLM review step that sends vocab entries to qwen3:30b-a3b via Ollama for OCR error correction (e.g. "8en" → "Ben"). Teachers can review, accept/reject individual corrections in a diff table before applying them. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,7 +8,7 @@ import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
|
||||
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
|
||||
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||
import { StepCoordinates } from '@/components/ocr-pipeline/StepCoordinates'
|
||||
import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview'
|
||||
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
|
||||
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
|
||||
import { PIPELINE_STEPS, type PipelineStep, type SessionListItem } from './types'
|
||||
@@ -155,7 +155,7 @@ export default function OcrPipelinePage() {
|
||||
3: 'Spalten',
|
||||
4: 'Zeilen',
|
||||
5: 'Woerter',
|
||||
6: 'Koordinaten',
|
||||
6: 'LLM-Korrektur',
|
||||
7: 'Rekonstruktion',
|
||||
8: 'Validierung',
|
||||
}
|
||||
@@ -173,7 +173,7 @@ export default function OcrPipelinePage() {
|
||||
case 4:
|
||||
return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} />
|
||||
case 5:
|
||||
return <StepCoordinates />
|
||||
return <StepLlmReview sessionId={sessionId} onNext={handleNext} />
|
||||
case 6:
|
||||
return <StepReconstruction />
|
||||
case 7:
|
||||
|
||||
@@ -208,7 +208,7 @@ export const PIPELINE_STEPS: PipelineStep[] = [
|
||||
{ id: 'columns', name: 'Spalten', icon: '📊', status: 'pending' },
|
||||
{ id: 'rows', name: 'Zeilen', icon: '📏', status: 'pending' },
|
||||
{ id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' },
|
||||
{ id: 'coordinates', name: 'Koordinaten', icon: '📍', status: 'pending' },
|
||||
{ id: 'llm-review', name: 'LLM-Korrektur', icon: '🤖', status: 'pending' },
|
||||
{ id: 'reconstruction', name: 'Rekonstruktion', icon: '🏗️', status: 'pending' },
|
||||
{ id: 'ground-truth', name: 'Validierung', icon: '✅', status: 'pending' },
|
||||
]
|
||||
|
||||
345
admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx
Normal file
345
admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx
Normal file
@@ -0,0 +1,345 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useState } from 'react'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** One correction proposed by the LLM for a single table cell. */
interface LlmChange {
  row_index: number
  field: 'english' | 'german' | 'example'
  old: string
  new: string
}

/** Response shape of POST …/ocr-pipeline/sessions/{id}/llm-review. */
interface LlmReviewResult {
  changes: LlmChange[]
  model_used: string
  duration_ms: number
  total_entries: number
  corrections_found: number
}

interface StepLlmReviewProps {
  // null until the user has picked a session in the pipeline UI
  sessionId: string | null
  // advances the pipeline to the next step
  onNext: () => void
}

/** Short labels for the "Feld" column of the diff table. */
const FIELD_LABELS: Record<string, string> = {
  english: 'EN',
  german: 'DE',
  example: 'Beispiel',
}
|
||||
|
||||
export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
||||
const [status, setStatus] = useState<'idle' | 'running' | 'done' | 'error' | 'applied'>('idle')
|
||||
const [result, setResult] = useState<LlmReviewResult | null>(null)
|
||||
const [error, setError] = useState<string>('')
|
||||
const [accepted, setAccepted] = useState<Set<number>>(new Set())
|
||||
const [applying, setApplying] = useState(false)
|
||||
|
||||
const runReview = useCallback(async () => {
|
||||
if (!sessionId) return
|
||||
setStatus('running')
|
||||
setError('')
|
||||
setResult(null)
|
||||
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({}),
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
const data = await res.json().catch(() => ({}))
|
||||
throw new Error(data.detail || `HTTP ${res.status}`)
|
||||
}
|
||||
|
||||
const data: LlmReviewResult = await res.json()
|
||||
setResult(data)
|
||||
// Accept all changes by default
|
||||
setAccepted(new Set(data.changes.map((_, i) => i)))
|
||||
setStatus('done')
|
||||
} catch (e: unknown) {
|
||||
const msg = e instanceof Error ? e.message : String(e)
|
||||
setError(msg)
|
||||
setStatus('error')
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
const toggleChange = (index: number) => {
|
||||
setAccepted((prev) => {
|
||||
const next = new Set(prev)
|
||||
if (next.has(index)) next.delete(index)
|
||||
else next.add(index)
|
||||
return next
|
||||
})
|
||||
}
|
||||
|
||||
const toggleAll = () => {
|
||||
if (!result) return
|
||||
if (accepted.size === result.changes.length) {
|
||||
setAccepted(new Set())
|
||||
} else {
|
||||
setAccepted(new Set(result.changes.map((_, i) => i)))
|
||||
}
|
||||
}
|
||||
|
||||
const applyChanges = useCallback(async () => {
|
||||
if (!sessionId || !result) return
|
||||
setApplying(true)
|
||||
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review/apply`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ accepted_indices: Array.from(accepted) }),
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
const data = await res.json().catch(() => ({}))
|
||||
throw new Error(data.detail || `HTTP ${res.status}`)
|
||||
}
|
||||
|
||||
setStatus('applied')
|
||||
} catch (e: unknown) {
|
||||
const msg = e instanceof Error ? e.message : String(e)
|
||||
setError(msg)
|
||||
} finally {
|
||||
setApplying(false)
|
||||
}
|
||||
}, [sessionId, result, accepted])
|
||||
|
||||
if (!sessionId) {
|
||||
return (
|
||||
<div className="text-center py-12 text-gray-400">
|
||||
Bitte zuerst eine Session auswaehlen.
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// --- Idle state ---
|
||||
if (status === 'idle') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">🤖</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Schritt 6: LLM-Korrektur
|
||||
</h3>
|
||||
<p className="text-gray-500 dark:text-gray-400 max-w-lg mb-2">
|
||||
Ein lokales Sprachmodell prueft die OCR-Ergebnisse auf typische Erkennungsfehler
|
||||
(z.B. "8en" statt "Ben") und schlaegt Korrekturen vor.
|
||||
</p>
|
||||
<p className="text-xs text-gray-400 dark:text-gray-500 mb-6">
|
||||
Modell: <span className="font-mono">qwen3:30b-a3b</span> via Ollama (lokal)
|
||||
</p>
|
||||
<button
|
||||
onClick={runReview}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium"
|
||||
>
|
||||
LLM-Korrektur starten
|
||||
</button>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// --- Running state ---
|
||||
if (status === 'running') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
||||
<div className="animate-spin rounded-full h-10 w-10 border-b-2 border-teal-500 mb-4" />
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-1">
|
||||
Korrektur laeuft...
|
||||
</h3>
|
||||
<p className="text-sm text-gray-400">
|
||||
<span className="font-mono">qwen3:30b-a3b</span> prueft die Vokabeleintraege
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// --- Error state ---
|
||||
if (status === 'error') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">⚠️</div>
|
||||
<h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">
|
||||
Fehler bei LLM-Korrektur
|
||||
</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">
|
||||
{error}
|
||||
</p>
|
||||
<div className="flex gap-3">
|
||||
<button
|
||||
onClick={runReview}
|
||||
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm"
|
||||
>
|
||||
Erneut versuchen
|
||||
</button>
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm"
|
||||
>
|
||||
Ueberspringen →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// --- Applied state ---
|
||||
if (status === 'applied') {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">✅</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Korrekturen uebernommen
|
||||
</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
|
||||
{accepted.size} von {result?.changes.length ?? 0} Korrekturen wurden angewendet.
|
||||
</p>
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium"
|
||||
>
|
||||
Weiter →
|
||||
</button>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// --- Done state: show diff table ---
|
||||
const changes = result?.changes ?? []
|
||||
|
||||
if (changes.length === 0) {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||
<div className="text-5xl mb-4">👍</div>
|
||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
||||
Keine Korrekturen noetig
|
||||
</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400 mb-1">
|
||||
Das LLM hat keine OCR-Fehler gefunden.
|
||||
</p>
|
||||
<p className="text-xs text-gray-400 dark:text-gray-500 mb-6">
|
||||
{result?.total_entries} Eintraege geprueft in {result?.duration_ms}ms
|
||||
({result?.model_used})
|
||||
</p>
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium"
|
||||
>
|
||||
Weiter →
|
||||
</button>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h3 className="text-base font-medium text-gray-700 dark:text-gray-300">
|
||||
LLM-Korrekturvorschlaege
|
||||
</h3>
|
||||
<p className="text-xs text-gray-400 mt-0.5">
|
||||
{changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden
|
||||
· {result?.duration_ms}ms · {result?.model_used}
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<button
|
||||
onClick={toggleAll}
|
||||
className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400"
|
||||
>
|
||||
{accepted.size === changes.length ? 'Keine' : 'Alle'} auswaehlen
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Diff table */}
|
||||
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
|
||||
<table className="w-full text-sm">
|
||||
<thead>
|
||||
<tr className="bg-gray-50 dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700">
|
||||
<th className="w-10 px-3 py-2 text-center">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={accepted.size === changes.length}
|
||||
onChange={toggleAll}
|
||||
className="rounded border-gray-300 dark:border-gray-600"
|
||||
/>
|
||||
</th>
|
||||
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Zeile</th>
|
||||
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Feld</th>
|
||||
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Vorher</th>
|
||||
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Nachher</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{changes.map((change, idx) => (
|
||||
<tr
|
||||
key={idx}
|
||||
className={`border-b border-gray-100 dark:border-gray-700/50 ${
|
||||
accepted.has(idx)
|
||||
? 'bg-teal-50/50 dark:bg-teal-900/10'
|
||||
: 'bg-white dark:bg-gray-800/50'
|
||||
}`}
|
||||
>
|
||||
<td className="px-3 py-2 text-center">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={accepted.has(idx)}
|
||||
onChange={() => toggleChange(idx)}
|
||||
className="rounded border-gray-300 dark:border-gray-600"
|
||||
/>
|
||||
</td>
|
||||
<td className="px-3 py-2 text-gray-500 dark:text-gray-400 font-mono text-xs">
|
||||
R{change.row_index}
|
||||
</td>
|
||||
<td className="px-3 py-2">
|
||||
<span className="text-xs px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400">
|
||||
{FIELD_LABELS[change.field] || change.field}
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-3 py-2">
|
||||
<span className="line-through text-red-500 dark:text-red-400">
|
||||
{change.old}
|
||||
</span>
|
||||
</td>
|
||||
<td className="px-3 py-2">
|
||||
<span className="text-green-600 dark:text-green-400 font-medium">
|
||||
{change.new}
|
||||
</span>
|
||||
</td>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
{/* Actions */}
|
||||
<div className="flex items-center justify-between pt-2">
|
||||
<p className="text-xs text-gray-400">
|
||||
{accepted.size} von {changes.length} ausgewaehlt
|
||||
</p>
|
||||
<div className="flex gap-3">
|
||||
<button
|
||||
onClick={onNext}
|
||||
className="px-4 py-2 text-sm border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400"
|
||||
>
|
||||
Alle ablehnen
|
||||
</button>
|
||||
<button
|
||||
onClick={applyChanges}
|
||||
disabled={applying || accepted.size === 0}
|
||||
className="px-5 py-2 text-sm bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors font-medium"
|
||||
>
|
||||
{applying ? 'Wird uebernommen...' : `${accepted.size} Korrektur${accepted.size !== 1 ? 'en' : ''} uebernehmen`}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -4304,3 +4304,119 @@ async def run_cv_pipeline(
|
||||
result.duration_seconds = round(time.time() - total_start, 2)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# LLM-based OCR Correction (Step 6)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
import httpx
|
||||
import os
|
||||
import json as _json
|
||||
import re as _re
|
||||
|
||||
_OLLAMA_URL = os.getenv("OLLAMA_URL", os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434"))
|
||||
OLLAMA_REVIEW_MODEL = os.getenv("OLLAMA_REVIEW_MODEL", "qwen3:30b-a3b")
|
||||
|
||||
|
||||
async def llm_review_entries(
    entries: List[Dict],
    model: str = None,  # None → fall back to OLLAMA_REVIEW_MODEL
) -> Dict:
    """Send vocab entries to a local Ollama LLM for OCR error correction.

    Builds a compact JSON table from ``entries``, asks the model to fix
    obvious OCR mistakes (e.g. "8en" → "Ben"), then diffs the model output
    against the originals field by field.

    Args:
        entries: vocab entry dicts; ``row_index``, ``english``, ``german``
            and ``example`` are read (missing keys default to ""/0).
        model: Ollama model name; defaults to ``OLLAMA_REVIEW_MODEL``.

    Returns:
        dict with keys ``entries_original``, ``entries_corrected``,
        ``changes`` (list of {row_index, field, old, new}), ``model_used``
        and ``duration_ms``.

    Raises:
        httpx.HTTPError: when the Ollama request fails or returns non-2xx.
    """
    model = model or OLLAMA_REVIEW_MODEL

    # Compact table keeps the prompt small; the short keys mirror the
    # field mapping used when diffing the response below.
    table_lines = []
    for e in entries:
        table_lines.append({
            "row": e.get("row_index", 0),
            "en": e.get("english", ""),
            "de": e.get("german", ""),
            "ex": e.get("example", ""),
        })

    # NOTE: the prompt asks the model to set a "corrected" flag, but the
    # diff below is computed independently by comparing values, so that
    # flag is deliberately ignored on parse.
    prompt = f"""Du bist ein Korrekturleser fuer OCR-erkannte Vokabeltabellen (Englisch-Deutsch).
Die Tabelle wurde per OCR aus einem Schulbuch-Scan extrahiert. Korrigiere NUR offensichtliche OCR-Fehler.

Haeufige OCR-Fehler die du korrigieren sollst:
- Ziffern statt Buchstaben: 8→B, 0→O, 1→l/I, 5→S, 6→G
- Fehlende oder falsche Satzzeichen
- Offensichtliche Tippfehler die durch OCR entstanden sind

WICHTIG:
- Aendere NICHTS was korrekt aussieht
- Erfinde KEINE neuen Woerter oder Uebersetzungen
- Behalte Abkuerzungen wie sth., sb., etc. bei
- Behalte die exakte Struktur (gleiche Anzahl Eintraege)

Antworte NUR mit dem korrigierten JSON-Array. Kein erklaerender Text.
Fuer jeden Eintrag den du aenderst, setze "corrected": true.
Fuer unveraenderte Eintraege setze "corrected": false.

Eingabe:
{_json.dumps(table_lines, ensure_ascii=False, indent=2)}"""

    t0 = time.time()
    async with httpx.AsyncClient(timeout=120.0) as client:
        resp = await client.post(
            f"{_OLLAMA_URL}/api/chat",
            json={
                "model": model,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                # Low temperature: conservative, reproducible fixes only.
                "options": {"temperature": 0.1, "num_predict": 8192},
            },
        )
        resp.raise_for_status()
        content = resp.json().get("message", {}).get("content", "")
    duration_ms = int((time.time() - t0) * 1000)

    # Parse LLM response — extract JSON array. An empty list degrades
    # gracefully to "no changes" below.
    corrected = _parse_llm_json_array(content)

    # Build diff: compare original vs corrected, pairing rows by position.
    changes = []
    entries_corrected = []
    for i, orig in enumerate(entries):
        c = corrected[i] if i < len(corrected) else None
        # If the model returned fewer rows than sent, or a malformed row
        # (not a JSON object), keep the original entry untouched.
        if not isinstance(c, dict):
            entries_corrected.append(dict(orig))
            continue
        entry = dict(orig)
        for field_name, key in [("english", "en"), ("german", "de"), ("example", "ex")]:
            raw = c.get(key, "")
            # The model may emit null or numbers for a cell; coerce
            # defensively so one bad cell cannot crash the whole review.
            new_val = str(raw).strip() if raw is not None else ""
            old_val = (orig.get(field_name, "") or "").strip()
            if new_val and new_val != old_val:
                changes.append({
                    "row_index": orig.get("row_index", i),
                    "field": field_name,
                    "old": old_val,
                    "new": new_val,
                })
                entry[field_name] = new_val
                entry["llm_corrected"] = True
        entries_corrected.append(entry)

    return {
        "entries_original": entries,
        "entries_corrected": entries_corrected,
        "changes": changes,
        "model_used": model,
        "duration_ms": duration_ms,
    }
|
||||
|
||||
|
||||
def _parse_llm_json_array(text: str) -> List[Dict]:
|
||||
"""Extract JSON array from LLM response (may contain markdown fences)."""
|
||||
# Strip markdown code fences
|
||||
text = _re.sub(r'```json\s*', '', text)
|
||||
text = _re.sub(r'```\s*', '', text)
|
||||
# Find array
|
||||
match = _re.search(r'\[.*\]', text, _re.DOTALL)
|
||||
if match:
|
||||
try:
|
||||
return _json.loads(match.group())
|
||||
except (ValueError, _json.JSONDecodeError):
|
||||
pass
|
||||
return []
|
||||
|
||||
@@ -7,7 +7,7 @@ Zerlegt den OCR-Prozess in 8 einzelne Schritte:
|
||||
3. Spaltenerkennung - Unsichtbare Spalten finden
|
||||
4. Zeilenerkennung - Horizontale Zeilen + Kopf-/Fusszeilen
|
||||
5. Worterkennung - OCR mit Bounding Boxes
|
||||
6. Koordinatenzuweisung - Exakte Positionen
|
||||
6. LLM-Korrektur - OCR-Fehler per LLM korrigieren
|
||||
7. Seitenrekonstruktion - Seite nachbauen
|
||||
8. Ground Truth Validierung - Gesamtpruefung
|
||||
|
||||
@@ -30,6 +30,7 @@ from fastapi.responses import Response, StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from cv_vocab_pipeline import (
|
||||
OLLAMA_REVIEW_MODEL,
|
||||
PageRegion,
|
||||
RowGeometry,
|
||||
_cells_to_vocab_entries,
|
||||
@@ -49,6 +50,7 @@ from cv_vocab_pipeline import (
|
||||
detect_row_geometry,
|
||||
dewarp_image,
|
||||
dewarp_image_manual,
|
||||
llm_review_entries,
|
||||
render_image_high_res,
|
||||
render_pdf_high_res,
|
||||
)
|
||||
@@ -1387,6 +1389,124 @@ async def get_word_ground_truth(session_id: str):
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# LLM Review Endpoints (Step 6)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/llm-review")
async def run_llm_review(session_id: str, request: Request):
    """Run LLM-based correction on vocab entries from Step 5.

    Stores the review result under ``word_result["llm_review"]`` and
    advances the session to step 6. The request body may optionally
    contain ``{"model": "..."}`` to override the default Ollama model.

    Raises:
        HTTPException 404: unknown session.
        HTTPException 400: Step 5 has not produced entries yet.
        HTTPException 502: the upstream LLM call failed.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found — run Step 5 first")

    entries = word_result.get("vocab_entries") or word_result.get("entries") or []
    if not entries:
        raise HTTPException(status_code=400, detail="No vocab entries found — run Step 5 first")

    # Optional model override from request body. Tolerate empty/invalid
    # bodies AND valid-but-non-object JSON (e.g. a bare list) — previously
    # the latter crashed on body.get() with a 500.
    body = {}
    try:
        parsed = await request.json()
        if isinstance(parsed, dict):
            body = parsed
    except Exception:
        pass
    model = body.get("model") or OLLAMA_REVIEW_MODEL

    try:
        result = await llm_review_entries(entries, model=model)
    except Exception as e:
        logger.error(f"LLM review failed for session {session_id}: {e}")
        # 502: the failure is in the upstream LLM (Ollama), not this service.
        raise HTTPException(status_code=502, detail=f"LLM review failed: {e}") from e

    # Store result inside word_result as a sub-key so the /apply endpoint
    # can find it later.
    word_result["llm_review"] = {
        "changes": result["changes"],
        "model_used": result["model_used"],
        "duration_ms": result["duration_ms"],
        "entries_corrected": result["entries_corrected"],
    }
    await update_session_db(session_id, word_result=word_result, current_step=6)

    # Keep the in-memory cache consistent with the DB.
    if session_id in _cache:
        _cache[session_id]["word_result"] = word_result

    logger.info(f"LLM review session {session_id}: {len(result['changes'])} changes, "
                f"{result['duration_ms']}ms, model={result['model_used']}")

    return {
        "session_id": session_id,
        "changes": result["changes"],
        "model_used": result["model_used"],
        "duration_ms": result["duration_ms"],
        "total_entries": len(entries),
        "corrections_found": len(result["changes"]),
    }
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/llm-review/apply")
async def apply_llm_corrections(session_id: str, request: Request):
    """Apply selected LLM corrections to vocab entries.

    Expects a JSON body ``{"accepted_indices": [int, ...]}`` where each
    index points into the ``changes`` list produced by /llm-review.

    Raises:
        HTTPException 404: unknown session.
        HTTPException 400: missing prerequisites or malformed body.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result:
        raise HTTPException(status_code=400, detail="No word result found")

    llm_review = word_result.get("llm_review")
    if not llm_review:
        raise HTTPException(status_code=400, detail="No LLM review found — run /llm-review first")

    # Guard body parsing: an empty or malformed body previously escaped as
    # an unhandled 500; report it as a clean client error instead.
    try:
        body = await request.json()
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid JSON body")
    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="Body must be a JSON object")
    accepted_indices = set(body.get("accepted_indices", []))  # indices into changes[]

    changes = llm_review.get("changes", [])
    entries = word_result.get("vocab_entries") or word_result.get("entries") or []

    # Build a lookup: (row_index, field) -> new_value for accepted changes.
    corrections = {}
    applied_count = 0
    for idx, change in enumerate(changes):
        if idx in accepted_indices:
            key = (change["row_index"], change["field"])
            corrections[key] = change["new"]
            applied_count += 1

    # Apply corrections in place; entries without a matching (row, field)
    # key are left untouched.
    for entry in entries:
        row_idx = entry.get("row_index", -1)
        for field_name in ("english", "german", "example"):
            key = (row_idx, field_name)
            if key in corrections:
                entry[field_name] = corrections[key]
                entry["llm_corrected"] = True

    # Update word_result under both aliases so every reader sees the
    # corrected entries, and record what was applied for auditing.
    word_result["vocab_entries"] = entries
    word_result["entries"] = entries
    word_result["llm_review"]["applied_count"] = applied_count
    # NOTE(review): naive UTC timestamp (no tzinfo) — matches existing
    # session fields; confirm before switching to timezone-aware datetimes.
    word_result["llm_review"]["applied_at"] = datetime.utcnow().isoformat()

    await update_session_db(session_id, word_result=word_result)

    # Keep the in-memory cache consistent with the DB.
    if session_id in _cache:
        _cache[session_id]["word_result"] = word_result

    logger.info(f"Applied {applied_count}/{len(changes)} LLM corrections for session {session_id}")

    return {
        "session_id": session_id,
        "applied_count": applied_count,
        "total_changes": len(changes),
    }
|
||||
|
||||
|
||||
async def _get_rows_overlay(session_id: str) -> Response:
|
||||
"""Generate dewarped image with row bands drawn on it."""
|
||||
session = await get_session_db(session_id)
|
||||
|
||||
Reference in New Issue
Block a user