feat(ocr-pipeline): add SSE streaming and phonetic filter to LLM review
- Stream LLM review results batch-by-batch (8 entries per batch) via SSE - Frontend shows live progress bar, batch log, and corrections appearing - Skip entries with IPA phonetic transcriptions (already dictionary-corrected) - Refactor llm_review_entries into reusable helpers for both streaming and non-streaming paths Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
'use client'
|
'use client'
|
||||||
|
|
||||||
import { useCallback, useState } from 'react'
|
import { useCallback, useRef, useState } from 'react'
|
||||||
|
|
||||||
const KLAUSUR_API = '/klausur-api'
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
@@ -11,19 +11,23 @@ interface LlmChange {
|
|||||||
new: string
|
new: string
|
||||||
}
|
}
|
||||||
|
|
||||||
interface LlmReviewResult {
|
|
||||||
changes: LlmChange[]
|
|
||||||
model_used: string
|
|
||||||
duration_ms: number
|
|
||||||
total_entries: number
|
|
||||||
corrections_found: number
|
|
||||||
}
|
|
||||||
|
|
||||||
interface StepLlmReviewProps {
|
interface StepLlmReviewProps {
|
||||||
sessionId: string | null
|
sessionId: string | null
|
||||||
onNext: () => void
|
onNext: () => void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface ReviewMeta {
|
||||||
|
total_entries: number
|
||||||
|
to_review: number
|
||||||
|
skipped: number
|
||||||
|
model: string
|
||||||
|
}
|
||||||
|
|
||||||
|
interface StreamProgress {
|
||||||
|
current: number
|
||||||
|
total: number
|
||||||
|
}
|
||||||
|
|
||||||
const FIELD_LABELS: Record<string, string> = {
|
const FIELD_LABELS: Record<string, string> = {
|
||||||
english: 'EN',
|
english: 'EN',
|
||||||
german: 'DE',
|
german: 'DE',
|
||||||
@@ -32,34 +36,96 @@ const FIELD_LABELS: Record<string, string> = {
|
|||||||
|
|
||||||
export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
||||||
const [status, setStatus] = useState<'idle' | 'running' | 'done' | 'error' | 'applied'>('idle')
|
const [status, setStatus] = useState<'idle' | 'running' | 'done' | 'error' | 'applied'>('idle')
|
||||||
const [result, setResult] = useState<LlmReviewResult | null>(null)
|
const [meta, setMeta] = useState<ReviewMeta | null>(null)
|
||||||
const [error, setError] = useState<string>('')
|
const [changes, setChanges] = useState<LlmChange[]>([])
|
||||||
|
const [progress, setProgress] = useState<StreamProgress | null>(null)
|
||||||
|
const [batchLog, setBatchLog] = useState<string[]>([])
|
||||||
|
const [totalDuration, setTotalDuration] = useState(0)
|
||||||
|
const [error, setError] = useState('')
|
||||||
const [accepted, setAccepted] = useState<Set<number>>(new Set())
|
const [accepted, setAccepted] = useState<Set<number>>(new Set())
|
||||||
const [applying, setApplying] = useState(false)
|
const [applying, setApplying] = useState(false)
|
||||||
|
const tableEndRef = useRef<HTMLDivElement>(null)
|
||||||
|
|
||||||
const runReview = useCallback(async () => {
|
const runReview = useCallback(async () => {
|
||||||
if (!sessionId) return
|
if (!sessionId) return
|
||||||
setStatus('running')
|
setStatus('running')
|
||||||
setError('')
|
setError('')
|
||||||
setResult(null)
|
setChanges([])
|
||||||
|
setBatchLog([])
|
||||||
|
setProgress(null)
|
||||||
|
setMeta(null)
|
||||||
|
setTotalDuration(0)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review`, {
|
const res = await fetch(
|
||||||
method: 'POST',
|
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review?stream=true`,
|
||||||
headers: { 'Content-Type': 'application/json' },
|
{ method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({}) },
|
||||||
body: JSON.stringify({}),
|
)
|
||||||
})
|
|
||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
const data = await res.json().catch(() => ({}))
|
const data = await res.json().catch(() => ({}))
|
||||||
throw new Error(data.detail || `HTTP ${res.status}`)
|
throw new Error(data.detail || `HTTP ${res.status}`)
|
||||||
}
|
}
|
||||||
|
|
||||||
const data: LlmReviewResult = await res.json()
|
const reader = res.body!.getReader()
|
||||||
setResult(data)
|
const decoder = new TextDecoder()
|
||||||
// Accept all changes by default
|
let buffer = ''
|
||||||
setAccepted(new Set(data.changes.map((_, i) => i)))
|
let allChanges: LlmChange[] = []
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
const { done, value } = await reader.read()
|
||||||
|
if (done) break
|
||||||
|
buffer += decoder.decode(value, { stream: true })
|
||||||
|
|
||||||
|
while (buffer.includes('\n\n')) {
|
||||||
|
const idx = buffer.indexOf('\n\n')
|
||||||
|
const chunk = buffer.slice(0, idx).trim()
|
||||||
|
buffer = buffer.slice(idx + 2)
|
||||||
|
|
||||||
|
if (!chunk.startsWith('data: ')) continue
|
||||||
|
const dataStr = chunk.slice(6)
|
||||||
|
|
||||||
|
let event: any
|
||||||
|
try { event = JSON.parse(dataStr) } catch { continue }
|
||||||
|
|
||||||
|
if (event.type === 'meta') {
|
||||||
|
setMeta({
|
||||||
|
total_entries: event.total_entries,
|
||||||
|
to_review: event.to_review,
|
||||||
|
skipped: event.skipped,
|
||||||
|
model: event.model,
|
||||||
|
})
|
||||||
|
setBatchLog([`${event.total_entries} Eintraege, ${event.skipped} uebersprungen (Lautschrift), ${event.to_review} zu pruefen`])
|
||||||
|
}
|
||||||
|
|
||||||
|
if (event.type === 'batch') {
|
||||||
|
const batchChanges: LlmChange[] = event.changes || []
|
||||||
|
allChanges = [...allChanges, ...batchChanges]
|
||||||
|
setChanges(allChanges)
|
||||||
|
setProgress(event.progress)
|
||||||
|
const rows = (event.entries_reviewed || []).map((r: number) => `R${r}`).join(', ')
|
||||||
|
setBatchLog(prev => [...prev,
|
||||||
|
`Batch ${event.batch_index + 1}: ${rows} — ${batchChanges.length} Korrektur${batchChanges.length !== 1 ? 'en' : ''} (${event.duration_ms}ms)`
|
||||||
|
])
|
||||||
|
setTimeout(() => tableEndRef.current?.scrollIntoView({ behavior: 'smooth', block: 'nearest' }), 16)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (event.type === 'complete') {
|
||||||
|
setTotalDuration(event.duration_ms)
|
||||||
|
setAccepted(new Set(allChanges.map((_, i) => i)))
|
||||||
setStatus('done')
|
setStatus('done')
|
||||||
|
}
|
||||||
|
|
||||||
|
if (event.type === 'error') {
|
||||||
|
throw new Error(event.detail || 'Unbekannter Fehler')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no complete event was received (e.g. 0 entries to review)
|
||||||
|
if (allChanges.length === 0 && status !== 'done') {
|
||||||
|
setStatus('done')
|
||||||
|
}
|
||||||
} catch (e: unknown) {
|
} catch (e: unknown) {
|
||||||
const msg = e instanceof Error ? e.message : String(e)
|
const msg = e instanceof Error ? e.message : String(e)
|
||||||
setError(msg)
|
setError(msg)
|
||||||
@@ -68,7 +134,7 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
}, [sessionId])
|
}, [sessionId])
|
||||||
|
|
||||||
const toggleChange = (index: number) => {
|
const toggleChange = (index: number) => {
|
||||||
setAccepted((prev) => {
|
setAccepted(prev => {
|
||||||
const next = new Set(prev)
|
const next = new Set(prev)
|
||||||
if (next.has(index)) next.delete(index)
|
if (next.has(index)) next.delete(index)
|
||||||
else next.add(index)
|
else next.add(index)
|
||||||
@@ -77,48 +143,39 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const toggleAll = () => {
|
const toggleAll = () => {
|
||||||
if (!result) return
|
if (accepted.size === changes.length) {
|
||||||
if (accepted.size === result.changes.length) {
|
|
||||||
setAccepted(new Set())
|
setAccepted(new Set())
|
||||||
} else {
|
} else {
|
||||||
setAccepted(new Set(result.changes.map((_, i) => i)))
|
setAccepted(new Set(changes.map((_, i) => i)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const applyChanges = useCallback(async () => {
|
const applyChanges = useCallback(async () => {
|
||||||
if (!sessionId || !result) return
|
if (!sessionId) return
|
||||||
setApplying(true)
|
setApplying(true)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review/apply`, {
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/llm-review/apply`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ accepted_indices: Array.from(accepted) }),
|
body: JSON.stringify({ accepted_indices: Array.from(accepted) }),
|
||||||
})
|
})
|
||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
const data = await res.json().catch(() => ({}))
|
const data = await res.json().catch(() => ({}))
|
||||||
throw new Error(data.detail || `HTTP ${res.status}`)
|
throw new Error(data.detail || `HTTP ${res.status}`)
|
||||||
}
|
}
|
||||||
|
|
||||||
setStatus('applied')
|
setStatus('applied')
|
||||||
} catch (e: unknown) {
|
} catch (e: unknown) {
|
||||||
const msg = e instanceof Error ? e.message : String(e)
|
setError(e instanceof Error ? e.message : String(e))
|
||||||
setError(msg)
|
|
||||||
} finally {
|
} finally {
|
||||||
setApplying(false)
|
setApplying(false)
|
||||||
}
|
}
|
||||||
}, [sessionId, result, accepted])
|
}, [sessionId, accepted])
|
||||||
|
|
||||||
if (!sessionId) {
|
if (!sessionId) {
|
||||||
return (
|
return <div className="text-center py-12 text-gray-400">Bitte zuerst eine Session auswaehlen.</div>
|
||||||
<div className="text-center py-12 text-gray-400">
|
|
||||||
Bitte zuerst eine Session auswaehlen.
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Idle state ---
|
// --- Idle ---
|
||||||
if (status === 'idle') {
|
if (status === 'idle') {
|
||||||
return (
|
return (
|
||||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||||
@@ -127,59 +184,104 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
Schritt 6: LLM-Korrektur
|
Schritt 6: LLM-Korrektur
|
||||||
</h3>
|
</h3>
|
||||||
<p className="text-gray-500 dark:text-gray-400 max-w-lg mb-2">
|
<p className="text-gray-500 dark:text-gray-400 max-w-lg mb-2">
|
||||||
Ein lokales Sprachmodell prueft die OCR-Ergebnisse auf typische Erkennungsfehler
|
Ein lokales Sprachmodell prueft die OCR-Ergebnisse auf typische Erkennungsfehler.
|
||||||
(z.B. "8en" statt "Ben") und schlaegt Korrekturen vor.
|
Eintraege mit Lautschrift werden automatisch uebersprungen.
|
||||||
</p>
|
</p>
|
||||||
<p className="text-xs text-gray-400 dark:text-gray-500 mb-6">
|
<p className="text-xs text-gray-400 dark:text-gray-500 mb-6">
|
||||||
Modell: <span className="font-mono">qwen3:30b-a3b</span> via Ollama (lokal)
|
Modell: <span className="font-mono">qwen3:30b-a3b</span> via Ollama (lokal)
|
||||||
</p>
|
</p>
|
||||||
<button
|
<button onClick={runReview}
|
||||||
onClick={runReview}
|
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
|
||||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium"
|
|
||||||
>
|
|
||||||
LLM-Korrektur starten
|
LLM-Korrektur starten
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Running state ---
|
// --- Running (with live progress) ---
|
||||||
if (status === 'running') {
|
if (status === 'running') {
|
||||||
|
const pct = progress ? Math.round((progress.current / progress.total) * 100) : 0
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex flex-col items-center justify-center py-16 text-center">
|
<div className="space-y-4">
|
||||||
<div className="animate-spin rounded-full h-10 w-10 border-b-2 border-teal-500 mb-4" />
|
<div className="flex items-center gap-3">
|
||||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-1">
|
<div className="animate-spin rounded-full h-5 w-5 border-b-2 border-teal-500" />
|
||||||
Korrektur laeuft...
|
<h3 className="text-base font-medium text-gray-700 dark:text-gray-300">
|
||||||
|
LLM-Korrektur laeuft...
|
||||||
</h3>
|
</h3>
|
||||||
<p className="text-sm text-gray-400">
|
{meta && (
|
||||||
<span className="font-mono">qwen3:30b-a3b</span> prueft die Vokabeleintraege
|
<span className="text-xs text-gray-400 font-mono">{meta.model}</span>
|
||||||
</p>
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Progress bar */}
|
||||||
|
{progress && (
|
||||||
|
<div className="space-y-1">
|
||||||
|
<div className="flex justify-between text-xs text-gray-400">
|
||||||
|
<span>{progress.current} / {progress.total} Eintraege geprueft</span>
|
||||||
|
<span>{pct}%</span>
|
||||||
|
</div>
|
||||||
|
<div className="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-2">
|
||||||
|
<div className="bg-teal-500 h-2 rounded-full transition-all duration-500" style={{ width: `${pct}%` }} />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Live batch log */}
|
||||||
|
<div className="bg-gray-50 dark:bg-gray-800/50 rounded-lg p-3 max-h-40 overflow-y-auto">
|
||||||
|
{batchLog.map((line, i) => (
|
||||||
|
<div key={i} className="text-xs text-gray-500 dark:text-gray-400 font-mono py-0.5">{line}</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Live changes appearing */}
|
||||||
|
{changes.length > 0 && (
|
||||||
|
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
|
||||||
|
<table className="w-full text-sm">
|
||||||
|
<thead>
|
||||||
|
<tr className="bg-gray-50 dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700">
|
||||||
|
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Zeile</th>
|
||||||
|
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Feld</th>
|
||||||
|
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Vorher</th>
|
||||||
|
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Nachher</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{changes.map((change, idx) => (
|
||||||
|
<tr key={idx} className="border-b border-gray-100 dark:border-gray-700/50 bg-teal-50/50 dark:bg-teal-900/10">
|
||||||
|
<td className="px-3 py-1.5 text-gray-500 dark:text-gray-400 font-mono text-xs">R{change.row_index}</td>
|
||||||
|
<td className="px-3 py-1.5">
|
||||||
|
<span className="text-xs px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400">
|
||||||
|
{FIELD_LABELS[change.field] || change.field}
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
<td className="px-3 py-1.5"><span className="line-through text-red-500 dark:text-red-400">{change.old}</span></td>
|
||||||
|
<td className="px-3 py-1.5"><span className="text-green-600 dark:text-green-400 font-medium">{change.new}</span></td>
|
||||||
|
</tr>
|
||||||
|
))}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<div ref={tableEndRef} />
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Error state ---
|
// --- Error ---
|
||||||
if (status === 'error') {
|
if (status === 'error') {
|
||||||
return (
|
return (
|
||||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||||
<div className="text-5xl mb-4">⚠️</div>
|
<div className="text-5xl mb-4">⚠️</div>
|
||||||
<h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">
|
<h3 className="text-lg font-medium text-red-600 dark:text-red-400 mb-2">Fehler bei LLM-Korrektur</h3>
|
||||||
Fehler bei LLM-Korrektur
|
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">{error}</p>
|
||||||
</h3>
|
|
||||||
<p className="text-sm text-gray-500 dark:text-gray-400 max-w-lg mb-4">
|
|
||||||
{error}
|
|
||||||
</p>
|
|
||||||
<div className="flex gap-3">
|
<div className="flex gap-3">
|
||||||
<button
|
<button onClick={runReview}
|
||||||
onClick={runReview}
|
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm">
|
||||||
className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm"
|
|
||||||
>
|
|
||||||
Erneut versuchen
|
Erneut versuchen
|
||||||
</button>
|
</button>
|
||||||
<button
|
<button onClick={onNext}
|
||||||
onClick={onNext}
|
className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm">
|
||||||
className="px-5 py-2 bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-300 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition-colors text-sm"
|
|
||||||
>
|
|
||||||
Ueberspringen →
|
Ueberspringen →
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
@@ -187,48 +289,37 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Applied state ---
|
// --- Applied ---
|
||||||
if (status === 'applied') {
|
if (status === 'applied') {
|
||||||
return (
|
return (
|
||||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||||
<div className="text-5xl mb-4">✅</div>
|
<div className="text-5xl mb-4">✅</div>
|
||||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">Korrekturen uebernommen</h3>
|
||||||
Korrekturen uebernommen
|
|
||||||
</h3>
|
|
||||||
<p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
|
<p className="text-sm text-gray-500 dark:text-gray-400 mb-6">
|
||||||
{accepted.size} von {result?.changes.length ?? 0} Korrekturen wurden angewendet.
|
{accepted.size} von {changes.length} Korrekturen wurden angewendet.
|
||||||
</p>
|
</p>
|
||||||
<button
|
<button onClick={onNext}
|
||||||
onClick={onNext}
|
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
|
||||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium"
|
|
||||||
>
|
|
||||||
Weiter →
|
Weiter →
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Done state: show diff table ---
|
// --- Done: diff table with checkboxes ---
|
||||||
const changes = result?.changes ?? []
|
|
||||||
|
|
||||||
if (changes.length === 0) {
|
if (changes.length === 0) {
|
||||||
return (
|
return (
|
||||||
<div className="flex flex-col items-center justify-center py-12 text-center">
|
<div className="flex flex-col items-center justify-center py-12 text-center">
|
||||||
<div className="text-5xl mb-4">👍</div>
|
<div className="text-5xl mb-4">👍</div>
|
||||||
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">
|
<h3 className="text-lg font-medium text-gray-700 dark:text-gray-300 mb-2">Keine Korrekturen noetig</h3>
|
||||||
Keine Korrekturen noetig
|
<p className="text-sm text-gray-500 dark:text-gray-400 mb-1">Das LLM hat keine OCR-Fehler gefunden.</p>
|
||||||
</h3>
|
{meta && (
|
||||||
<p className="text-sm text-gray-500 dark:text-gray-400 mb-1">
|
|
||||||
Das LLM hat keine OCR-Fehler gefunden.
|
|
||||||
</p>
|
|
||||||
<p className="text-xs text-gray-400 dark:text-gray-500 mb-6">
|
<p className="text-xs text-gray-400 dark:text-gray-500 mb-6">
|
||||||
{result?.total_entries} Eintraege geprueft in {result?.duration_ms}ms
|
{meta.to_review} geprueft, {meta.skipped} uebersprungen · {totalDuration}ms · {meta.model}
|
||||||
({result?.model_used})
|
|
||||||
</p>
|
</p>
|
||||||
<button
|
)}
|
||||||
onClick={onNext}
|
<button onClick={onNext}
|
||||||
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium"
|
className="px-6 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors font-medium">
|
||||||
>
|
|
||||||
Weiter →
|
Weiter →
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
@@ -240,23 +331,18 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
{/* Header */}
|
{/* Header */}
|
||||||
<div className="flex items-center justify-between">
|
<div className="flex items-center justify-between">
|
||||||
<div>
|
<div>
|
||||||
<h3 className="text-base font-medium text-gray-700 dark:text-gray-300">
|
<h3 className="text-base font-medium text-gray-700 dark:text-gray-300">LLM-Korrekturvorschlaege</h3>
|
||||||
LLM-Korrekturvorschlaege
|
|
||||||
</h3>
|
|
||||||
<p className="text-xs text-gray-400 mt-0.5">
|
<p className="text-xs text-gray-400 mt-0.5">
|
||||||
{changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden
|
{changes.length} Korrektur{changes.length !== 1 ? 'en' : ''} gefunden
|
||||||
· {result?.duration_ms}ms · {result?.model_used}
|
{meta && <> · {meta.skipped} uebersprungen (Lautschrift)</>}
|
||||||
|
{' '}· {totalDuration}ms · {meta?.model}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex items-center gap-2">
|
<button onClick={toggleAll}
|
||||||
<button
|
className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400">
|
||||||
onClick={toggleAll}
|
|
||||||
className="text-xs px-3 py-1.5 border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400"
|
|
||||||
>
|
|
||||||
{accepted.size === changes.length ? 'Keine' : 'Alle'} auswaehlen
|
{accepted.size === changes.length ? 'Keine' : 'Alle'} auswaehlen
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Diff table */}
|
{/* Diff table */}
|
||||||
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
|
<div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden">
|
||||||
@@ -264,12 +350,8 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
<thead>
|
<thead>
|
||||||
<tr className="bg-gray-50 dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700">
|
<tr className="bg-gray-50 dark:bg-gray-800 border-b border-gray-200 dark:border-gray-700">
|
||||||
<th className="w-10 px-3 py-2 text-center">
|
<th className="w-10 px-3 py-2 text-center">
|
||||||
<input
|
<input type="checkbox" checked={accepted.size === changes.length} onChange={toggleAll}
|
||||||
type="checkbox"
|
className="rounded border-gray-300 dark:border-gray-600" />
|
||||||
checked={accepted.size === changes.length}
|
|
||||||
onChange={toggleAll}
|
|
||||||
className="rounded border-gray-300 dark:border-gray-600"
|
|
||||||
/>
|
|
||||||
</th>
|
</th>
|
||||||
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Zeile</th>
|
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Zeile</th>
|
||||||
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Feld</th>
|
<th className="px-3 py-2 text-left text-gray-500 dark:text-gray-400 font-medium">Feld</th>
|
||||||
@@ -279,40 +361,21 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{changes.map((change, idx) => (
|
{changes.map((change, idx) => (
|
||||||
<tr
|
<tr key={idx} className={`border-b border-gray-100 dark:border-gray-700/50 ${
|
||||||
key={idx}
|
accepted.has(idx) ? 'bg-teal-50/50 dark:bg-teal-900/10' : 'bg-white dark:bg-gray-800/50'
|
||||||
className={`border-b border-gray-100 dark:border-gray-700/50 ${
|
}`}>
|
||||||
accepted.has(idx)
|
|
||||||
? 'bg-teal-50/50 dark:bg-teal-900/10'
|
|
||||||
: 'bg-white dark:bg-gray-800/50'
|
|
||||||
}`}
|
|
||||||
>
|
|
||||||
<td className="px-3 py-2 text-center">
|
<td className="px-3 py-2 text-center">
|
||||||
<input
|
<input type="checkbox" checked={accepted.has(idx)} onChange={() => toggleChange(idx)}
|
||||||
type="checkbox"
|
className="rounded border-gray-300 dark:border-gray-600" />
|
||||||
checked={accepted.has(idx)}
|
|
||||||
onChange={() => toggleChange(idx)}
|
|
||||||
className="rounded border-gray-300 dark:border-gray-600"
|
|
||||||
/>
|
|
||||||
</td>
|
|
||||||
<td className="px-3 py-2 text-gray-500 dark:text-gray-400 font-mono text-xs">
|
|
||||||
R{change.row_index}
|
|
||||||
</td>
|
</td>
|
||||||
|
<td className="px-3 py-2 text-gray-500 dark:text-gray-400 font-mono text-xs">R{change.row_index}</td>
|
||||||
<td className="px-3 py-2">
|
<td className="px-3 py-2">
|
||||||
<span className="text-xs px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400">
|
<span className="text-xs px-1.5 py-0.5 rounded bg-gray-100 dark:bg-gray-700 text-gray-600 dark:text-gray-400">
|
||||||
{FIELD_LABELS[change.field] || change.field}
|
{FIELD_LABELS[change.field] || change.field}
|
||||||
</span>
|
</span>
|
||||||
</td>
|
</td>
|
||||||
<td className="px-3 py-2">
|
<td className="px-3 py-2"><span className="line-through text-red-500 dark:text-red-400">{change.old}</span></td>
|
||||||
<span className="line-through text-red-500 dark:text-red-400">
|
<td className="px-3 py-2"><span className="text-green-600 dark:text-green-400 font-medium">{change.new}</span></td>
|
||||||
{change.old}
|
|
||||||
</span>
|
|
||||||
</td>
|
|
||||||
<td className="px-3 py-2">
|
|
||||||
<span className="text-green-600 dark:text-green-400 font-medium">
|
|
||||||
{change.new}
|
|
||||||
</span>
|
|
||||||
</td>
|
|
||||||
</tr>
|
</tr>
|
||||||
))}
|
))}
|
||||||
</tbody>
|
</tbody>
|
||||||
@@ -321,21 +384,14 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
|
|||||||
|
|
||||||
{/* Actions */}
|
{/* Actions */}
|
||||||
<div className="flex items-center justify-between pt-2">
|
<div className="flex items-center justify-between pt-2">
|
||||||
<p className="text-xs text-gray-400">
|
<p className="text-xs text-gray-400">{accepted.size} von {changes.length} ausgewaehlt</p>
|
||||||
{accepted.size} von {changes.length} ausgewaehlt
|
|
||||||
</p>
|
|
||||||
<div className="flex gap-3">
|
<div className="flex gap-3">
|
||||||
<button
|
<button onClick={onNext}
|
||||||
onClick={onNext}
|
className="px-4 py-2 text-sm border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400">
|
||||||
className="px-4 py-2 text-sm border border-gray-300 dark:border-gray-600 rounded-lg hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors text-gray-600 dark:text-gray-400"
|
|
||||||
>
|
|
||||||
Alle ablehnen
|
Alle ablehnen
|
||||||
</button>
|
</button>
|
||||||
<button
|
<button onClick={applyChanges} disabled={applying || accepted.size === 0}
|
||||||
onClick={applyChanges}
|
className="px-5 py-2 text-sm bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors font-medium">
|
||||||
disabled={applying || accepted.size === 0}
|
|
||||||
className="px-5 py-2 text-sm bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors font-medium"
|
|
||||||
>
|
|
||||||
{applying ? 'Wird uebernommen...' : `${accepted.size} Korrektur${accepted.size !== 1 ? 'en' : ''} uebernehmen`}
|
{applying ? 'Wird uebernommen...' : `${accepted.size} Korrektur${accepted.size !== 1 ? 'en' : ''} uebernehmen`}
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -4318,25 +4318,30 @@ import re as _re
|
|||||||
_OLLAMA_URL = os.getenv("OLLAMA_URL", os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434"))
|
_OLLAMA_URL = os.getenv("OLLAMA_URL", os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434"))
|
||||||
OLLAMA_REVIEW_MODEL = os.getenv("OLLAMA_REVIEW_MODEL", "qwen3:30b-a3b")
|
OLLAMA_REVIEW_MODEL = os.getenv("OLLAMA_REVIEW_MODEL", "qwen3:30b-a3b")
|
||||||
|
|
||||||
|
# Regex: entry contains IPA phonetic brackets like "dance [dɑːns]"
|
||||||
|
_HAS_PHONETIC_RE = _re.compile(r'\[.*?[ˈˌːʃʒθðŋɑɒɔəɜɪʊʌæ].*?\]')
|
||||||
|
|
||||||
async def llm_review_entries(
|
|
||||||
entries: List[Dict],
|
|
||||||
model: str = None,
|
|
||||||
) -> Dict:
|
|
||||||
"""Send vocab entries to a local LLM for OCR error correction."""
|
|
||||||
model = model or OLLAMA_REVIEW_MODEL
|
|
||||||
|
|
||||||
# Build a compact table representation for the prompt
|
def _entry_needs_review(entry: Dict) -> bool:
|
||||||
table_lines = []
|
"""Check if an entry should be sent to the LLM for review.
|
||||||
for e in entries:
|
|
||||||
table_lines.append({
|
|
||||||
"row": e.get("row_index", 0),
|
|
||||||
"en": e.get("english", ""),
|
|
||||||
"de": e.get("german", ""),
|
|
||||||
"ex": e.get("example", ""),
|
|
||||||
})
|
|
||||||
|
|
||||||
prompt = f"""Du bist ein Korrekturleser fuer OCR-erkannte Vokabeltabellen (Englisch-Deutsch).
|
Skip entries that are empty or contain IPA phonetic transcriptions
|
||||||
|
(those were already corrected by the word dictionary lookup).
|
||||||
|
"""
|
||||||
|
en = entry.get("english", "") or ""
|
||||||
|
de = entry.get("german", "") or ""
|
||||||
|
# Skip completely empty entries
|
||||||
|
if not en.strip() and not de.strip():
|
||||||
|
return False
|
||||||
|
# Skip entries with phonetic/IPA brackets — these are dictionary-corrected
|
||||||
|
if _HAS_PHONETIC_RE.search(en):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _build_llm_prompt(table_lines: List[Dict]) -> str:
|
||||||
|
"""Build the LLM correction prompt for a batch of entries."""
|
||||||
|
return f"""Du bist ein Korrekturleser fuer OCR-erkannte Vokabeltabellen (Englisch-Deutsch).
|
||||||
Die Tabelle wurde per OCR aus einem Schulbuch-Scan extrahiert. Korrigiere NUR offensichtliche OCR-Fehler.
|
Die Tabelle wurde per OCR aus einem Schulbuch-Scan extrahiert. Korrigiere NUR offensichtliche OCR-Fehler.
|
||||||
|
|
||||||
Haeufige OCR-Fehler die du korrigieren sollst:
|
Haeufige OCR-Fehler die du korrigieren sollst:
|
||||||
@@ -4359,28 +4364,12 @@ Fuer unveraenderte Eintraege setze "corrected": false.
|
|||||||
Eingabe:
|
Eingabe:
|
||||||
{_json.dumps(table_lines, ensure_ascii=False, indent=2)}"""
|
{_json.dumps(table_lines, ensure_ascii=False, indent=2)}"""
|
||||||
|
|
||||||
t0 = time.time()
|
|
||||||
async with httpx.AsyncClient(timeout=300.0) as client:
|
|
||||||
resp = await client.post(
|
|
||||||
f"{_OLLAMA_URL}/api/chat",
|
|
||||||
json={
|
|
||||||
"model": model,
|
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
|
||||||
"stream": False,
|
|
||||||
"options": {"temperature": 0.1, "num_predict": 8192},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
content = resp.json().get("message", {}).get("content", "")
|
|
||||||
duration_ms = int((time.time() - t0) * 1000)
|
|
||||||
|
|
||||||
# Parse LLM response — extract JSON array
|
def _diff_batch(originals: List[Dict], corrected: List[Dict]) -> Tuple[List[Dict], List[Dict]]:
|
||||||
corrected = _parse_llm_json_array(content)
|
"""Compare original entries with LLM-corrected ones, return (changes, corrected_entries)."""
|
||||||
|
|
||||||
# Build diff: compare original vs corrected
|
|
||||||
changes = []
|
changes = []
|
||||||
entries_corrected = []
|
entries_out = []
|
||||||
for i, orig in enumerate(entries):
|
for i, orig in enumerate(originals):
|
||||||
if i < len(corrected):
|
if i < len(corrected):
|
||||||
c = corrected[i]
|
c = corrected[i]
|
||||||
entry = dict(orig)
|
entry = dict(orig)
|
||||||
@@ -4396,19 +4385,171 @@ Eingabe:
|
|||||||
})
|
})
|
||||||
entry[field_name] = new_val
|
entry[field_name] = new_val
|
||||||
entry["llm_corrected"] = True
|
entry["llm_corrected"] = True
|
||||||
entries_corrected.append(entry)
|
entries_out.append(entry)
|
||||||
else:
|
else:
|
||||||
entries_corrected.append(dict(orig))
|
entries_out.append(dict(orig))
|
||||||
|
return changes, entries_out
|
||||||
|
|
||||||
|
|
||||||
|
async def llm_review_entries(
    entries: List[Dict],
    model: str = None,
) -> Dict:
    """Send vocab entries to a local LLM for OCR error correction (single batch).

    Entries that ``_entry_needs_review`` rejects (e.g. rows already
    dictionary-corrected) are passed through untouched; only the remaining
    rows are sent to the model, and their corrections are merged back into
    the full list at their original positions.

    Args:
        entries: Vocab entry dicts (expected keys: ``row_index``, ``english``,
            ``german``, ``example``; missing keys default to empty values).
        model: Ollama model name; falls back to ``OLLAMA_REVIEW_MODEL``.

    Returns:
        Dict with ``entries_original``, ``entries_corrected``, ``changes``,
        ``skipped_count``, ``model_used`` and ``duration_ms``.

    Raises:
        httpx.HTTPStatusError: if the Ollama API responds with an error status.
    """
    model = model or OLLAMA_REVIEW_MODEL

    # Partition: keep (original_index, entry) pairs for rows worth reviewing
    # so corrections can be written back to the right slots afterwards.
    candidates = []
    for position, entry in enumerate(entries):
        if _entry_needs_review(entry):
            candidates.append((position, entry))

    if not candidates:
        # Nothing to send — return copies so callers may mutate safely.
        return {
            "entries_original": entries,
            "entries_corrected": [dict(e) for e in entries],
            "changes": [],
            "skipped_count": len(entries),
            "model_used": model,
            "duration_ms": 0,
        }

    to_review = [entry for _, entry in candidates]
    payload_rows = []
    for entry in to_review:
        payload_rows.append({
            "row": entry.get("row_index", 0),
            "en": entry.get("english", ""),
            "de": entry.get("german", ""),
            "ex": entry.get("example", ""),
        })

    prompt = _build_llm_prompt(payload_rows)

    started = time.time()
    request_body = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
        # Low temperature: deterministic proofreading, not creative rewriting.
        "options": {"temperature": 0.1, "num_predict": 8192},
    }
    async with httpx.AsyncClient(timeout=300.0) as client:
        resp = await client.post(f"{_OLLAMA_URL}/api/chat", json=request_body)
        resp.raise_for_status()
        content = resp.json().get("message", {}).get("content", "")
        duration_ms = int((time.time() - started) * 1000)

    changes, fixed = _diff_batch(to_review, _parse_llm_json_array(content))

    # Merge corrected rows back into a copy of the full list.
    merged = [dict(e) for e in entries]
    for offset, (position, _) in enumerate(candidates):
        if offset < len(fixed):
            merged[position] = fixed[offset]

    return {
        "entries_original": entries,
        "entries_corrected": merged,
        "changes": changes,
        "skipped_count": len(entries) - len(candidates),
        "model_used": model,
        "duration_ms": duration_ms,
    }
|
||||||
|
|
||||||
|
|
||||||
|
async def llm_review_entries_streaming(
    entries: List[Dict],
    model: str = None,
    batch_size: int = 8,
):
    """Async generator: yield SSE-ready events while reviewing entries in batches.

    Event protocol (each yielded value is a plain dict):
      - ``{"type": "meta", ...}``     — once, before any LLM work starts
      - ``{"type": "batch", ...}``    — after each completed LLM batch
      - ``{"type": "complete", ...}`` — once, with the merged final result

    Args:
        entries: Vocab entry dicts (expected keys: ``row_index``, ``english``,
            ``german``, ``example``).
        model: Ollama model name; falls back to ``OLLAMA_REVIEW_MODEL``.
        batch_size: Number of entries per LLM request (default 8).

    Raises:
        httpx.HTTPStatusError: if the Ollama API responds with an error status.
    """
    model = model or OLLAMA_REVIEW_MODEL

    # Separate reviewable from skipped entries, remembering original indices
    # so batch corrections can be merged back into the full list.
    reviewable = []
    skipped_indices = []
    for i, e in enumerate(entries):
        if _entry_needs_review(e):
            reviewable.append((i, e))
        else:
            skipped_indices.append(i)

    total_to_review = len(reviewable)

    # meta event: lets the frontend size its progress bar before work begins.
    yield {
        "type": "meta",
        "total_entries": len(entries),
        "to_review": total_to_review,
        "skipped": len(skipped_indices),
        "model": model,
        "batch_size": batch_size,
    }

    all_changes = []
    all_corrected = [dict(e) for e in entries]
    total_duration_ms = 0
    reviewed_count = 0

    # Reuse ONE HTTP client across all batches instead of opening a new
    # AsyncClient per batch — avoids re-establishing a TCP connection to
    # Ollama for every 8-entry request.
    async with httpx.AsyncClient(timeout=300.0) as client:
        for batch_start in range(0, total_to_review, batch_size):
            batch_items = reviewable[batch_start:batch_start + batch_size]
            batch_entries = [e for _, e in batch_items]

            table_lines = [
                {"row": e.get("row_index", 0), "en": e.get("english", ""), "de": e.get("german", ""), "ex": e.get("example", "")}
                for e in batch_entries
            ]

            prompt = _build_llm_prompt(table_lines)

            t0 = time.time()
            resp = await client.post(
                f"{_OLLAMA_URL}/api/chat",
                json={
                    "model": model,
                    "messages": [{"role": "user", "content": prompt}],
                    "stream": False,
                    # Smaller num_predict than the non-streaming path: each
                    # request carries at most `batch_size` rows.
                    "options": {"temperature": 0.1, "num_predict": 4096},
                },
            )
            resp.raise_for_status()
            content = resp.json().get("message", {}).get("content", "")
            batch_ms = int((time.time() - t0) * 1000)
            total_duration_ms += batch_ms

            corrected = _parse_llm_json_array(content)
            batch_changes, batch_corrected = _diff_batch(batch_entries, corrected)

            # Merge this batch's corrections back at the original indices.
            for batch_idx, (orig_idx, _) in enumerate(batch_items):
                if batch_idx < len(batch_corrected):
                    all_corrected[orig_idx] = batch_corrected[batch_idx]

            all_changes.extend(batch_changes)
            reviewed_count += len(batch_items)

            # Yield batch result so the frontend can show live progress.
            yield {
                "type": "batch",
                "batch_index": batch_start // batch_size,
                "entries_reviewed": [e.get("row_index", 0) for _, e in batch_items],
                "changes": batch_changes,
                "duration_ms": batch_ms,
                "progress": {"current": reviewed_count, "total": total_to_review},
            }

    # Complete event: full merged result, mirrors llm_review_entries' shape.
    yield {
        "type": "complete",
        "changes": all_changes,
        "model_used": model,
        "duration_ms": total_duration_ms,
        "total_entries": len(entries),
        "reviewed": total_to_review,
        "skipped": len(skipped_indices),
        "corrections_found": len(all_changes),
        "entries_corrected": all_corrected,
    }
|
||||||
|
|
||||||
|
|
||||||
def _parse_llm_json_array(text: str) -> List[Dict]:
|
def _parse_llm_json_array(text: str) -> List[Dict]:
|
||||||
"""Extract JSON array from LLM response (may contain markdown fences)."""
|
"""Extract JSON array from LLM response (may contain markdown fences)."""
|
||||||
# Strip markdown code fences
|
# Strip markdown code fences
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ from cv_vocab_pipeline import (
|
|||||||
dewarp_image,
|
dewarp_image,
|
||||||
dewarp_image_manual,
|
dewarp_image_manual,
|
||||||
llm_review_entries,
|
llm_review_entries,
|
||||||
|
llm_review_entries_streaming,
|
||||||
render_image_high_res,
|
render_image_high_res,
|
||||||
render_pdf_high_res,
|
render_pdf_high_res,
|
||||||
)
|
)
|
||||||
@@ -1395,8 +1396,12 @@ async def get_word_ground_truth(session_id: str):
|
|||||||
|
|
||||||
|
|
||||||
@router.post("/sessions/{session_id}/llm-review")
|
@router.post("/sessions/{session_id}/llm-review")
|
||||||
async def run_llm_review(session_id: str, request: Request):
|
async def run_llm_review(session_id: str, request: Request, stream: bool = False):
|
||||||
"""Run LLM-based correction on vocab entries from Step 5."""
|
"""Run LLM-based correction on vocab entries from Step 5.
|
||||||
|
|
||||||
|
Query params:
|
||||||
|
stream: false (default) for JSON response, true for SSE streaming
|
||||||
|
"""
|
||||||
session = await get_session_db(session_id)
|
session = await get_session_db(session_id)
|
||||||
if not session:
|
if not session:
|
||||||
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
|
||||||
@@ -1417,6 +1422,14 @@ async def run_llm_review(session_id: str, request: Request):
|
|||||||
pass
|
pass
|
||||||
model = body.get("model") or OLLAMA_REVIEW_MODEL
|
model = body.get("model") or OLLAMA_REVIEW_MODEL
|
||||||
|
|
||||||
|
if stream:
|
||||||
|
return StreamingResponse(
|
||||||
|
_llm_review_stream_generator(session_id, entries, word_result, model, request),
|
||||||
|
media_type="text/event-stream",
|
||||||
|
headers={"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Non-streaming path
|
||||||
try:
|
try:
|
||||||
result = await llm_review_entries(entries, model=model)
|
result = await llm_review_entries(entries, model=model)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -1449,6 +1462,44 @@ async def run_llm_review(session_id: str, request: Request):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _llm_review_stream_generator(
    session_id: str,
    entries: List[Dict],
    word_result: Dict,
    model: str,
    request: Request,
):
    """SSE generator that yields batch-by-batch LLM review progress.

    Wraps ``llm_review_entries_streaming`` and:
      - stops early (without persisting) when the client disconnects,
      - persists the final result to the session DB on the ``complete`` event,
      - converts unexpected exceptions into a terminal ``error`` SSE event so
        the stream always ends with a well-formed frame.

    Args:
        session_id: Session whose DB record and cache entry get updated.
        entries: Vocab entries to review (passed through to the streamer).
        word_result: Session word-result dict; mutated with an ``llm_review``
            key on completion and written back to the DB.
        model: Ollama model name to use for the review.
        request: Incoming request, polled for client disconnect between events.
    """
    try:
        async for event in llm_review_entries_streaming(entries, model=model):
            # Bail out quietly if the browser went away; nothing is persisted.
            if await request.is_disconnected():
                logger.info(f"SSE: client disconnected during LLM review for {session_id}")
                return

            yield f"data: {json.dumps(event, ensure_ascii=False)}\n\n"

            # On complete: persist to DB (and warm cache) so the result
            # survives after the stream closes.
            if event.get("type") == "complete":
                word_result["llm_review"] = {
                    "changes": event["changes"],
                    "model_used": event["model_used"],
                    "duration_ms": event["duration_ms"],
                    "entries_corrected": event["entries_corrected"],
                }
                await update_session_db(session_id, word_result=word_result, current_step=6)
                if session_id in _cache:
                    _cache[session_id]["word_result"] = word_result

                logger.info(f"LLM review SSE session {session_id}: {event['corrections_found']} changes, "
                            f"{event['duration_ms']}ms, skipped={event['skipped']}, model={event['model_used']}")

    except Exception as e:
        import traceback
        logger.error(f"LLM review SSE failed for {session_id}: {type(e).__name__}: {e}\n{traceback.format_exc()}")
        error_event = {"type": "error", "detail": f"{type(e).__name__}: {e}"}
        # ensure_ascii=False for consistency with the data events above
        # (German error text may contain umlauts; json.dumps still escapes
        # newlines inside strings, so SSE framing stays intact).
        yield f"data: {json.dumps(error_event, ensure_ascii=False)}\n\n"
|
||||||
|
|
||||||
|
|
||||||
@router.post("/sessions/{session_id}/llm-review/apply")
|
@router.post("/sessions/{session_id}/llm-review/apply")
|
||||||
async def apply_llm_corrections(session_id: str, request: Request):
|
async def apply_llm_corrections(session_id: str, request: Request):
|
||||||
"""Apply selected LLM corrections to vocab entries."""
|
"""Apply selected LLM corrections to vocab entries."""
|
||||||
|
|||||||
Reference in New Issue
Block a user