Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 19s
Track A (Backend): - Compound word IPA decomposition (schoolbag→school+bag) - Trailing garbled IPA fragment removal after brackets (R21 fix) - Regression runner with DB persistence, history endpoints - Page crop determinism verified with tests Track B (Frontend): - OCR Regression dashboard (/ai/ocr-regression) - Ground Truth Review workflow (/ai/ocr-ground-truth) with split-view, confidence highlighting, inline edit, batch mark, progress tracking Track C (Docs): - OCR-Pipeline.md v5.0 (Steps 5e-5h) - Regression testing guide - mkdocs.yml nav update Track D (Infra): - TrOCR baseline benchmark script - run-regression.sh shell script - Migration 008: regression_runs table Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
392 lines
16 KiB
TypeScript
392 lines
16 KiB
TypeScript
'use client'
|
|
|
|
/**
|
|
* OCR Regression Dashboard
|
|
*
|
|
* Shows all ground-truth sessions, runs regression tests,
|
|
* displays pass/fail results with diff details, and shows history.
|
|
*/
|
|
|
|
import { useState, useEffect, useCallback } from 'react'
|
|
import { PagePurpose } from '@/components/common/PagePurpose'
|
|
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
|
|
|
|
// Path prefix prepended to every fetch URL in this file (presumably a proxy route to the Klausur backend — confirm against the app's rewrite config).
const KLAUSUR_API = '/klausur-api'
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Types
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * One ground-truth session as listed by the `ground-truth-sessions` endpoint.
 * The page renders `name` (falling back to `session_id`), `filename`,
 * `pipeline` and the cell/zone counts from `summary`.
 */
interface GTSession {
  session_id: string;
  name: string;
  filename: string;
  document_category: string | null;
  pipeline: string | null;
  saved_at: string | null;
  /** Element counts for the session. */
  summary: {
    total_zones: number;
    total_columns: number;
    total_rows: number;
    total_cells: number;
  };
}
|
|
|
|
/**
 * Per-category change counts attached to a regression result.
 * The result row displays `text_changes` and `structural_changes`.
 */
interface DiffSummary {
  structural_changes: number;
  cells_missing: number;
  cells_added: number;
  text_changes: number;
  col_type_changes: number;
}
|
|
|
|
/**
 * Outcome of a regression run for a single ground-truth session.
 * Only `session_id`, `name` and `status` are required; the remaining fields
 * are optional and rendered only when present.
 */
interface RegressionResult {
  session_id: string;
  name: string;
  status: 'pass' | 'fail' | 'error';
  /** Message shown in red in the result row when present. */
  error?: string;
  diff_summary?: DiffSummary;
  reference_summary?: Record<string, number>;
  current_summary?: Record<string, number>;
  /** Field-level count differences, rendered as `field: reference → current`. */
  structural_diffs?: { field: string; reference: number; current: number }[];
  /** Per-cell differences; `type` selects the badge colour in the detail list. */
  cell_diffs?: { type: string; cell_id: string; reference?: string; current?: string }[];
}
|
|
|
|
/**
 * One row of the regression history table, as returned by the
 * `regression/history` endpoint.
 */
interface RegressionRun {
  id: string;
  /** Timestamp string; passed through `formatDate` for display. */
  run_at: string;
  status: string;
  total: number;
  passed: number;
  failed: number;
  errors: number;
  /** Run duration in milliseconds; the table divides by 1000 to show seconds. */
  duration_ms: number;
  triggered_by: string;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Small pill-shaped badge for a test status.
 *
 * `'pass'` renders green with the label "Pass", `'fail'` renders red with
 * "Fail"; every other value renders amber with the label "Error".
 */
function StatusBadge({ status }: { status: string }) {
  let palette: string
  let label: string

  switch (status) {
    case 'pass':
      palette = 'bg-emerald-100 text-emerald-800 border-emerald-200'
      label = 'Pass'
      break
    case 'fail':
      palette = 'bg-red-100 text-red-800 border-red-200'
      label = 'Fail'
      break
    default:
      // Any unrecognised status is presented as an error.
      palette = 'bg-amber-100 text-amber-800 border-amber-200'
      label = 'Error'
  }

  return (
    <span className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium border ${palette}`}>
      {label}
    </span>
  )
}
|
|
|
|
/**
 * Formats a timestamp string for display using the `de-DE` locale
 * (two-digit day, month, hour and minute; numeric year).
 *
 * @param iso - Timestamp string understood by the `Date` constructor, or `null`.
 * @returns The localized date/time text, or an em dash when `iso` is empty,
 *          `null`, or cannot be parsed into a valid date.
 */
function formatDate(iso: string | null) {
  if (!iso) return '—'

  const parsed = new Date(iso)
  // An unparseable string yields an invalid Date whose toLocaleString() is the
  // literal text "Invalid Date"; show the same placeholder as for a missing value.
  if (Number.isNaN(parsed.getTime())) return '—'

  return parsed.toLocaleString('de-DE', {
    day: '2-digit', month: '2-digit', year: 'numeric',
    hour: '2-digit', minute: '2-digit',
  })
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Component
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * OCR regression dashboard page.
 *
 * On mount it loads the ground-truth sessions and the most recent regression
 * runs. The "Alle Tests starten" button POSTs to the regression run endpoint
 * and shows the per-session results; a second tab lists the run history.
 *
 * A failed run request (network error or non-OK HTTP status) is surfaced as an
 * overall `'error'` status so the user gets feedback after the spinner stops.
 */
export default function OCRRegressionPage() {
  const [sessions, setSessions] = useState<GTSession[]>([])
  const [results, setResults] = useState<RegressionResult[]>([])
  const [history, setHistory] = useState<RegressionRun[]>([])
  const [running, setRunning] = useState(false)
  const [overallStatus, setOverallStatus] = useState<string | null>(null)
  const [durationMs, setDurationMs] = useState<number | null>(null)
  const [expandedSession, setExpandedSession] = useState<string | null>(null)
  const [tab, setTab] = useState<'current' | 'history'>('current')

  // Load ground-truth sessions
  const loadSessions = useCallback(async () => {
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/ground-truth-sessions`)
      if (!res.ok) {
        // Keep the current list, but leave a trace instead of failing silently.
        console.error(`Failed to load GT sessions: HTTP ${res.status}`)
        return
      }
      const data = await res.json()
      setSessions(data.sessions || [])
    } catch (e) {
      console.error('Failed to load GT sessions:', e)
    }
  }, [])

  // Load history
  const loadHistory = useCallback(async () => {
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/regression/history?limit=20`)
      if (!res.ok) {
        console.error(`Failed to load history: HTTP ${res.status}`)
        return
      }
      const data = await res.json()
      setHistory(data.runs || [])
    } catch (e) {
      console.error('Failed to load history:', e)
    }
  }, [])

  useEffect(() => {
    loadSessions()
    loadHistory()
  }, [loadSessions, loadHistory])

  // Run all regressions
  const runAll = async () => {
    setRunning(true)
    setResults([])
    setOverallStatus(null)
    setDurationMs(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/regression/run?triggered_by=manual`, {
        method: 'POST',
      })
      if (!res.ok) {
        // A rejected request must be visible to the user, same as a network failure.
        console.error(`Regression run failed: HTTP ${res.status}`)
        setOverallStatus('error')
        return
      }
      const data = await res.json()
      setResults(data.results || [])
      setOverallStatus(data.status ?? null)
      // A missing duration would pass the `!== null` check below and render "NaNs".
      setDurationMs(typeof data.duration_ms === 'number' ? data.duration_ms : null)
      loadHistory()
    } catch (e) {
      console.error('Regression run failed:', e)
      setOverallStatus('error')
    } finally {
      setRunning(false)
    }
  }

  const countByStatus = (status: RegressionResult['status']) =>
    results.filter(r => r.status === status).length
  const totalPass = countByStatus('pass')
  const totalFail = countByStatus('fail')
  const totalError = countByStatus('error')

  // Clicking an already expanded row collapses it again.
  const toggleSession = (sessionId: string) =>
    setExpandedSession(prev => (prev === sessionId ? null : sessionId))

  return (
    <AIToolsSidebarResponsive>
      <div className="max-w-7xl mx-auto p-6 space-y-6">
        <PagePurpose moduleId="ocr-regression" />

        {/* Header + Run Button */}
        <div className="flex items-center justify-between">
          <div>
            <h1 className="text-2xl font-bold text-slate-900">OCR Regression Tests</h1>
            <p className="text-sm text-slate-500 mt-1">
              {sessions.length} Ground-Truth Session{sessions.length !== 1 ? 's' : ''}
            </p>
          </div>
          <button
            onClick={runAll}
            disabled={running || sessions.length === 0}
            className="inline-flex items-center gap-2 px-4 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed font-medium transition-colors"
          >
            {running ? (
              <>
                <svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
                  <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
                  <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
                </svg>
                Laeuft...
              </>
            ) : (
              'Alle Tests starten'
            )}
          </button>
        </div>

        {/* Overall Result Banner */}
        {overallStatus && (
          <div className={`rounded-lg p-4 border ${
            overallStatus === 'pass'
              ? 'bg-emerald-50 border-emerald-200'
              : 'bg-red-50 border-red-200'
          }`}>
            <div className="flex items-center justify-between">
              <div className="flex items-center gap-3">
                <StatusBadge status={overallStatus} />
                <span className="font-medium text-slate-900">
                  {totalPass} bestanden, {totalFail} fehlgeschlagen, {totalError} Fehler
                </span>
              </div>
              {durationMs !== null && (
                <span className="text-sm text-slate-500">{(durationMs / 1000).toFixed(1)}s</span>
              )}
            </div>
          </div>
        )}

        {/* Tabs */}
        <div className="border-b border-slate-200">
          <nav className="flex gap-4">
            {(['current', 'history'] as const).map(t => (
              <button
                key={t}
                onClick={() => setTab(t)}
                className={`pb-3 px-1 text-sm font-medium border-b-2 transition-colors ${
                  tab === t
                    ? 'border-teal-500 text-teal-600'
                    : 'border-transparent text-slate-500 hover:text-slate-700'
                }`}
              >
                {t === 'current' ? 'Aktuelle Ergebnisse' : 'Verlauf'}
              </button>
            ))}
          </nav>
        </div>

        {/* Current Results Tab */}
        {tab === 'current' && (
          <div className="space-y-3">
            {results.length === 0 && !running && (
              <div className="text-center py-12 text-slate-400">
                <p className="text-lg">Keine Ergebnisse</p>
                <p className="text-sm mt-1">Klicken Sie "Alle Tests starten" um die Regression zu laufen.</p>
              </div>
            )}
            {results.map(r => {
              const isExpanded = expandedSession === r.session_id
              return (
                <div
                  key={r.session_id}
                  className="bg-white rounded-lg border border-slate-200 overflow-hidden"
                >
                  <div
                    className="flex items-center justify-between px-4 py-3 cursor-pointer hover:bg-slate-50 transition-colors"
                    onClick={() => toggleSession(r.session_id)}
                  >
                    <div className="flex items-center gap-3 min-w-0">
                      <StatusBadge status={r.status} />
                      <span className="font-medium text-slate-900 truncate">{r.name || r.session_id}</span>
                    </div>
                    <div className="flex items-center gap-4 text-sm text-slate-500">
                      {r.diff_summary && (
                        <span>
                          {r.diff_summary.text_changes} Text, {r.diff_summary.structural_changes} Struktur
                        </span>
                      )}
                      {r.error && <span className="text-red-500">{r.error}</span>}
                      <svg className={`w-4 h-4 transition-transform ${isExpanded ? 'rotate-180' : ''}`} fill="none" viewBox="0 0 24 24" stroke="currentColor">
                        <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
                      </svg>
                    </div>
                  </div>

                  {/* Expanded Details */}
                  {isExpanded && r.status === 'fail' && (
                    <div className="border-t border-slate-100 px-4 py-3 bg-slate-50 space-y-3">
                      {/* Structural Diffs */}
                      {r.structural_diffs && r.structural_diffs.length > 0 && (
                        <div>
                          <h4 className="text-xs font-medium text-slate-500 uppercase mb-1">Strukturelle Aenderungen</h4>
                          <div className="space-y-1">
                            {r.structural_diffs.map((d, i) => (
                              <div key={i} className="text-sm">
                                <span className="font-mono text-slate-600">{d.field}</span>: {d.reference} → {d.current}
                              </div>
                            ))}
                          </div>
                        </div>
                      )}
                      {/* Cell Diffs */}
                      {r.cell_diffs && r.cell_diffs.length > 0 && (
                        <div>
                          <h4 className="text-xs font-medium text-slate-500 uppercase mb-1">
                            Zellen-Aenderungen ({r.cell_diffs.length})
                          </h4>
                          <div className="max-h-60 overflow-y-auto space-y-1">
                            {/* Only the first 50 diffs are rendered; the remainder is summarised below. */}
                            {r.cell_diffs.slice(0, 50).map((d, i) => (
                              <div key={i} className="text-sm font-mono bg-white rounded px-2 py-1 border border-slate-100">
                                <span className={`text-xs px-1 rounded ${
                                  d.type === 'text_change' ? 'bg-amber-100 text-amber-700'
                                    : d.type === 'cell_missing' ? 'bg-red-100 text-red-700'
                                    : 'bg-blue-100 text-blue-700'
                                }`}>
                                  {d.type}
                                </span>{' '}
                                <span className="text-slate-500">{d.cell_id}</span>
                                {d.reference && (
                                  <>
                                    {' '}<span className="line-through text-red-400">{d.reference}</span>
                                  </>
                                )}
                                {d.current && (
                                  <>
                                    {' '}<span className="text-emerald-600">{d.current}</span>
                                  </>
                                )}
                              </div>
                            ))}
                            {r.cell_diffs.length > 50 && (
                              <p className="text-xs text-slate-400">... und {r.cell_diffs.length - 50} weitere</p>
                            )}
                          </div>
                        </div>
                      )}
                    </div>
                  )}
                </div>
              )
            })}

            {/* Ground Truth Sessions Overview (when no results yet) */}
            {results.length === 0 && sessions.length > 0 && (
              <div>
                <h3 className="text-sm font-medium text-slate-700 mb-2">Ground-Truth Sessions</h3>
                <div className="grid gap-2">
                  {sessions.map(s => (
                    <div key={s.session_id} className="bg-white rounded-lg border border-slate-200 px-4 py-3 flex items-center justify-between">
                      <div>
                        <span className="font-medium text-slate-900">{s.name || s.session_id}</span>
                        <span className="text-sm text-slate-400 ml-2">{s.filename}</span>
                      </div>
                      <div className="text-sm text-slate-500">
                        {s.summary.total_cells} Zellen, {s.summary.total_zones} Zonen
                        {s.pipeline && <span className="ml-2 text-xs bg-slate-100 px-1.5 py-0.5 rounded">{s.pipeline}</span>}
                      </div>
                    </div>
                  ))}
                </div>
              </div>
            )}
          </div>
        )}

        {/* History Tab */}
        {tab === 'history' && (
          <div className="space-y-2">
            {history.length === 0 ? (
              <p className="text-center py-8 text-slate-400">Noch keine Laeufe aufgezeichnet.</p>
            ) : (
              <table className="w-full text-sm">
                <thead>
                  <tr className="border-b border-slate-200 text-left text-slate-500">
                    <th className="pb-2 font-medium">Datum</th>
                    <th className="pb-2 font-medium">Status</th>
                    <th className="pb-2 font-medium text-right">Gesamt</th>
                    <th className="pb-2 font-medium text-right">Pass</th>
                    <th className="pb-2 font-medium text-right">Fail</th>
                    <th className="pb-2 font-medium text-right">Dauer</th>
                    <th className="pb-2 font-medium">Trigger</th>
                  </tr>
                </thead>
                <tbody>
                  {history.map(run => (
                    <tr key={run.id} className="border-b border-slate-100 hover:bg-slate-50">
                      <td className="py-2">{formatDate(run.run_at)}</td>
                      <td className="py-2"><StatusBadge status={run.status} /></td>
                      <td className="py-2 text-right">{run.total}</td>
                      <td className="py-2 text-right text-emerald-600">{run.passed}</td>
                      {/* The "Fail" column combines failed and errored sessions. */}
                      <td className="py-2 text-right text-red-600">{run.failed + run.errors}</td>
                      <td className="py-2 text-right text-slate-500">{(run.duration_ms / 1000).toFixed(1)}s</td>
                      <td className="py-2 text-slate-400">{run.triggered_by}</td>
                    </tr>
                  ))}
                </tbody>
              </table>
            )}
          </div>
        )}
      </div>
    </AIToolsSidebarResponsive>
  )
}
|