feat: Sprint 1 — IPA hardening, regression framework, ground-truth review
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 19s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 19s
Track A (Backend): - Compound word IPA decomposition (schoolbag→school+bag) - Trailing garbled IPA fragment removal after brackets (R21 fix) - Regression runner with DB persistence, history endpoints - Page crop determinism verified with tests Track B (Frontend): - OCR Regression dashboard (/ai/ocr-regression) - Ground Truth Review workflow (/ai/ocr-ground-truth) with split-view, confidence highlighting, inline edit, batch mark, progress tracking Track C (Docs): - OCR-Pipeline.md v5.0 (Steps 5e-5h) - Regression testing guide - mkdocs.yml nav update Track D (Infra): - TrOCR baseline benchmark script - run-regression.sh shell script - Migration 008: regression_runs table Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
580
admin-lehrer/app/(admin)/ai/ocr-ground-truth/page.tsx
Normal file
580
admin-lehrer/app/(admin)/ai/ocr-ground-truth/page.tsx
Normal file
@@ -0,0 +1,580 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ground-Truth Review Workflow
|
||||||
|
*
|
||||||
|
* Efficient mass-review of OCR sessions:
|
||||||
|
* - Session queue with auto-advance
|
||||||
|
* - Split-view: original image left, grid right
|
||||||
|
* - Confidence highlighting on cells
|
||||||
|
* - Quick-accept per row
|
||||||
|
* - Inline cell editing
|
||||||
|
* - Batch mark as ground truth
|
||||||
|
* - Progress tracking
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { useState, useEffect, useCallback, useRef } from 'react'
|
||||||
|
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||||
|
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
|
||||||
|
|
||||||
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Types
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * One OCR session as returned by the sessions list endpoint
 * (`/api/v1/ocr-pipeline/sessions`); fields are normalized/defaulted
 * in loadSessions().
 */
interface Session {
  id: string
  name: string
  filename: string
  // e.g. 'active'; only 'active' sessions appear in the 'unreviewed' filter
  status: string
  created_at: string
  document_category: string | null
  // true once the session payload carries ground_truth.build_grid_reference
  has_ground_truth: boolean
}
|
||||||
|
|
||||||
|
/** A detected table/grid zone with its column/row layout and cell contents. */
interface GridZone {
  zone_id: string
  zone_type: string
  // col_type values look like 'column_<name>' (the prefix is stripped for display)
  columns: Array<{ col_index: number; col_type: string; header: string }>
  rows: Array<{ row_index: number; is_header: boolean }>
  cells: GridCell[]
}
|
||||||
|
|
||||||
|
/** A single recognized cell within a grid zone. */
interface GridCell {
  cell_id: string
  row_index: number
  col_index: number
  col_type: string
  text: string
  // OCR confidence in percent (compared against 50/80 thresholds);
  // undefined when the backend omits it
  confidence?: number
  is_bold?: boolean
}
|
||||||
|
|
||||||
|
/** Grid-editor payload for a session: all zones plus optional aggregate counts. */
interface GridResult {
  zones: GridZone[]
  summary?: {
    total_zones: number
    total_columns: number
    total_rows: number
    total_cells: number
  }
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
function confidenceColor(conf: number | undefined): string {
|
||||||
|
if (conf === undefined) return ''
|
||||||
|
if (conf >= 80) return 'bg-emerald-50'
|
||||||
|
if (conf >= 50) return 'bg-amber-50'
|
||||||
|
return 'bg-red-50'
|
||||||
|
}
|
||||||
|
|
||||||
|
function confidenceBorder(conf: number | undefined): string {
|
||||||
|
if (conf === undefined) return 'border-slate-200'
|
||||||
|
if (conf >= 80) return 'border-emerald-200'
|
||||||
|
if (conf >= 50) return 'border-amber-300'
|
||||||
|
return 'border-red-300'
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Component
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
export default function GroundTruthReviewPage() {
|
||||||
|
// Session list & queue
|
||||||
|
const [allSessions, setAllSessions] = useState<Session[]>([])
|
||||||
|
const [filter, setFilter] = useState<'all' | 'unreviewed' | 'reviewed'>('unreviewed')
|
||||||
|
const [currentIdx, setCurrentIdx] = useState(0)
|
||||||
|
const [loading, setLoading] = useState(true)
|
||||||
|
|
||||||
|
// Current session data
|
||||||
|
const [grid, setGrid] = useState<GridResult | null>(null)
|
||||||
|
const [loadingGrid, setLoadingGrid] = useState(false)
|
||||||
|
const [editingCell, setEditingCell] = useState<string | null>(null)
|
||||||
|
const [editText, setEditText] = useState('')
|
||||||
|
const [acceptedRows, setAcceptedRows] = useState<Set<string>>(new Set())
|
||||||
|
const [zoom, setZoom] = useState(100)
|
||||||
|
|
||||||
|
// Batch operations
|
||||||
|
const [selectedSessions, setSelectedSessions] = useState<Set<string>>(new Set())
|
||||||
|
const [marking, setMarking] = useState(false)
|
||||||
|
const [markResult, setMarkResult] = useState<string | null>(null)
|
||||||
|
|
||||||
|
// Stats
|
||||||
|
const [reviewedCount, setReviewedCount] = useState(0)
|
||||||
|
const [totalCount, setTotalCount] = useState(0)
|
||||||
|
|
||||||
|
const imageRef = useRef<HTMLDivElement>(null)
|
||||||
|
|
||||||
|
// Load all sessions
|
||||||
|
const loadSessions = useCallback(async () => {
|
||||||
|
setLoading(true)
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions?limit=200`)
|
||||||
|
if (!res.ok) return
|
||||||
|
const data = await res.json()
|
||||||
|
const sessions: Session[] = (data.sessions || []).map((s: any) => ({
|
||||||
|
id: s.id,
|
||||||
|
name: s.name || '',
|
||||||
|
filename: s.filename || '',
|
||||||
|
status: s.status || 'active',
|
||||||
|
created_at: s.created_at || '',
|
||||||
|
document_category: s.document_category || null,
|
||||||
|
has_ground_truth: !!(s.ground_truth && s.ground_truth.build_grid_reference),
|
||||||
|
}))
|
||||||
|
setAllSessions(sessions)
|
||||||
|
setTotalCount(sessions.length)
|
||||||
|
setReviewedCount(sessions.filter(s => s.has_ground_truth).length)
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to load sessions:', e)
|
||||||
|
} finally {
|
||||||
|
setLoading(false)
|
||||||
|
}
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
useEffect(() => { loadSessions() }, [loadSessions])
|
||||||
|
|
||||||
|
// Filtered sessions
|
||||||
|
const filteredSessions = allSessions.filter(s => {
|
||||||
|
if (filter === 'unreviewed') return !s.has_ground_truth && s.status === 'active'
|
||||||
|
if (filter === 'reviewed') return s.has_ground_truth
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
|
||||||
|
const currentSession = filteredSessions[currentIdx] || null
|
||||||
|
|
||||||
|
// Load grid for current session
|
||||||
|
const loadGrid = useCallback(async (sessionId: string) => {
|
||||||
|
setLoadingGrid(true)
|
||||||
|
setGrid(null)
|
||||||
|
setAcceptedRows(new Set())
|
||||||
|
setEditingCell(null)
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`)
|
||||||
|
if (res.ok) {
|
||||||
|
const data = await res.json()
|
||||||
|
setGrid(data.grid || data)
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to load grid:', e)
|
||||||
|
} finally {
|
||||||
|
setLoadingGrid(false)
|
||||||
|
}
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (currentSession) loadGrid(currentSession.id)
|
||||||
|
}, [currentSession, loadGrid])
|
||||||
|
|
||||||
|
// Navigation
|
||||||
|
const goNext = () => {
|
||||||
|
if (currentIdx < filteredSessions.length - 1) setCurrentIdx(currentIdx + 1)
|
||||||
|
}
|
||||||
|
const goPrev = () => {
|
||||||
|
if (currentIdx > 0) setCurrentIdx(currentIdx - 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accept row
|
||||||
|
const acceptRow = (zoneId: string, rowIdx: number) => {
|
||||||
|
const key = `${zoneId}-${rowIdx}`
|
||||||
|
setAcceptedRows(prev => new Set([...prev, key]))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Edit cell
|
||||||
|
const startEdit = (cell: GridCell) => {
|
||||||
|
setEditingCell(cell.cell_id)
|
||||||
|
setEditText(cell.text)
|
||||||
|
}
|
||||||
|
|
||||||
|
const saveEdit = async () => {
|
||||||
|
if (!editingCell || !currentSession) return
|
||||||
|
try {
|
||||||
|
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${currentSession.id}/update-cell`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ cell_id: editingCell, text: editText }),
|
||||||
|
})
|
||||||
|
// Update local state
|
||||||
|
if (grid) {
|
||||||
|
const newGrid = { ...grid }
|
||||||
|
for (const zone of newGrid.zones) {
|
||||||
|
for (const cell of zone.cells) {
|
||||||
|
if (cell.cell_id === editingCell) {
|
||||||
|
cell.text = editText
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
setGrid(newGrid)
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to save cell:', e)
|
||||||
|
}
|
||||||
|
setEditingCell(null)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark as ground truth
|
||||||
|
const markGroundTruth = async (sessionId: string) => {
|
||||||
|
setMarking(true)
|
||||||
|
setMarkResult(null)
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth`, {
|
||||||
|
method: 'POST',
|
||||||
|
})
|
||||||
|
if (res.ok) {
|
||||||
|
setMarkResult('success')
|
||||||
|
// Update local session state
|
||||||
|
setAllSessions(prev => prev.map(s =>
|
||||||
|
s.id === sessionId ? { ...s, has_ground_truth: true } : s
|
||||||
|
))
|
||||||
|
setReviewedCount(prev => prev + 1)
|
||||||
|
} else {
|
||||||
|
setMarkResult('error')
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
setMarkResult('error')
|
||||||
|
} finally {
|
||||||
|
setMarking(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Batch mark
|
||||||
|
const batchMark = async () => {
|
||||||
|
setMarking(true)
|
||||||
|
let success = 0
|
||||||
|
for (const sid of selectedSessions) {
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}/mark-ground-truth`, {
|
||||||
|
method: 'POST',
|
||||||
|
})
|
||||||
|
if (res.ok) success++
|
||||||
|
} catch { /* skip */ }
|
||||||
|
}
|
||||||
|
setAllSessions(prev => prev.map(s =>
|
||||||
|
selectedSessions.has(s.id) ? { ...s, has_ground_truth: true } : s
|
||||||
|
))
|
||||||
|
setReviewedCount(prev => prev + success)
|
||||||
|
setSelectedSessions(new Set())
|
||||||
|
setMarking(false)
|
||||||
|
setMarkResult(`${success} Sessions als Ground Truth markiert`)
|
||||||
|
setTimeout(() => setMarkResult(null), 3000)
|
||||||
|
}
|
||||||
|
|
||||||
|
// All cells for current grid
|
||||||
|
const allCells = grid?.zones?.flatMap(z => z.cells) || []
|
||||||
|
const lowConfCells = allCells.filter(c => (c.confidence ?? 100) < 50)
|
||||||
|
|
||||||
|
const imageUrl = currentSession
|
||||||
|
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${currentSession.id}/image/original`
|
||||||
|
: null
|
||||||
|
|
||||||
|
return (
|
||||||
|
<AIToolsSidebarResponsive>
|
||||||
|
<div className="max-w-[1600px] mx-auto p-4 space-y-4">
|
||||||
|
<PagePurpose moduleId="ocr-ground-truth" />
|
||||||
|
|
||||||
|
{/* Progress Bar */}
|
||||||
|
<div className="bg-white rounded-lg border border-slate-200 p-4">
|
||||||
|
<div className="flex items-center justify-between mb-2">
|
||||||
|
<h2 className="text-lg font-bold text-slate-900">Ground Truth Review</h2>
|
||||||
|
<span className="text-sm text-slate-500">
|
||||||
|
{reviewedCount} von {totalCount} geprueft ({totalCount > 0 ? Math.round(reviewedCount / totalCount * 100) : 0}%)
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div className="w-full bg-slate-100 rounded-full h-2.5">
|
||||||
|
<div
|
||||||
|
className="bg-teal-500 h-2.5 rounded-full transition-all duration-500"
|
||||||
|
style={{ width: `${totalCount > 0 ? (reviewedCount / totalCount) * 100 : 0}%` }}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Filter + Queue */}
|
||||||
|
<div className="flex items-center gap-4">
|
||||||
|
<div className="flex gap-1 bg-slate-100 rounded-lg p-1">
|
||||||
|
{(['unreviewed', 'reviewed', 'all'] as const).map(f => (
|
||||||
|
<button
|
||||||
|
key={f}
|
||||||
|
onClick={() => { setFilter(f); setCurrentIdx(0) }}
|
||||||
|
className={`px-3 py-1.5 text-sm rounded-md transition-colors ${
|
||||||
|
filter === f
|
||||||
|
? 'bg-white text-slate-900 shadow-sm font-medium'
|
||||||
|
: 'text-slate-500 hover:text-slate-700'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{f === 'unreviewed' ? 'Offen' : f === 'reviewed' ? 'Geprueft' : 'Alle'}
|
||||||
|
<span className="ml-1 text-xs text-slate-400">
|
||||||
|
({allSessions.filter(s =>
|
||||||
|
f === 'unreviewed' ? !s.has_ground_truth && s.status === 'active'
|
||||||
|
: f === 'reviewed' ? s.has_ground_truth
|
||||||
|
: true
|
||||||
|
).length})
|
||||||
|
</span>
|
||||||
|
</button>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Navigation */}
|
||||||
|
<div className="flex items-center gap-2 ml-auto">
|
||||||
|
<button onClick={goPrev} disabled={currentIdx === 0}
|
||||||
|
className="p-2 rounded hover:bg-slate-100 disabled:opacity-30 disabled:cursor-not-allowed">
|
||||||
|
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 19l-7-7 7-7" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
<span className="text-sm text-slate-500 min-w-[80px] text-center">
|
||||||
|
{filteredSessions.length > 0 ? `${currentIdx + 1} / ${filteredSessions.length}` : '—'}
|
||||||
|
</span>
|
||||||
|
<button onClick={goNext} disabled={currentIdx >= filteredSessions.length - 1}
|
||||||
|
className="p-2 rounded hover:bg-slate-100 disabled:opacity-30 disabled:cursor-not-allowed">
|
||||||
|
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Batch mark button */}
|
||||||
|
{selectedSessions.size > 0 && (
|
||||||
|
<button
|
||||||
|
onClick={batchMark}
|
||||||
|
disabled={marking}
|
||||||
|
className="px-3 py-1.5 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700 disabled:opacity-50"
|
||||||
|
>
|
||||||
|
{selectedSessions.size} markieren
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Toast */}
|
||||||
|
{markResult && (
|
||||||
|
<div className={`p-3 rounded-lg text-sm ${
|
||||||
|
markResult === 'error' ? 'bg-red-50 text-red-700 border border-red-200'
|
||||||
|
: markResult === 'success' ? 'bg-emerald-50 text-emerald-700 border border-emerald-200'
|
||||||
|
: 'bg-blue-50 text-blue-700 border border-blue-200'
|
||||||
|
}`}>
|
||||||
|
{markResult === 'success' ? 'Als Ground Truth markiert!' : markResult === 'error' ? 'Fehler beim Markieren' : markResult}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Main Content: Split View */}
|
||||||
|
{loading ? (
|
||||||
|
<div className="text-center py-12 text-slate-400">Lade Sessions...</div>
|
||||||
|
) : !currentSession ? (
|
||||||
|
<div className="text-center py-12 text-slate-400">
|
||||||
|
<p className="text-lg">Keine Sessions in dieser Ansicht</p>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div className="grid grid-cols-2 gap-4" style={{ minHeight: '70vh' }}>
|
||||||
|
{/* Left: Original Image */}
|
||||||
|
<div className="bg-white rounded-lg border border-slate-200 overflow-hidden flex flex-col">
|
||||||
|
<div className="flex items-center justify-between px-3 py-2 border-b border-slate-100 bg-slate-50">
|
||||||
|
<span className="text-sm font-medium text-slate-700 truncate">
|
||||||
|
{currentSession.name || currentSession.filename}
|
||||||
|
</span>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<button onClick={() => setZoom(z => Math.max(50, z - 25))}
|
||||||
|
className="px-2 py-0.5 text-xs bg-slate-200 rounded hover:bg-slate-300">-</button>
|
||||||
|
<span className="text-xs text-slate-500 w-10 text-center">{zoom}%</span>
|
||||||
|
<button onClick={() => setZoom(z => Math.min(300, z + 25))}
|
||||||
|
className="px-2 py-0.5 text-xs bg-slate-200 rounded hover:bg-slate-300">+</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div ref={imageRef} className="flex-1 overflow-auto p-2">
|
||||||
|
{imageUrl && (
|
||||||
|
<img
|
||||||
|
src={imageUrl}
|
||||||
|
alt="Original scan"
|
||||||
|
style={{ width: `${zoom}%`, maxWidth: 'none' }}
|
||||||
|
className="block"
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Right: Grid Review */}
|
||||||
|
<div className="bg-white rounded-lg border border-slate-200 overflow-hidden flex flex-col">
|
||||||
|
<div className="flex items-center justify-between px-3 py-2 border-b border-slate-100 bg-slate-50">
|
||||||
|
<div className="flex items-center gap-3">
|
||||||
|
<span className="text-sm font-medium text-slate-700">
|
||||||
|
{allCells.length} Zellen
|
||||||
|
</span>
|
||||||
|
{lowConfCells.length > 0 && (
|
||||||
|
<span className="text-xs bg-red-100 text-red-700 px-2 py-0.5 rounded-full">
|
||||||
|
{lowConfCells.length} niedrige Konfidenz
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
{!currentSession.has_ground_truth && (
|
||||||
|
<button
|
||||||
|
onClick={() => markGroundTruth(currentSession.id)}
|
||||||
|
disabled={marking}
|
||||||
|
className="px-3 py-1 bg-teal-600 text-white text-xs rounded hover:bg-teal-700 disabled:opacity-50"
|
||||||
|
>
|
||||||
|
{marking ? 'Markiere...' : 'Als Ground Truth markieren'}
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
{currentSession.has_ground_truth && (
|
||||||
|
<span className="text-xs bg-emerald-100 text-emerald-700 px-2 py-0.5 rounded-full">
|
||||||
|
Ground Truth
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
<button
|
||||||
|
onClick={() => { markGroundTruth(currentSession.id); setTimeout(goNext, 500) }}
|
||||||
|
disabled={marking}
|
||||||
|
className="px-3 py-1 bg-slate-600 text-white text-xs rounded hover:bg-slate-700 disabled:opacity-50"
|
||||||
|
>
|
||||||
|
Markieren & Weiter
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Grid Content */}
|
||||||
|
<div className="flex-1 overflow-auto">
|
||||||
|
{loadingGrid ? (
|
||||||
|
<div className="flex items-center justify-center h-full text-slate-400">
|
||||||
|
<svg className="animate-spin h-6 w-6 mr-2" fill="none" viewBox="0 0 24 24">
|
||||||
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||||
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||||
|
</svg>
|
||||||
|
Lade Grid...
|
||||||
|
</div>
|
||||||
|
) : !grid || !grid.zones ? (
|
||||||
|
<div className="text-center py-8 text-slate-400 text-sm">
|
||||||
|
Kein Grid vorhanden. Bitte zuerst die Pipeline ausfuehren.
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div className="p-3 space-y-4">
|
||||||
|
{grid.zones.map((zone, zi) => (
|
||||||
|
<div key={zone.zone_id || zi} className="space-y-1">
|
||||||
|
{/* Zone header */}
|
||||||
|
<div className="text-xs text-slate-400 uppercase tracking-wide">
|
||||||
|
Zone {zi + 1} ({zone.zone_type})
|
||||||
|
{zone.columns?.length > 0 && (
|
||||||
|
<span className="ml-2">
|
||||||
|
{zone.columns.map(c => c.col_type.replace('column_', '')).join(' | ')}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Group cells by row */}
|
||||||
|
{Array.from(new Set(zone.cells.map(c => c.row_index)))
|
||||||
|
.sort((a, b) => a - b)
|
||||||
|
.map(rowIdx => {
|
||||||
|
const rowCells = zone.cells
|
||||||
|
.filter(c => c.row_index === rowIdx)
|
||||||
|
.sort((a, b) => a.col_index - b.col_index)
|
||||||
|
const rowKey = `${zone.zone_id || zi}-${rowIdx}`
|
||||||
|
const isAccepted = acceptedRows.has(rowKey)
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div
|
||||||
|
key={rowKey}
|
||||||
|
className={`flex items-start gap-1 group ${isAccepted ? 'opacity-60' : ''}`}
|
||||||
|
>
|
||||||
|
{/* Quick accept button */}
|
||||||
|
<button
|
||||||
|
onClick={() => acceptRow(zone.zone_id || String(zi), rowIdx)}
|
||||||
|
className={`flex-shrink-0 w-6 h-6 rounded flex items-center justify-center mt-0.5 transition-colors ${
|
||||||
|
isAccepted
|
||||||
|
? 'bg-emerald-100 text-emerald-600'
|
||||||
|
: 'bg-slate-100 text-slate-400 hover:bg-emerald-100 hover:text-emerald-600'
|
||||||
|
}`}
|
||||||
|
title="Zeile als korrekt markieren"
|
||||||
|
>
|
||||||
|
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
|
||||||
|
{/* Cells */}
|
||||||
|
<div className="flex-1 flex gap-1 flex-wrap">
|
||||||
|
{rowCells.map(cell => (
|
||||||
|
<div
|
||||||
|
key={cell.cell_id}
|
||||||
|
className={`flex-1 min-w-[80px] px-2 py-1 rounded text-sm border cursor-pointer transition-colors
|
||||||
|
${confidenceColor(cell.confidence)}
|
||||||
|
${confidenceBorder(cell.confidence)}
|
||||||
|
${editingCell === cell.cell_id ? 'ring-2 ring-teal-400' : 'hover:border-teal-300'}
|
||||||
|
${cell.is_bold ? 'font-bold' : ''}
|
||||||
|
`}
|
||||||
|
onClick={() => !isAccepted && startEdit(cell)}
|
||||||
|
title={`Konfidenz: ${cell.confidence ?? '?'}% | ${cell.col_type}`}
|
||||||
|
>
|
||||||
|
{editingCell === cell.cell_id ? (
|
||||||
|
<input
|
||||||
|
autoFocus
|
||||||
|
value={editText}
|
||||||
|
onChange={e => setEditText(e.target.value)}
|
||||||
|
onBlur={saveEdit}
|
||||||
|
onKeyDown={e => {
|
||||||
|
if (e.key === 'Enter') saveEdit()
|
||||||
|
if (e.key === 'Escape') setEditingCell(null)
|
||||||
|
}}
|
||||||
|
className="w-full bg-transparent outline-none text-sm"
|
||||||
|
/>
|
||||||
|
) : (
|
||||||
|
<span className={cell.text ? '' : 'text-slate-300 italic'}>
|
||||||
|
{cell.text || '(leer)'}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Session List (collapsed) */}
|
||||||
|
{filteredSessions.length > 1 && (
|
||||||
|
<details className="bg-white rounded-lg border border-slate-200">
|
||||||
|
<summary className="px-4 py-3 cursor-pointer text-sm font-medium text-slate-700 hover:bg-slate-50">
|
||||||
|
Session-Liste ({filteredSessions.length})
|
||||||
|
</summary>
|
||||||
|
<div className="border-t border-slate-100 max-h-60 overflow-y-auto">
|
||||||
|
{filteredSessions.map((s, idx) => (
|
||||||
|
<div
|
||||||
|
key={s.id}
|
||||||
|
className={`flex items-center gap-3 px-4 py-2 text-sm cursor-pointer hover:bg-slate-50 border-b border-slate-50 ${
|
||||||
|
idx === currentIdx ? 'bg-teal-50' : ''
|
||||||
|
}`}
|
||||||
|
onClick={() => setCurrentIdx(idx)}
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
checked={selectedSessions.has(s.id)}
|
||||||
|
onChange={e => {
|
||||||
|
e.stopPropagation()
|
||||||
|
setSelectedSessions(prev => {
|
||||||
|
const next = new Set(prev)
|
||||||
|
if (next.has(s.id)) next.delete(s.id)
|
||||||
|
else next.add(s.id)
|
||||||
|
return next
|
||||||
|
})
|
||||||
|
}}
|
||||||
|
className="rounded border-slate-300"
|
||||||
|
/>
|
||||||
|
<span className={`w-2 h-2 rounded-full flex-shrink-0 ${s.has_ground_truth ? 'bg-emerald-400' : 'bg-slate-300'}`} />
|
||||||
|
<span className="truncate flex-1">{s.name || s.filename || s.id}</span>
|
||||||
|
{s.document_category && (
|
||||||
|
<span className="text-xs bg-slate-100 px-1.5 py-0.5 rounded text-slate-500">{s.document_category}</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</details>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</AIToolsSidebarResponsive>
|
||||||
|
)
|
||||||
|
}
|
||||||
391
admin-lehrer/app/(admin)/ai/ocr-regression/page.tsx
Normal file
391
admin-lehrer/app/(admin)/ai/ocr-regression/page.tsx
Normal file
@@ -0,0 +1,391 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OCR Regression Dashboard
|
||||||
|
*
|
||||||
|
* Shows all ground-truth sessions, runs regression tests,
|
||||||
|
* displays pass/fail results with diff details, and shows history.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { useState, useEffect, useCallback } from 'react'
|
||||||
|
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||||
|
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
|
||||||
|
|
||||||
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Types
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** A ground-truth session eligible for regression testing. */
interface GTSession {
  session_id: string
  name: string
  filename: string
  document_category: string | null
  pipeline: string | null
  saved_at: string | null
  // aggregate counts of the saved reference grid
  summary: {
    total_zones: number
    total_columns: number
    total_rows: number
    total_cells: number
  }
}
|
||||||
|
|
||||||
|
/** Aggregated diff counts between the reference grid and current pipeline output. */
interface DiffSummary {
  structural_changes: number
  cells_missing: number
  cells_added: number
  text_changes: number
  col_type_changes: number
}
|
||||||
|
|
||||||
|
/** Per-session outcome of one regression run. */
interface RegressionResult {
  session_id: string
  name: string
  status: 'pass' | 'fail' | 'error'
  // populated when status === 'error'
  error?: string
  diff_summary?: DiffSummary
  reference_summary?: Record<string, number>
  current_summary?: Record<string, number>
  structural_diffs?: Array<{ field: string; reference: number; current: number }>
  cell_diffs?: Array<{ type: string; cell_id: string; reference?: string; current?: string }>
}
|
||||||
|
|
||||||
|
/** A persisted regression run as returned by the history endpoint. */
interface RegressionRun {
  id: string
  run_at: string
  status: string
  total: number
  passed: number
  failed: number
  errors: number
  duration_ms: number
  // e.g. 'manual' when started from this dashboard
  triggered_by: string
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
function StatusBadge({ status }: { status: string }) {
|
||||||
|
const cls =
|
||||||
|
status === 'pass'
|
||||||
|
? 'bg-emerald-100 text-emerald-800 border-emerald-200'
|
||||||
|
: status === 'fail'
|
||||||
|
? 'bg-red-100 text-red-800 border-red-200'
|
||||||
|
: 'bg-amber-100 text-amber-800 border-amber-200'
|
||||||
|
return (
|
||||||
|
<span className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium border ${cls}`}>
|
||||||
|
{status === 'pass' ? 'Pass' : status === 'fail' ? 'Fail' : 'Error'}
|
||||||
|
</span>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
function formatDate(iso: string | null) {
|
||||||
|
if (!iso) return '—'
|
||||||
|
return new Date(iso).toLocaleString('de-DE', {
|
||||||
|
day: '2-digit', month: '2-digit', year: 'numeric',
|
||||||
|
hour: '2-digit', minute: '2-digit',
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Component
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
export default function OCRRegressionPage() {
|
||||||
|
const [sessions, setSessions] = useState<GTSession[]>([])
|
||||||
|
const [results, setResults] = useState<RegressionResult[]>([])
|
||||||
|
const [history, setHistory] = useState<RegressionRun[]>([])
|
||||||
|
const [running, setRunning] = useState(false)
|
||||||
|
const [overallStatus, setOverallStatus] = useState<string | null>(null)
|
||||||
|
const [durationMs, setDurationMs] = useState<number | null>(null)
|
||||||
|
const [expandedSession, setExpandedSession] = useState<string | null>(null)
|
||||||
|
const [tab, setTab] = useState<'current' | 'history'>('current')
|
||||||
|
|
||||||
|
// Load ground-truth sessions
|
||||||
|
const loadSessions = useCallback(async () => {
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/ground-truth-sessions`)
|
||||||
|
if (res.ok) {
|
||||||
|
const data = await res.json()
|
||||||
|
setSessions(data.sessions || [])
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to load GT sessions:', e)
|
||||||
|
}
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
// Load history
|
||||||
|
const loadHistory = useCallback(async () => {
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/regression/history?limit=20`)
|
||||||
|
if (res.ok) {
|
||||||
|
const data = await res.json()
|
||||||
|
setHistory(data.runs || [])
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to load history:', e)
|
||||||
|
}
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
loadSessions()
|
||||||
|
loadHistory()
|
||||||
|
}, [loadSessions, loadHistory])
|
||||||
|
|
||||||
|
// Run all regressions
|
||||||
|
const runAll = async () => {
|
||||||
|
setRunning(true)
|
||||||
|
setResults([])
|
||||||
|
setOverallStatus(null)
|
||||||
|
setDurationMs(null)
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/regression/run?triggered_by=manual`, {
|
||||||
|
method: 'POST',
|
||||||
|
})
|
||||||
|
if (res.ok) {
|
||||||
|
const data = await res.json()
|
||||||
|
setResults(data.results || [])
|
||||||
|
setOverallStatus(data.status)
|
||||||
|
setDurationMs(data.duration_ms)
|
||||||
|
loadHistory()
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Regression run failed:', e)
|
||||||
|
setOverallStatus('error')
|
||||||
|
} finally {
|
||||||
|
setRunning(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const totalPass = results.filter(r => r.status === 'pass').length
|
||||||
|
const totalFail = results.filter(r => r.status === 'fail').length
|
||||||
|
const totalError = results.filter(r => r.status === 'error').length
|
||||||
|
|
||||||
|
return (
|
||||||
|
<AIToolsSidebarResponsive>
|
||||||
|
<div className="max-w-7xl mx-auto p-6 space-y-6">
|
||||||
|
<PagePurpose moduleId="ocr-regression" />
|
||||||
|
|
||||||
|
{/* Header + Run Button */}
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div>
|
||||||
|
<h1 className="text-2xl font-bold text-slate-900">OCR Regression Tests</h1>
|
||||||
|
<p className="text-sm text-slate-500 mt-1">
|
||||||
|
{sessions.length} Ground-Truth Session{sessions.length !== 1 ? 's' : ''}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
onClick={runAll}
|
||||||
|
disabled={running || sessions.length === 0}
|
||||||
|
className="inline-flex items-center gap-2 px-4 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed font-medium transition-colors"
|
||||||
|
>
|
||||||
|
{running ? (
|
||||||
|
<>
|
||||||
|
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||||
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||||
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||||
|
</svg>
|
||||||
|
Laeuft...
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
'Alle Tests starten'
|
||||||
|
)}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Overall Result Banner */}
|
||||||
|
{overallStatus && (
|
||||||
|
<div className={`rounded-lg p-4 border ${
|
||||||
|
overallStatus === 'pass'
|
||||||
|
? 'bg-emerald-50 border-emerald-200'
|
||||||
|
: 'bg-red-50 border-red-200'
|
||||||
|
}`}>
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div className="flex items-center gap-3">
|
||||||
|
<StatusBadge status={overallStatus} />
|
||||||
|
<span className="font-medium text-slate-900">
|
||||||
|
{totalPass} bestanden, {totalFail} fehlgeschlagen, {totalError} Fehler
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
{durationMs !== null && (
|
||||||
|
<span className="text-sm text-slate-500">{(durationMs / 1000).toFixed(1)}s</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Tabs */}
|
||||||
|
<div className="border-b border-slate-200">
|
||||||
|
<nav className="flex gap-4">
|
||||||
|
{(['current', 'history'] as const).map(t => (
|
||||||
|
<button
|
||||||
|
key={t}
|
||||||
|
onClick={() => setTab(t)}
|
||||||
|
className={`pb-3 px-1 text-sm font-medium border-b-2 transition-colors ${
|
||||||
|
tab === t
|
||||||
|
? 'border-teal-500 text-teal-600'
|
||||||
|
: 'border-transparent text-slate-500 hover:text-slate-700'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{t === 'current' ? 'Aktuelle Ergebnisse' : 'Verlauf'}
|
||||||
|
</button>
|
||||||
|
))}
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Current Results Tab */}
|
||||||
|
{tab === 'current' && (
|
||||||
|
<div className="space-y-3">
|
||||||
|
{results.length === 0 && !running && (
|
||||||
|
<div className="text-center py-12 text-slate-400">
|
||||||
|
<p className="text-lg">Keine Ergebnisse</p>
|
||||||
|
<p className="text-sm mt-1">Klicken Sie "Alle Tests starten" um die Regression zu laufen.</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{results.map(r => (
|
||||||
|
<div
|
||||||
|
key={r.session_id}
|
||||||
|
className="bg-white rounded-lg border border-slate-200 overflow-hidden"
|
||||||
|
>
|
||||||
|
<div
|
||||||
|
className="flex items-center justify-between px-4 py-3 cursor-pointer hover:bg-slate-50 transition-colors"
|
||||||
|
onClick={() => setExpandedSession(expandedSession === r.session_id ? null : r.session_id)}
|
||||||
|
>
|
||||||
|
<div className="flex items-center gap-3 min-w-0">
|
||||||
|
<StatusBadge status={r.status} />
|
||||||
|
<span className="font-medium text-slate-900 truncate">{r.name || r.session_id}</span>
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-4 text-sm text-slate-500">
|
||||||
|
{r.diff_summary && (
|
||||||
|
<span>
|
||||||
|
{r.diff_summary.text_changes} Text, {r.diff_summary.structural_changes} Struktur
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
{r.error && <span className="text-red-500">{r.error}</span>}
|
||||||
|
<svg className={`w-4 h-4 transition-transform ${expandedSession === r.session_id ? 'rotate-180' : ''}`} fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Expanded Details */}
|
||||||
|
{expandedSession === r.session_id && r.status === 'fail' && (
|
||||||
|
<div className="border-t border-slate-100 px-4 py-3 bg-slate-50 space-y-3">
|
||||||
|
{/* Structural Diffs */}
|
||||||
|
{r.structural_diffs && r.structural_diffs.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<h4 className="text-xs font-medium text-slate-500 uppercase mb-1">Strukturelle Aenderungen</h4>
|
||||||
|
<div className="space-y-1">
|
||||||
|
{r.structural_diffs.map((d, i) => (
|
||||||
|
<div key={i} className="text-sm">
|
||||||
|
<span className="font-mono text-slate-600">{d.field}</span>: {d.reference} → {d.current}
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{/* Cell Diffs */}
|
||||||
|
{r.cell_diffs && r.cell_diffs.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<h4 className="text-xs font-medium text-slate-500 uppercase mb-1">
|
||||||
|
Zellen-Aenderungen ({r.cell_diffs.length})
|
||||||
|
</h4>
|
||||||
|
<div className="max-h-60 overflow-y-auto space-y-1">
|
||||||
|
{r.cell_diffs.slice(0, 50).map((d, i) => (
|
||||||
|
<div key={i} className="text-sm font-mono bg-white rounded px-2 py-1 border border-slate-100">
|
||||||
|
<span className={`text-xs px-1 rounded ${
|
||||||
|
d.type === 'text_change' ? 'bg-amber-100 text-amber-700'
|
||||||
|
: d.type === 'cell_missing' ? 'bg-red-100 text-red-700'
|
||||||
|
: 'bg-blue-100 text-blue-700'
|
||||||
|
}`}>
|
||||||
|
{d.type}
|
||||||
|
</span>{' '}
|
||||||
|
<span className="text-slate-500">{d.cell_id}</span>
|
||||||
|
{d.reference && (
|
||||||
|
<>
|
||||||
|
{' '}<span className="line-through text-red-400">{d.reference}</span>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
{d.current && (
|
||||||
|
<>
|
||||||
|
{' '}<span className="text-emerald-600">{d.current}</span>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
{r.cell_diffs.length > 50 && (
|
||||||
|
<p className="text-xs text-slate-400">... und {r.cell_diffs.length - 50} weitere</p>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
|
||||||
|
{/* Ground Truth Sessions Overview (when no results yet) */}
|
||||||
|
{results.length === 0 && sessions.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<h3 className="text-sm font-medium text-slate-700 mb-2">Ground-Truth Sessions</h3>
|
||||||
|
<div className="grid gap-2">
|
||||||
|
{sessions.map(s => (
|
||||||
|
<div key={s.session_id} className="bg-white rounded-lg border border-slate-200 px-4 py-3 flex items-center justify-between">
|
||||||
|
<div>
|
||||||
|
<span className="font-medium text-slate-900">{s.name || s.session_id}</span>
|
||||||
|
<span className="text-sm text-slate-400 ml-2">{s.filename}</span>
|
||||||
|
</div>
|
||||||
|
<div className="text-sm text-slate-500">
|
||||||
|
{s.summary.total_cells} Zellen, {s.summary.total_zones} Zonen
|
||||||
|
{s.pipeline && <span className="ml-2 text-xs bg-slate-100 px-1.5 py-0.5 rounded">{s.pipeline}</span>}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* History Tab */}
|
||||||
|
{tab === 'history' && (
|
||||||
|
<div className="space-y-2">
|
||||||
|
{history.length === 0 ? (
|
||||||
|
<p className="text-center py-8 text-slate-400">Noch keine Laeufe aufgezeichnet.</p>
|
||||||
|
) : (
|
||||||
|
<table className="w-full text-sm">
|
||||||
|
<thead>
|
||||||
|
<tr className="border-b border-slate-200 text-left text-slate-500">
|
||||||
|
<th className="pb-2 font-medium">Datum</th>
|
||||||
|
<th className="pb-2 font-medium">Status</th>
|
||||||
|
<th className="pb-2 font-medium text-right">Gesamt</th>
|
||||||
|
<th className="pb-2 font-medium text-right">Pass</th>
|
||||||
|
<th className="pb-2 font-medium text-right">Fail</th>
|
||||||
|
<th className="pb-2 font-medium text-right">Dauer</th>
|
||||||
|
<th className="pb-2 font-medium">Trigger</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{history.map(run => (
|
||||||
|
<tr key={run.id} className="border-b border-slate-100 hover:bg-slate-50">
|
||||||
|
<td className="py-2">{formatDate(run.run_at)}</td>
|
||||||
|
<td className="py-2"><StatusBadge status={run.status} /></td>
|
||||||
|
<td className="py-2 text-right">{run.total}</td>
|
||||||
|
<td className="py-2 text-right text-emerald-600">{run.passed}</td>
|
||||||
|
<td className="py-2 text-right text-red-600">{run.failed + run.errors}</td>
|
||||||
|
<td className="py-2 text-right text-slate-500">{(run.duration_ms / 1000).toFixed(1)}s</td>
|
||||||
|
<td className="py-2 text-slate-400">{run.triggered_by}</td>
|
||||||
|
</tr>
|
||||||
|
))}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</AIToolsSidebarResponsive>
|
||||||
|
)
|
||||||
|
}
|
||||||
@@ -182,6 +182,24 @@ export const navigation: NavCategory[] = [
|
|||||||
// -----------------------------------------------------------------------
|
// -----------------------------------------------------------------------
|
||||||
// KI-Anwendungen: Endnutzer-orientierte KI-Module
|
// KI-Anwendungen: Endnutzer-orientierte KI-Module
|
||||||
// -----------------------------------------------------------------------
|
// -----------------------------------------------------------------------
|
||||||
|
{
|
||||||
|
id: 'ocr-regression',
|
||||||
|
name: 'OCR Regression',
|
||||||
|
href: '/ai/ocr-regression',
|
||||||
|
description: 'Regressions-Tests & Ground Truth',
|
||||||
|
purpose: 'Regressions-Tests fuer die OCR-Pipeline ausfuehren. Zeigt Pass/Fail pro Ground-Truth Session, Diff-Details und Verlauf vergangener Laeufe.',
|
||||||
|
audience: ['Entwickler', 'QA'],
|
||||||
|
subgroup: 'KI-Werkzeuge',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'ocr-ground-truth',
|
||||||
|
name: 'Ground Truth Review',
|
||||||
|
href: '/ai/ocr-ground-truth',
|
||||||
|
description: 'Ground Truth pruefen & markieren',
|
||||||
|
purpose: 'Effiziente Massenpruefung von OCR-Sessions. Split-View mit Confidence-Highlighting, Quick-Accept und Batch-Markierung als Ground Truth.',
|
||||||
|
audience: ['Entwickler', 'QA'],
|
||||||
|
subgroup: 'KI-Werkzeuge',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
id: 'agents',
|
id: 'agents',
|
||||||
name: 'Agent Management',
|
name: 'Agent Management',
|
||||||
|
|||||||
166
docs-src/development/regression-testing.md
Normal file
166
docs-src/development/regression-testing.md
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
# OCR Pipeline Regression Testing
|
||||||
|
|
||||||
|
**Stand:** 2026-03-23
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Uebersicht
|
||||||
|
|
||||||
|
Das Regression Framework stellt sicher, dass Aenderungen an der OCR-Pipeline keine bestehenden
|
||||||
|
Ergebnisse verschlechtern. Ground-Truth Sessions dienen als Referenz — nach jeder Code-Aenderung
|
||||||
|
wird die Pipeline neu ausgefuehrt und das Ergebnis mit der Referenz verglichen.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Ground Truth markieren
|
||||||
|
|
||||||
|
### Via Admin-UI (empfohlen)
|
||||||
|
|
||||||
|
1. Oeffne die OCR Pipeline: [/ai/ocr-pipeline](https://macmini:3002/ai/ocr-pipeline)
|
||||||
|
2. Lade eine Session und fuehre alle Pipeline-Schritte aus
|
||||||
|
3. Pruefe das Ergebnis im Grid Editor (Schritt 10)
|
||||||
|
4. Korrigiere Fehler falls noetig (Inline-Edit)
|
||||||
|
5. Klicke **"Als Ground Truth markieren"**
|
||||||
|
|
||||||
|
### Via API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Bestehende Session als Ground Truth markieren
|
||||||
|
curl -X POST "http://macmini:8086/api/v1/ocr-pipeline/sessions/{session_id}/mark-ground-truth"
|
||||||
|
|
||||||
|
# Ground Truth entfernen
|
||||||
|
curl -X DELETE "http://macmini:8086/api/v1/ocr-pipeline/sessions/{session_id}/mark-ground-truth"
|
||||||
|
|
||||||
|
# Alle Ground-Truth Sessions auflisten
|
||||||
|
curl "http://macmini:8086/api/v1/ocr-pipeline/ground-truth-sessions"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Via Ground-Truth Review UI
|
||||||
|
|
||||||
|
Fuer die Massenpruefung von 50-100 Sessions:
|
||||||
|
|
||||||
|
1. Oeffne [/ai/ocr-ground-truth](https://macmini:3002/ai/ocr-ground-truth)
|
||||||
|
2. Filter auf "Offen" (ungeprueft)
|
||||||
|
3. Split-View: Bild links, Grid rechts pruefen
|
||||||
|
4. Korrekte Zeilen mit Haekchen bestaetigen
|
||||||
|
5. Fehler inline korrigieren
|
||||||
|
6. "Markieren & Weiter" fuer naechste Session
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Regression ausfuehren
|
||||||
|
|
||||||
|
### Via Shell-Script (CI/CD)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Standard: macmini:8086
|
||||||
|
./scripts/run-regression.sh
|
||||||
|
|
||||||
|
# Custom URL
|
||||||
|
./scripts/run-regression.sh http://localhost:8086
|
||||||
|
|
||||||
|
# Exit-Codes:
|
||||||
|
# 0 = alle bestanden
|
||||||
|
# 1 = Fehler gefunden
|
||||||
|
# 2 = Verbindungsfehler
|
||||||
|
```
|
||||||
|
|
||||||
|
### Via Admin-UI
|
||||||
|
|
||||||
|
1. Oeffne [/ai/ocr-regression](https://macmini:3002/ai/ocr-regression)
|
||||||
|
2. Klicke **"Alle Tests starten"**
|
||||||
|
3. Ergebnis: Pass/Fail pro Session mit Diff-Details
|
||||||
|
|
||||||
|
### Via API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Alle Ground-Truth Sessions testen
|
||||||
|
curl -X POST "http://macmini:8086/api/v1/ocr-pipeline/regression/run?triggered_by=script"
|
||||||
|
|
||||||
|
# Einzelne Session testen
|
||||||
|
curl -X POST "http://macmini:8086/api/v1/ocr-pipeline/sessions/{session_id}/regression/run"
|
||||||
|
|
||||||
|
# Verlauf abrufen
|
||||||
|
curl "http://macmini:8086/api/v1/ocr-pipeline/regression/history?limit=20"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Ergebnisse lesen
|
||||||
|
|
||||||
|
### Diff-Typen
|
||||||
|
|
||||||
|
| Typ | Beschreibung |
|
||||||
|
|-----|-------------|
|
||||||
|
| `structural_changes` | Anzahl Zonen, Spalten oder Zeilen hat sich geaendert |
|
||||||
|
| `text_change` | Text einer Zelle hat sich geaendert |
|
||||||
|
| `cell_missing` | Zelle war in der Referenz, fehlt jetzt |
|
||||||
|
| `cell_added` | Neue Zelle die in der Referenz nicht existierte |
|
||||||
|
| `col_type_change` | Spaltentyp einer Zelle hat sich geaendert |
|
||||||
|
|
||||||
|
### Status-Bewertung
|
||||||
|
|
||||||
|
- **pass**: Keine Diffs → Code-Aenderung hat keine Auswirkung
|
||||||
|
- **fail**: Diffs gefunden → pruefen ob gewollt (Feature) oder ungewollt (Regression)
|
||||||
|
- **error**: Pipeline-Fehler → Build oder Config-Problem
|
||||||
|
|
||||||
|
### Verlauf
|
||||||
|
|
||||||
|
Alle Laeufe werden in der Tabelle `regression_runs` persistiert:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT id, run_at, status, total, passed, failed, errors, duration_ms, triggered_by
|
||||||
|
FROM regression_runs
|
||||||
|
ORDER BY run_at DESC
|
||||||
|
LIMIT 10;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
### Ground-Truth Sessions waehlen
|
||||||
|
|
||||||
|
Decke verschiedene Seitentypen ab:
|
||||||
|
|
||||||
|
- Woerterbuchseiten (2-3 Spalten, IPA-Klammern)
|
||||||
|
- Uebungsseiten (Tabellen, Checkboxen)
|
||||||
|
- Seiten mit Illustrationen
|
||||||
|
- Seiten ohne IPA (reines Deutsch-Vokabular)
|
||||||
|
- Verschiedene Verlage und Layouts
|
||||||
|
|
||||||
|
### Workflow vor jedem Commit
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Regression laufen lassen
|
||||||
|
./scripts/run-regression.sh
|
||||||
|
|
||||||
|
# 2. Bei Failure: Diff pruefen
|
||||||
|
# - Gewollte Aenderung? → Ground Truth aktualisieren
|
||||||
|
# - Ungewollte Regression? → Code fixen
|
||||||
|
|
||||||
|
# 3. Bei Pass: Commit
|
||||||
|
git add . && git commit -m "fix: ..."
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Datenbank-Schema
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE regression_runs (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
run_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
||||||
|
status VARCHAR(20) NOT NULL, -- pass, fail, error
|
||||||
|
total INT NOT NULL DEFAULT 0,
|
||||||
|
passed INT NOT NULL DEFAULT 0,
|
||||||
|
failed INT NOT NULL DEFAULT 0,
|
||||||
|
errors INT NOT NULL DEFAULT 0,
|
||||||
|
duration_ms INT,
|
||||||
|
results JSONB NOT NULL DEFAULT '[]', -- Detail-Ergebnisse pro Session
|
||||||
|
triggered_by VARCHAR(50) DEFAULT 'manual'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
Ground-Truth Referenzen werden im `ground_truth` JSONB-Feld der
|
||||||
|
`ocr_pipeline_sessions` Tabelle gespeichert.
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
# OCR Pipeline - Schrittweise Seitenrekonstruktion
|
# OCR Pipeline - Schrittweise Seitenrekonstruktion
|
||||||
|
|
||||||
**Version:** 4.7.0
|
**Version:** 5.0.0
|
||||||
**Status:** Produktiv (Schritte 1–10 + Grid Editor implementiert)
|
**Status:** Produktiv (Schritte 1–10 + Grid Editor + Regression Framework)
|
||||||
**URL:** https://macmini:3002/ai/ocr-pipeline
|
**URL:** https://macmini:3002/ai/ocr-pipeline
|
||||||
|
|
||||||
## Uebersicht
|
## Uebersicht
|
||||||
@@ -1197,6 +1197,62 @@ des Headwords der vorherigen Zeile). Diese werden von PaddleOCR als garbled Text
|
|||||||
4. Schlaegt IPA im Britfone-Woerterbuch nach
|
4. Schlaegt IPA im Britfone-Woerterbuch nach
|
||||||
5. Beruecksichtigt alle Wortteile (z.B. "close sth. down" → `[klˈəʊz dˈaʊn]`)
|
5. Beruecksichtigt alle Wortteile (z.B. "close sth. down" → `[klˈəʊz dˈaʊn]`)
|
||||||
|
|
||||||
|
### Compound Word IPA Decomposition (Step 5e)
|
||||||
|
|
||||||
|
Zusammengesetzte Woerter wie "schoolbag" oder "blackbird" haben oft keinen eigenen
|
||||||
|
IPA-Eintrag im Woerterbuch. Die Funktion `_decompose_compound()` zerlegt sie:
|
||||||
|
|
||||||
|
1. Probiere jede Teilungsposition (min. 3 Zeichen pro Teil)
|
||||||
|
2. Wenn beide Teile im Woerterbuch stehen → IPA verketten
|
||||||
|
3. Waehle die Teilung mit dem laengsten ersten Teil
|
||||||
|
|
||||||
|
**Beispiele:**
|
||||||
|
|
||||||
|
| Eingabe | Zerlegung | IPA |
|
||||||
|
|---------|-----------|-----|
|
||||||
|
| schoolbag | school + bag | skˈuːl + bæɡ |
|
||||||
|
| blackbird | black + bird | blæk + bˈɜːd |
|
||||||
|
| ice-cream | ice + cream | aɪs + kɹˈiːm |
|
||||||
|
|
||||||
|
### Trailing Garbled Fragment Removal (Step 5f)
|
||||||
|
|
||||||
|
Nach korrekt erkanntem IPA (z.B. `seat [sˈiːt]`) haengt OCR manchmal
|
||||||
|
eine garbled Kopie der IPA-Transkription an: `seat [sˈiːt] belt si:t belt`.
|
||||||
|
|
||||||
|
**`_strip_post_bracket_garbled()`** erkennt und entfernt diese:
|
||||||
|
|
||||||
|
1. Alles nach dem letzten `]` scannen
|
||||||
|
2. Woerter mit IPA-Markern (`:`, `ə`, `ɪ` etc.) → garbled, entfernen
|
||||||
|
3. Echte Woerter (Woerterbuch, Deutsch, Delimiter) → behalten
|
||||||
|
4. **Multi-Wort-Headword:** "belt" ist ein echtes Wort, aber wenn danach
|
||||||
|
garbled IPA kommt, wird nur "belt" behalten, der Rest entfernt
|
||||||
|
|
||||||
|
### Regression Framework (Step 5g)
|
||||||
|
|
||||||
|
Ground-Truth Sessions koennen als Referenz markiert werden. Nach jeder
|
||||||
|
Code-Aenderung vergleicht `POST /regression/run` die aktuelle Pipeline-Ausgabe
|
||||||
|
mit den gespeicherten Referenzen:
|
||||||
|
|
||||||
|
- **Strukturelle Diffs:** Zonen, Spalten, Zeilen (Anzahl-Aenderungen)
|
||||||
|
- **Zellen-Diffs:** Text-Aenderungen, fehlende/neue Zellen, col_type-Aenderungen
|
||||||
|
- **Persistenz:** Ergebnisse in `regression_runs` Tabelle fuer Trend-Analyse
|
||||||
|
- **Shell-Script:** `scripts/run-regression.sh` fuer CI-Integration
|
||||||
|
|
||||||
|
Admin-UI: [/ai/ocr-regression](https://macmini:3002/ai/ocr-regression)
|
||||||
|
|
||||||
|
### Ground Truth Review Workflow (Step 5h)
|
||||||
|
|
||||||
|
Admin-UI fuer effiziente Massenpruefung von Sessions:
|
||||||
|
|
||||||
|
- **Split-View:** Original-Bild links, erkannter Grid rechts
|
||||||
|
- **Confidence-Highlighting:** Niedrige Konfidenz rot hervorgehoben
|
||||||
|
- **Quick-Accept:** Korrekte Zeilen mit einem Klick bestaetigen
|
||||||
|
- **Inline-Edit:** Text direkt im Grid korrigieren
|
||||||
|
- **Session-Queue:** Automatisch naechste Session laden
|
||||||
|
- **Batch-Mark:** Mehrere Sessions gleichzeitig als Ground Truth markieren
|
||||||
|
|
||||||
|
Admin-UI: [/ai/ocr-ground-truth](https://macmini:3002/ai/ocr-ground-truth)
|
||||||
|
|
||||||
### `en_col_type` Erkennung
|
### `en_col_type` Erkennung
|
||||||
|
|
||||||
Die Erkennung der Englisch-Headword-Spalte nutzt **Bracket-IPA-Pattern-Count**
|
Die Erkennung der Englisch-Headword-Spalte nutzt **Bracket-IPA-Pattern-Count**
|
||||||
@@ -1536,6 +1592,7 @@ cd klausur-service/backend && pytest tests/test_paddle_kombi.py -v # 36 Tests
|
|||||||
|
|
||||||
| Datum | Version | Aenderung |
|
| Datum | Version | Aenderung |
|
||||||
|-------|---------|----------|
|
|-------|---------|----------|
|
||||||
|
| 2026-03-23 | 5.0.0 | **Phase 1 Sprint 1:** Compound-IPA-Zerlegung (`_decompose_compound`), Trailing-Garbled-Fragment-Entfernung (Multi-Wort-Headwords), Regression Framework mit DB-Persistenz + History + Shell-Script, Ground-Truth Review Workflow UI, Page-Crop Determinismus verifiziert. Admin-Seiten: `/ai/ocr-regression`, `/ai/ocr-ground-truth`. |
|
||||||
| 2026-03-20 | 4.7.0 | Grid Editor: Zone Merging ueber Bilder (`image_overlays`), Heading Detection (Farbe + Hoehe), Ghost-Filter (borderless-aware), Oversized Word Box Removal, IPA Phonetic Correction (Britfone), IPA Continuation Detection, `en_col_type` via Bracket-Count. 27 Tests. |
|
| 2026-03-20 | 4.7.0 | Grid Editor: Zone Merging ueber Bilder (`image_overlays`), Heading Detection (Farbe + Hoehe), Ghost-Filter (borderless-aware), Oversized Word Box Removal, IPA Phonetic Correction (Britfone), IPA Continuation Detection, `en_col_type` via Bracket-Count. 27 Tests. |
|
||||||
| 2026-03-16 | 4.6.0 | Strukturerkennung (Schritt 8): Region-basierte Grafikerkennung (`cv_graphic_detect.py`) mit Zwei-Pass-Verfahren (Farbregionen + schwarze Illustrationen), Wort-Ueberlappungs-Filter, Box/Zonen/Farb-Analyse. Schritt laeuft nach Worterkennung. |
|
| 2026-03-16 | 4.6.0 | Strukturerkennung (Schritt 8): Region-basierte Grafikerkennung (`cv_graphic_detect.py`) mit Zwei-Pass-Verfahren (Farbregionen + schwarze Illustrationen), Wort-Ueberlappungs-Filter, Box/Zonen/Farb-Analyse. Schritt laeuft nach Worterkennung. |
|
||||||
| 2026-03-12 | 4.5.0 | Kombi-Modus (PaddleOCR + Tesseract): Beide Engines laufen parallel, Koordinaten werden IoU-basiert gematcht und confidence-gewichtet gemittelt. Ungematchte Tesseract-Woerter (Bullets, Symbole) werden hinzugefuegt. 3er-Toggle in OCR Overlay. |
|
| 2026-03-12 | 4.5.0 | Kombi-Modus (PaddleOCR + Tesseract): Beide Engines laufen parallel, Koordinaten werden IoU-basiert gematcht und confidence-gewichtet gemittelt. Ungematchte Tesseract-Woerter (Bullets, Symbole) werden hinzugefuegt. 3er-Toggle in OCR Overlay. |
|
||||||
|
|||||||
@@ -1032,6 +1032,37 @@ def _text_has_garbled_ipa(text: str) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _decompose_compound(word: str, pronunciation: str = 'british') -> Optional[str]:
    """Try to decompose a compound word and concatenate IPA for each part.

    E.g. "schoolbag" -> "school" + "bag" -> IPA for both, concatenated.
    Returns IPA only when BOTH parts are found in the dictionary.

    Every split position is tried (min. 3 characters per part); the split
    with the longest first part wins.
    """
    if not IPA_AVAILABLE:
        return None

    candidate = word.lower().strip()
    if len(candidate) < 6:
        # Too short for a compound: we need at least 3 + 3 characters.
        return None

    best: Optional[str] = None
    best_first_len = 0

    # split_pos increases monotonically, so any later match automatically
    # has a longer first part than the current best.
    for split_pos in range(3, len(candidate) - 2):
        head_ipa = _lookup_ipa(candidate[:split_pos], pronunciation)
        tail_ipa = _lookup_ipa(candidate[split_pos:], pronunciation)
        if head_ipa and tail_ipa and split_pos > best_first_len:
            best_first_len = split_pos
            best = head_ipa + tail_ipa

    return best
|
||||||
|
|
||||||
|
|
||||||
def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str:
|
def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str:
|
||||||
"""Insert IPA pronunciation for English words that have no brackets at all.
|
"""Insert IPA pronunciation for English words that have no brackets at all.
|
||||||
|
|
||||||
@@ -1077,6 +1108,10 @@ def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str:
|
|||||||
# Fallback: try without hyphens (e.g. "second-hand" → "secondhand")
|
# Fallback: try without hyphens (e.g. "second-hand" → "secondhand")
|
||||||
if not ipa and '-' in clean:
|
if not ipa and '-' in clean:
|
||||||
ipa = _lookup_ipa(clean.replace('-', ''), pronunciation)
|
ipa = _lookup_ipa(clean.replace('-', ''), pronunciation)
|
||||||
|
# Fallback 0b: compound word decomposition
|
||||||
|
# E.g. "schoolbag" → "school"+"bag" → concatenated IPA
|
||||||
|
if not ipa:
|
||||||
|
ipa = _decompose_compound(clean, pronunciation)
|
||||||
# Fallback 1: IPA-marker split for merged tokens where OCR
|
# Fallback 1: IPA-marker split for merged tokens where OCR
|
||||||
# joined headword with its IPA (e.g. "schoolbagsku:lbæg").
|
# joined headword with its IPA (e.g. "schoolbagsku:lbæg").
|
||||||
# Find the first IPA marker character (:, æ, ɪ, etc.), walk
|
# Find the first IPA marker character (:, æ, ɪ, etc.), walk
|
||||||
@@ -1098,6 +1133,9 @@ def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str:
|
|||||||
headword = w[:split]
|
headword = w[:split]
|
||||||
ocr_ipa = w[split:]
|
ocr_ipa = w[split:]
|
||||||
hw_ipa = _lookup_ipa(headword, pronunciation)
|
hw_ipa = _lookup_ipa(headword, pronunciation)
|
||||||
|
if not hw_ipa:
|
||||||
|
# Try compound decomposition for the headword part
|
||||||
|
hw_ipa = _decompose_compound(headword, pronunciation)
|
||||||
if hw_ipa:
|
if hw_ipa:
|
||||||
words[i] = f"{headword} [{hw_ipa}]"
|
words[i] = f"{headword} [{hw_ipa}]"
|
||||||
else:
|
else:
|
||||||
@@ -1197,6 +1235,12 @@ def _strip_post_bracket_garbled(
|
|||||||
|
|
||||||
E.g. ``sea [sˈiː] si:`` → ``sea [sˈiː]``
|
E.g. ``sea [sˈiː] si:`` → ``sea [sˈiː]``
|
||||||
``seat [sˈiːt] si:t`` → ``seat [sˈiːt]``
|
``seat [sˈiːt] si:t`` → ``seat [sˈiːt]``
|
||||||
|
``seat [sˈiːt] belt si:t belt`` → ``seat [sˈiːt] belt``
|
||||||
|
|
||||||
|
For multi-word headwords like "seat belt", a real English word ("belt")
|
||||||
|
may be followed by garbled IPA duplicates. We detect this by checking
|
||||||
|
whether the sequence after a real word contains IPA markers (`:`, `ə`,
|
||||||
|
etc.) — if so, everything from the first garbled token onward is stripped.
|
||||||
"""
|
"""
|
||||||
if ']' not in text:
|
if ']' not in text:
|
||||||
return text
|
return text
|
||||||
@@ -1207,6 +1251,8 @@ def _strip_post_bracket_garbled(
|
|||||||
after = text[last_bracket + 1:].strip()
|
after = text[last_bracket + 1:].strip()
|
||||||
if not after:
|
if not after:
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
_IPA_MARKER_CHARS = set(':əɪɛɒʊʌæɑɔʃʒθðŋˈˌ')
|
||||||
after_words = after.split()
|
after_words = after.split()
|
||||||
kept: List[str] = []
|
kept: List[str] = []
|
||||||
for idx, w in enumerate(after_words):
|
for idx, w in enumerate(after_words):
|
||||||
@@ -1215,17 +1261,42 @@ def _strip_post_bracket_garbled(
|
|||||||
kept.extend(after_words[idx:])
|
kept.extend(after_words[idx:])
|
||||||
break
|
break
|
||||||
# Contains IPA markers (length mark, IPA chars) — garbled, skip
|
# Contains IPA markers (length mark, IPA chars) — garbled, skip
|
||||||
if ':' in w or any(c in w for c in 'əɪɛɒʊʌæɑɔʃʒθðŋˈˌ'):
|
if any(c in w for c in _IPA_MARKER_CHARS):
|
||||||
|
# Everything from here is garbled IPA — stop scanning
|
||||||
|
# but look ahead: if any remaining words are real English
|
||||||
|
# words WITHOUT IPA markers, they might be a different headword
|
||||||
|
# following. Only skip the contiguous garbled run.
|
||||||
continue
|
continue
|
||||||
clean = re.sub(r'[^a-zA-Z]', '', w)
|
clean = re.sub(r'[^a-zA-Z]', '', w)
|
||||||
# Uppercase — likely German, keep rest
|
# Uppercase — likely German, keep rest
|
||||||
if clean and clean[0].isupper():
|
if clean and clean[0].isupper():
|
||||||
kept.extend(after_words[idx:])
|
kept.extend(after_words[idx:])
|
||||||
break
|
break
|
||||||
# Known English word — keep rest
|
# Known English word — keep it, but check if followed by garbled IPA
|
||||||
|
# (multi-word headword case like "seat [siːt] belt si:t belt")
|
||||||
if clean and len(clean) >= 2 and _lookup_ipa(clean, pronunciation):
|
if clean and len(clean) >= 2 and _lookup_ipa(clean, pronunciation):
|
||||||
kept.extend(after_words[idx:])
|
# Peek ahead: if next word has IPA markers, the rest is garbled
|
||||||
break
|
remaining = after_words[idx + 1:]
|
||||||
|
has_garbled_after = any(
|
||||||
|
any(c in rw for c in _IPA_MARKER_CHARS)
|
||||||
|
for rw in remaining
|
||||||
|
)
|
||||||
|
if has_garbled_after:
|
||||||
|
# Keep this real word but stop — rest is garbled duplication
|
||||||
|
kept.append(w)
|
||||||
|
# Still scan for delimiters/German in the remaining words
|
||||||
|
for ridx, rw in enumerate(remaining):
|
||||||
|
if rw in ('–', '—', '-', '/', '|', ',', ';'):
|
||||||
|
kept.extend(remaining[ridx:])
|
||||||
|
break
|
||||||
|
rclean = re.sub(r'[^a-zA-Z]', '', rw)
|
||||||
|
if rclean and rclean[0].isupper():
|
||||||
|
kept.extend(remaining[ridx:])
|
||||||
|
break
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
kept.extend(after_words[idx:])
|
||||||
|
break
|
||||||
# Unknown short word — likely garbled, skip
|
# Unknown short word — likely garbled, skip
|
||||||
if kept:
|
if kept:
|
||||||
return before + ' ' + ' '.join(kept)
|
return before + ' ' + ' '.join(kept)
|
||||||
|
|||||||
18
klausur-service/backend/migrations/008_regression_runs.sql
Normal file
18
klausur-service/backend/migrations/008_regression_runs.sql
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
-- Migration 008: Regression test run history
-- Stores results of regression test runs for trend analysis.
-- Idempotent: safe to re-run (IF NOT EXISTS on both table and index).

CREATE TABLE IF NOT EXISTS regression_runs (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    run_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    status VARCHAR(20) NOT NULL, -- 'pass', 'fail', 'error'
    total INT NOT NULL DEFAULT 0,   -- number of sessions tested in this run
    passed INT NOT NULL DEFAULT 0,
    failed INT NOT NULL DEFAULT 0,
    errors INT NOT NULL DEFAULT 0,
    duration_ms INT,                -- wall-clock duration; NULL when unknown
    results JSONB NOT NULL DEFAULT '[]', -- per-session detail results
    triggered_by VARCHAR(50) DEFAULT 'manual' -- 'manual', 'script', 'ci'
);

-- Supports "latest runs first" history queries (ORDER BY run_at DESC).
CREATE INDEX IF NOT EXISTS idx_regression_runs_run_at
    ON regression_runs (run_at DESC);
|
||||||
@@ -8,7 +8,11 @@ Lizenz: Apache 2.0
|
|||||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
@@ -16,6 +20,7 @@ from fastapi import APIRouter, HTTPException, Query
|
|||||||
|
|
||||||
from grid_editor_api import _build_grid_core
|
from grid_editor_api import _build_grid_core
|
||||||
from ocr_pipeline_session_store import (
|
from ocr_pipeline_session_store import (
|
||||||
|
get_pool,
|
||||||
get_session_db,
|
get_session_db,
|
||||||
list_ground_truth_sessions_db,
|
list_ground_truth_sessions_db,
|
||||||
update_session_db,
|
update_session_db,
|
||||||
@@ -26,6 +31,60 @@ logger = logging.getLogger(__name__)
|
|||||||
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["regression"])
|
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["regression"])
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# DB persistence for regression runs
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def _init_regression_table():
|
||||||
|
"""Ensure regression_runs table exists (idempotent)."""
|
||||||
|
pool = await get_pool()
|
||||||
|
async with pool.acquire() as conn:
|
||||||
|
migration_path = os.path.join(
|
||||||
|
os.path.dirname(__file__),
|
||||||
|
"migrations/008_regression_runs.sql",
|
||||||
|
)
|
||||||
|
if os.path.exists(migration_path):
|
||||||
|
with open(migration_path, "r") as f:
|
||||||
|
sql = f.read()
|
||||||
|
await conn.execute(sql)
|
||||||
|
|
||||||
|
|
||||||
|
async def _persist_regression_run(
|
||||||
|
status: str,
|
||||||
|
summary: dict,
|
||||||
|
results: list,
|
||||||
|
duration_ms: int,
|
||||||
|
triggered_by: str = "manual",
|
||||||
|
) -> str:
|
||||||
|
"""Save a regression run to the database. Returns the run ID."""
|
||||||
|
try:
|
||||||
|
await _init_regression_table()
|
||||||
|
pool = await get_pool()
|
||||||
|
run_id = str(uuid.uuid4())
|
||||||
|
async with pool.acquire() as conn:
|
||||||
|
await conn.execute(
|
||||||
|
"""
|
||||||
|
INSERT INTO regression_runs
|
||||||
|
(id, status, total, passed, failed, errors, duration_ms, results, triggered_by)
|
||||||
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8::jsonb, $9)
|
||||||
|
""",
|
||||||
|
run_id,
|
||||||
|
status,
|
||||||
|
summary.get("total", 0),
|
||||||
|
summary.get("passed", 0),
|
||||||
|
summary.get("failed", 0),
|
||||||
|
summary.get("errors", 0),
|
||||||
|
duration_ms,
|
||||||
|
json.dumps(results),
|
||||||
|
triggered_by,
|
||||||
|
)
|
||||||
|
logger.info("Regression run %s persisted: %s", run_id, status)
|
||||||
|
return run_id
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Failed to persist regression run: %s", e)
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Helpers
|
# Helpers
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -299,8 +358,11 @@ async def run_single_regression(session_id: str):
|
|||||||
|
|
||||||
|
|
||||||
@router.post("/regression/run")
|
@router.post("/regression/run")
|
||||||
async def run_all_regressions():
|
async def run_all_regressions(
|
||||||
|
triggered_by: str = Query("manual", description="Who triggered: manual, script, ci"),
|
||||||
|
):
|
||||||
"""Re-run build_grid for ALL ground-truth sessions and compare."""
|
"""Re-run build_grid for ALL ground-truth sessions and compare."""
|
||||||
|
start_time = time.monotonic()
|
||||||
sessions = await list_ground_truth_sessions_db()
|
sessions = await list_ground_truth_sessions_db()
|
||||||
|
|
||||||
if not sessions:
|
if not sessions:
|
||||||
@@ -370,19 +432,105 @@ async def run_all_regressions():
|
|||||||
results.append(entry)
|
results.append(entry)
|
||||||
|
|
||||||
overall = "pass" if failed == 0 and errors == 0 else "fail"
|
overall = "pass" if failed == 0 and errors == 0 else "fail"
|
||||||
|
duration_ms = int((time.monotonic() - start_time) * 1000)
|
||||||
|
|
||||||
|
summary = {
|
||||||
|
"total": len(results),
|
||||||
|
"passed": passed,
|
||||||
|
"failed": failed,
|
||||||
|
"errors": errors,
|
||||||
|
}
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Regression suite: %s — %d passed, %d failed, %d errors (of %d)",
|
"Regression suite: %s — %d passed, %d failed, %d errors (of %d) in %dms",
|
||||||
overall, passed, failed, errors, len(results),
|
overall, passed, failed, errors, len(results), duration_ms,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Persist to DB
|
||||||
|
run_id = await _persist_regression_run(
|
||||||
|
status=overall,
|
||||||
|
summary=summary,
|
||||||
|
results=results,
|
||||||
|
duration_ms=duration_ms,
|
||||||
|
triggered_by=triggered_by,
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": overall,
|
"status": overall,
|
||||||
|
"run_id": run_id,
|
||||||
|
"duration_ms": duration_ms,
|
||||||
"results": results,
|
"results": results,
|
||||||
"summary": {
|
"summary": summary,
|
||||||
"total": len(results),
|
|
||||||
"passed": passed,
|
|
||||||
"failed": failed,
|
|
||||||
"errors": errors,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/regression/history")
|
||||||
|
async def get_regression_history(
|
||||||
|
limit: int = Query(20, ge=1, le=100),
|
||||||
|
):
|
||||||
|
"""Get recent regression run history from the database."""
|
||||||
|
try:
|
||||||
|
await _init_regression_table()
|
||||||
|
pool = await get_pool()
|
||||||
|
async with pool.acquire() as conn:
|
||||||
|
rows = await conn.fetch(
|
||||||
|
"""
|
||||||
|
SELECT id, run_at, status, total, passed, failed, errors,
|
||||||
|
duration_ms, triggered_by
|
||||||
|
FROM regression_runs
|
||||||
|
ORDER BY run_at DESC
|
||||||
|
LIMIT $1
|
||||||
|
""",
|
||||||
|
limit,
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"runs": [
|
||||||
|
{
|
||||||
|
"id": str(row["id"]),
|
||||||
|
"run_at": row["run_at"].isoformat() if row["run_at"] else None,
|
||||||
|
"status": row["status"],
|
||||||
|
"total": row["total"],
|
||||||
|
"passed": row["passed"],
|
||||||
|
"failed": row["failed"],
|
||||||
|
"errors": row["errors"],
|
||||||
|
"duration_ms": row["duration_ms"],
|
||||||
|
"triggered_by": row["triggered_by"],
|
||||||
|
}
|
||||||
|
for row in rows
|
||||||
|
],
|
||||||
|
"count": len(rows),
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Failed to fetch regression history: %s", e)
|
||||||
|
return {"runs": [], "count": 0, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/regression/history/{run_id}")
|
||||||
|
async def get_regression_run_detail(run_id: str):
|
||||||
|
"""Get detailed results of a specific regression run."""
|
||||||
|
try:
|
||||||
|
await _init_regression_table()
|
||||||
|
pool = await get_pool()
|
||||||
|
async with pool.acquire() as conn:
|
||||||
|
row = await conn.fetchrow(
|
||||||
|
"SELECT * FROM regression_runs WHERE id = $1",
|
||||||
|
run_id,
|
||||||
|
)
|
||||||
|
if not row:
|
||||||
|
raise HTTPException(status_code=404, detail="Run not found")
|
||||||
|
return {
|
||||||
|
"id": str(row["id"]),
|
||||||
|
"run_at": row["run_at"].isoformat() if row["run_at"] else None,
|
||||||
|
"status": row["status"],
|
||||||
|
"total": row["total"],
|
||||||
|
"passed": row["passed"],
|
||||||
|
"failed": row["failed"],
|
||||||
|
"errors": row["errors"],
|
||||||
|
"duration_ms": row["duration_ms"],
|
||||||
|
"triggered_by": row["triggered_by"],
|
||||||
|
"results": json.loads(row["results"]) if row["results"] else [],
|
||||||
|
}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|||||||
@@ -57,6 +57,63 @@ class TestInsertMissingIpa:
|
|||||||
result = _insert_missing_ipa("Anstecknadel", "british")
|
result = _insert_missing_ipa("Anstecknadel", "british")
|
||||||
assert result == "Anstecknadel"
|
assert result == "Anstecknadel"
|
||||||
|
|
||||||
|
def test_compound_word_schoolbag_gets_ipa(self):
|
||||||
|
"""R07: Compound word 'schoolbag' should get decomposed IPA (school+bag)."""
|
||||||
|
from cv_ocr_engines import _insert_missing_ipa
|
||||||
|
result = _insert_missing_ipa("schoolbag", "british")
|
||||||
|
assert "[" in result and "]" in result
|
||||||
|
assert result.startswith("schoolbag [")
|
||||||
|
|
||||||
|
def test_compound_word_blackbird(self):
|
||||||
|
"""Compound word 'blackbird' should get decomposed IPA."""
|
||||||
|
from cv_ocr_engines import _insert_missing_ipa
|
||||||
|
result = _insert_missing_ipa("blackbird", "british")
|
||||||
|
assert "[" in result and "]" in result
|
||||||
|
|
||||||
|
def test_compound_word_too_short(self):
|
||||||
|
"""Words shorter than 6 chars should not attempt compound decomposition."""
|
||||||
|
from cv_ocr_engines import _decompose_compound
|
||||||
|
assert _decompose_compound("bag", "british") is None
|
||||||
|
|
||||||
|
def test_decompose_compound_direct(self):
|
||||||
|
"""Direct test of _decompose_compound for known compounds."""
|
||||||
|
from cv_ocr_engines import _decompose_compound
|
||||||
|
# schoolbag = school + bag — both should be in dictionary
|
||||||
|
result = _decompose_compound("schoolbag", "british")
|
||||||
|
assert result is not None
|
||||||
|
|
||||||
|
|
||||||
|
class TestStripPostBracketGarbled:
|
||||||
|
"""Tests for _strip_post_bracket_garbled — trailing garbled IPA removal."""
|
||||||
|
|
||||||
|
def test_simple_trailing_garbled(self):
|
||||||
|
"""R21-simple: 'sea [sˈiː] si:' → trailing IPA marker removed."""
|
||||||
|
from cv_ocr_engines import _strip_post_bracket_garbled
|
||||||
|
result = _strip_post_bracket_garbled("sea [sˈiː] si:")
|
||||||
|
assert "si:" not in result
|
||||||
|
assert result.startswith("sea [sˈiː]")
|
||||||
|
|
||||||
|
def test_multi_word_trailing_garbled(self):
|
||||||
|
"""R21: 'seat [sˈiːt] belt si:t belt' → keep 'belt', remove garbled."""
|
||||||
|
from cv_ocr_engines import _strip_post_bracket_garbled
|
||||||
|
result = _strip_post_bracket_garbled("seat [sˈiːt] belt si:t belt")
|
||||||
|
assert "belt" in result # real word kept
|
||||||
|
assert "si:t" not in result # garbled removed
|
||||||
|
# Should contain "seat [sˈiːt] belt" but not the garbled duplication
|
||||||
|
assert result.count("belt") == 1
|
||||||
|
|
||||||
|
def test_delimiter_after_bracket_kept(self):
|
||||||
|
"""Delimiters after IPA bracket are kept."""
|
||||||
|
from cv_ocr_engines import _strip_post_bracket_garbled
|
||||||
|
result = _strip_post_bracket_garbled("dance [dˈɑːns] – tanzen")
|
||||||
|
assert "– tanzen" in result
|
||||||
|
|
||||||
|
def test_german_after_bracket_kept(self):
|
||||||
|
"""German words (uppercase) after IPA bracket are kept."""
|
||||||
|
from cv_ocr_engines import _strip_post_bracket_garbled
|
||||||
|
result = _strip_post_bracket_garbled("badge [bædʒ] Abzeichen")
|
||||||
|
assert "Abzeichen" in result
|
||||||
|
|
||||||
|
|
||||||
class TestFixCellPhonetics:
|
class TestFixCellPhonetics:
|
||||||
"""Tests for fix_cell_phonetics function."""
|
"""Tests for fix_cell_phonetics function."""
|
||||||
|
|||||||
@@ -415,3 +415,53 @@ class TestDetectAndCropPage:
|
|||||||
assert 0 <= pct["y"] <= 100
|
assert 0 <= pct["y"] <= 100
|
||||||
assert 0 < pct["width"] <= 100
|
assert 0 < pct["width"] <= 100
|
||||||
assert 0 < pct["height"] <= 100
|
assert 0 < pct["height"] <= 100
|
||||||
|
|
||||||
|
|
||||||
|
class TestCropDeterminism:
|
||||||
|
"""A3: Verify that page crop produces identical results across N runs."""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("image_factory,desc", [
|
||||||
|
(
|
||||||
|
lambda: _make_image_with_content(800, 600, (100, 700, 80, 520)),
|
||||||
|
"standard content",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
lambda: _make_book_scan(1000, 800),
|
||||||
|
"book scan with spine shadow",
|
||||||
|
),
|
||||||
|
])
|
||||||
|
def test_determinism_10_runs(self, image_factory, desc):
|
||||||
|
"""Same image must produce identical crops in 10 consecutive runs."""
|
||||||
|
img = image_factory()
|
||||||
|
results = []
|
||||||
|
for _ in range(10):
|
||||||
|
cropped, result = detect_and_crop_page(img.copy())
|
||||||
|
results.append({
|
||||||
|
"crop_applied": result["crop_applied"],
|
||||||
|
"cropped_size": result["cropped_size"],
|
||||||
|
"border_fractions": result["border_fractions"],
|
||||||
|
"shape": cropped.shape,
|
||||||
|
})
|
||||||
|
|
||||||
|
first = results[0]
|
||||||
|
for i, r in enumerate(results[1:], 1):
|
||||||
|
assert r["crop_applied"] == first["crop_applied"], (
|
||||||
|
f"Run {i} crop_applied differs from run 0 ({desc})"
|
||||||
|
)
|
||||||
|
assert r["cropped_size"] == first["cropped_size"], (
|
||||||
|
f"Run {i} cropped_size differs from run 0 ({desc})"
|
||||||
|
)
|
||||||
|
assert r["shape"] == first["shape"], (
|
||||||
|
f"Run {i} output shape differs from run 0 ({desc})"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_determinism_pixel_identical(self):
|
||||||
|
"""Crop output pixels must be identical across runs."""
|
||||||
|
img = _make_image_with_content(800, 600, (100, 700, 80, 520))
|
||||||
|
ref_crop, _ = detect_and_crop_page(img.copy())
|
||||||
|
|
||||||
|
for i in range(5):
|
||||||
|
crop, _ = detect_and_crop_page(img.copy())
|
||||||
|
assert np.array_equal(ref_crop, crop), (
|
||||||
|
f"Run {i} produced different pixel output"
|
||||||
|
)
|
||||||
|
|||||||
@@ -84,5 +84,6 @@ nav:
|
|||||||
- Zeugnis-System: architecture/zeugnis-system.md
|
- Zeugnis-System: architecture/zeugnis-system.md
|
||||||
- Entwicklung:
|
- Entwicklung:
|
||||||
- Testing: development/testing.md
|
- Testing: development/testing.md
|
||||||
|
- Regression Testing: development/regression-testing.md
|
||||||
- Dokumentation: development/documentation.md
|
- Dokumentation: development/documentation.md
|
||||||
- CI/CD Pipeline: development/ci-cd-pipeline.md
|
- CI/CD Pipeline: development/ci-cd-pipeline.md
|
||||||
|
|||||||
163
scripts/benchmark-trocr.py
Executable file
163
scripts/benchmark-trocr.py
Executable file
@@ -0,0 +1,163 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
TrOCR Baseline Benchmark — measures PyTorch TrOCR performance.
|
||||||
|
|
||||||
|
Metrics:
|
||||||
|
- RAM usage (RSS) before and after model load
|
||||||
|
- Inference time per line (min, max, mean, p50, p95)
|
||||||
|
- Model size on disk
|
||||||
|
|
||||||
|
Output: JSON report to stdout (redirect to file for Sprint 2 comparison).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/benchmark-trocr.py [--model trocr-base-printed] [--runs 10]
|
||||||
|
python scripts/benchmark-trocr.py > benchmark-trocr-baseline.json
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Add backend to path for imports
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'klausur-service', 'backend'))
|
||||||
|
|
||||||
|
|
||||||
|
def get_rss_mb():
|
||||||
|
"""Get current process RSS in MB."""
|
||||||
|
import resource
|
||||||
|
# resource.getrusage returns KB on Linux, bytes on macOS
|
||||||
|
usage = resource.getrusage(resource.RUSAGE_SELF)
|
||||||
|
rss = usage.ru_maxrss
|
||||||
|
if sys.platform == 'darwin':
|
||||||
|
return rss / (1024 * 1024) # bytes to MB on macOS
|
||||||
|
return rss / 1024 # KB to MB on Linux
|
||||||
|
|
||||||
|
|
||||||
|
def get_model_size_mb(model_name):
|
||||||
|
"""Estimate model size from HuggingFace cache."""
|
||||||
|
cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
|
||||||
|
total = 0
|
||||||
|
model_dir_pattern = model_name.replace('/', '--')
|
||||||
|
for root, dirs, files in os.walk(cache_dir):
|
||||||
|
if model_dir_pattern in root:
|
||||||
|
for f in files:
|
||||||
|
total += os.path.getsize(os.path.join(root, f))
|
||||||
|
return total / (1024 * 1024) # bytes to MB
|
||||||
|
|
||||||
|
|
||||||
|
def benchmark_trocr(model_name: str = "microsoft/trocr-base-printed", num_runs: int = 10):
|
||||||
|
"""Run TrOCR benchmark and return results dict."""
|
||||||
|
import numpy as np
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
rss_before = get_rss_mb()
|
||||||
|
|
||||||
|
# Load model
|
||||||
|
print(f"Loading model: {model_name}", file=sys.stderr)
|
||||||
|
load_start = time.monotonic()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
||||||
|
processor = TrOCRProcessor.from_pretrained(model_name)
|
||||||
|
model = VisionEncoderDecoderModel.from_pretrained(model_name)
|
||||||
|
model.eval()
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": f"Failed to load model: {e}"}
|
||||||
|
|
||||||
|
load_time = time.monotonic() - load_start
|
||||||
|
rss_after_load = get_rss_mb()
|
||||||
|
model_size = get_model_size_mb(model_name)
|
||||||
|
|
||||||
|
print(f"Model loaded in {load_time:.1f}s, RSS: {rss_after_load:.0f}MB", file=sys.stderr)
|
||||||
|
|
||||||
|
# Create synthetic test images (text line images)
|
||||||
|
test_images = []
|
||||||
|
for i in range(num_runs):
|
||||||
|
# Create a simple white image with black text-like content
|
||||||
|
# In production, these would be real cropped text lines
|
||||||
|
w, h = 384, 48 # typical TrOCR input size
|
||||||
|
img = Image.new('RGB', (w, h), 'white')
|
||||||
|
# Add some variation
|
||||||
|
pixels = img.load()
|
||||||
|
# Simple dark region to simulate text
|
||||||
|
for x in range(50 + i * 10, 200 + i * 5):
|
||||||
|
for y in range(10, 38):
|
||||||
|
pixels[x, y] = (30, 30, 30)
|
||||||
|
test_images.append(img)
|
||||||
|
|
||||||
|
# Warm-up run (not counted)
|
||||||
|
print("Warm-up...", file=sys.stderr)
|
||||||
|
import torch
|
||||||
|
with torch.no_grad():
|
||||||
|
pixel_values = processor(images=test_images[0], return_tensors="pt").pixel_values
|
||||||
|
_ = model.generate(pixel_values, max_new_tokens=50)
|
||||||
|
|
||||||
|
# Benchmark runs
|
||||||
|
print(f"Running {num_runs} inference passes...", file=sys.stderr)
|
||||||
|
times_ms = []
|
||||||
|
for i, img in enumerate(test_images):
|
||||||
|
start = time.monotonic()
|
||||||
|
with torch.no_grad():
|
||||||
|
pixel_values = processor(images=img, return_tensors="pt").pixel_values
|
||||||
|
generated_ids = model.generate(pixel_values, max_new_tokens=50)
|
||||||
|
text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
||||||
|
elapsed_ms = (time.monotonic() - start) * 1000
|
||||||
|
times_ms.append(elapsed_ms)
|
||||||
|
print(f" Run {i+1}/{num_runs}: {elapsed_ms:.0f}ms -> '{text[:30]}'", file=sys.stderr)
|
||||||
|
|
||||||
|
rss_after_inference = get_rss_mb()
|
||||||
|
|
||||||
|
# Compute stats
|
||||||
|
times_sorted = sorted(times_ms)
|
||||||
|
p50_idx = len(times_sorted) // 2
|
||||||
|
p95_idx = int(len(times_sorted) * 0.95)
|
||||||
|
|
||||||
|
report = {
|
||||||
|
"benchmark": "trocr-baseline",
|
||||||
|
"timestamp": datetime.utcnow().isoformat() + "Z",
|
||||||
|
"model": model_name,
|
||||||
|
"backend": "pytorch",
|
||||||
|
"quantization": "float32",
|
||||||
|
"num_runs": num_runs,
|
||||||
|
"model_size_mb": round(model_size, 1),
|
||||||
|
"ram_mb": {
|
||||||
|
"before_load": round(rss_before, 1),
|
||||||
|
"after_load": round(rss_after_load, 1),
|
||||||
|
"after_inference": round(rss_after_inference, 1),
|
||||||
|
"model_delta": round(rss_after_load - rss_before, 1),
|
||||||
|
},
|
||||||
|
"load_time_seconds": round(load_time, 2),
|
||||||
|
"inference_ms": {
|
||||||
|
"min": round(min(times_ms), 1),
|
||||||
|
"max": round(max(times_ms), 1),
|
||||||
|
"mean": round(sum(times_ms) / len(times_ms), 1),
|
||||||
|
"p50": round(times_sorted[p50_idx], 1),
|
||||||
|
"p95": round(times_sorted[min(p95_idx, len(times_sorted) - 1)], 1),
|
||||||
|
},
|
||||||
|
"times_ms": [round(t, 1) for t in times_ms],
|
||||||
|
"platform": {
|
||||||
|
"python": sys.version.split()[0],
|
||||||
|
"os": sys.platform,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
return report
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description="TrOCR Baseline Benchmark")
|
||||||
|
parser.add_argument("--model", default="microsoft/trocr-base-printed",
|
||||||
|
help="HuggingFace model name")
|
||||||
|
parser.add_argument("--runs", type=int, default=10,
|
||||||
|
help="Number of inference runs")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
report = benchmark_trocr(model_name=args.model, num_runs=args.runs)
|
||||||
|
print(json.dumps(report, indent=2))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
61
scripts/run-regression.sh
Executable file
61
scripts/run-regression.sh
Executable file
@@ -0,0 +1,61 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Run OCR pipeline regression tests and exit non-zero on failure.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# ./scripts/run-regression.sh # default: macmini:8086
|
||||||
|
# ./scripts/run-regression.sh http://localhost:8086
|
||||||
|
#
|
||||||
|
# Exit codes:
|
||||||
|
# 0 = all pass
|
||||||
|
# 1 = failures or errors
|
||||||
|
# 2 = connection error
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
BASE_URL="${1:-http://macmini:8086}"
|
||||||
|
ENDPOINT="${BASE_URL}/api/v1/ocr-pipeline/regression/run?triggered_by=script"
|
||||||
|
|
||||||
|
echo "=== OCR Pipeline Regression Suite ==="
|
||||||
|
echo "Endpoint: ${ENDPOINT}"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
RESPONSE=$(curl -sf -X POST "${ENDPOINT}" -H "Content-Type: application/json" 2>&1) || {
|
||||||
|
echo "ERROR: Could not reach ${ENDPOINT}"
|
||||||
|
exit 2
|
||||||
|
}
|
||||||
|
|
||||||
|
STATUS=$(echo "${RESPONSE}" | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])")
|
||||||
|
TOTAL=$(echo "${RESPONSE}" | python3 -c "import sys,json; s=json.load(sys.stdin)['summary']; print(s['total'])")
|
||||||
|
PASSED=$(echo "${RESPONSE}" | python3 -c "import sys,json; s=json.load(sys.stdin)['summary']; print(s['passed'])")
|
||||||
|
FAILED=$(echo "${RESPONSE}" | python3 -c "import sys,json; s=json.load(sys.stdin)['summary']; print(s['failed'])")
|
||||||
|
ERRORS=$(echo "${RESPONSE}" | python3 -c "import sys,json; s=json.load(sys.stdin)['summary']; print(s['errors'])")
|
||||||
|
DURATION=$(echo "${RESPONSE}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('duration_ms', '?'))")
|
||||||
|
|
||||||
|
echo "Status: ${STATUS}"
|
||||||
|
echo "Total: ${TOTAL}"
|
||||||
|
echo "Passed: ${PASSED}"
|
||||||
|
echo "Failed: ${FAILED}"
|
||||||
|
echo "Errors: ${ERRORS}"
|
||||||
|
echo "Duration: ${DURATION}ms"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
if [ "${STATUS}" = "pass" ]; then
|
||||||
|
echo "PASS — All regression tests passed."
|
||||||
|
exit 0
|
||||||
|
else
|
||||||
|
echo "FAIL — Regression failures detected!"
|
||||||
|
# Print failure details
|
||||||
|
echo "${RESPONSE}" | python3 -c "
|
||||||
|
import sys, json
|
||||||
|
data = json.load(sys.stdin)
|
||||||
|
for r in data.get('results', []):
|
||||||
|
if r['status'] != 'pass':
|
||||||
|
print(f\" {r['status'].upper()}: {r.get('name', r['session_id'])}\")
|
||||||
|
if 'error' in r:
|
||||||
|
print(f\" Error: {r['error']}\")
|
||||||
|
ds = r.get('diff_summary', {})
|
||||||
|
if ds:
|
||||||
|
print(f\" Structural: {ds.get('structural_changes', 0)}, Text: {ds.get('text_changes', 0)}, Missing: {ds.get('cells_missing', 0)}, Added: {ds.get('cells_added', 0)}\")
|
||||||
|
"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
Reference in New Issue
Block a user