feat: Sprint 1 — IPA hardening, regression framework, ground-truth review
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 19s

Track A (Backend):
- Compound word IPA decomposition (schoolbag→school+bag)
- Trailing garbled IPA fragment removal after brackets (R21 fix)
- Regression runner with DB persistence, history endpoints
- Page crop determinism verified with tests

Track B (Frontend):
- OCR Regression dashboard (/ai/ocr-regression)
- Ground Truth Review workflow (/ai/ocr-ground-truth)
  with split-view, confidence highlighting, inline edit,
  batch mark, progress tracking

Track C (Docs):
- OCR-Pipeline.md v5.0 (Steps 5e-5h)
- Regression testing guide
- mkdocs.yml nav update

Track D (Infra):
- TrOCR baseline benchmark script
- run-regression.sh shell script
- Migration 008: regression_runs table

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-23 09:21:27 +01:00
parent f5d5d6c59c
commit a1e079b911
13 changed files with 1796 additions and 15 deletions

View File

@@ -0,0 +1,580 @@
'use client'
/**
* Ground-Truth Review Workflow
*
* Efficient mass-review of OCR sessions:
* - Session queue with auto-advance
* - Split-view: original image left, grid right
* - Confidence highlighting on cells
* - Quick-accept per row
* - Inline cell editing
* - Batch mark as ground truth
* - Progress tracking
*/
import { useState, useEffect, useCallback, useRef } from 'react'
import { PagePurpose } from '@/components/common/PagePurpose'
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
const KLAUSUR_API = '/klausur-api'
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * One OCR pipeline session as shown in the review queue.
 * Instances are built in `loadSessions` from the entries of the
 * `/api/v1/ocr-pipeline/sessions` response.
 */
interface Session {
id: string
/** Display name; empty string when the API entry has none. */
name: string
/** Uploaded file name; empty string when the API entry has none. */
filename: string
/** Session status; defaults to 'active' when the API entry has none. */
status: string
created_at: string
document_category: string | null
/** True when the API entry carries `ground_truth.build_grid_reference`. */
has_ground_truth: boolean
}
/**
 * One zone of a grid result. The UI renders each zone separately and
 * reconstructs rows by grouping `cells` on `row_index`.
 */
interface GridZone {
zone_id: string
zone_type: string
/** Column metadata; `col_type` is displayed with a leading 'column_' stripped. */
columns: Array<{ col_index: number; col_type: string; header: string }>
rows: Array<{ row_index: number; is_header: boolean }>
/** Flat list of all cells in the zone (not nested per row). */
cells: GridCell[]
}
/** A single recognized cell inside a grid zone. */
interface GridCell {
/** Identifier sent to the `update-cell` endpoint and used as React key. */
cell_id: string
row_index: number
col_index: number
col_type: string
/** Recognized (and possibly user-corrected) text; empty is shown as '(leer)'. */
text: string
/**
 * OCR confidence. Presumably on a 0-100 scale: the UI thresholds at 50 and 80
 * and shows the value with a '%' suffix. TODO confirm against the backend.
 */
confidence?: number
/** When true the cell text is rendered bold. */
is_bold?: boolean
}
/**
 * Grid payload of the `grid-editor` endpoint. `loadGrid` accepts it either as
 * the `grid` property of the response or as the response body itself.
 */
interface GridResult {
zones: GridZone[]
/** Optional aggregate counts; not read by the review page code visible here. */
summary?: {
total_zones: number
total_columns: number
total_rows: number
total_cells: number
}
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Maps an OCR confidence value to a Tailwind background class.
 *
 * @param conf Confidence value; `undefined` or `null` means "unknown".
 * @returns '' for unknown confidence, otherwise the emerald (>= 80),
 *          amber (>= 50) or red (< 50) background class.
 */
function confidenceColor(conf: number | null | undefined): string {
  // `== null` also catches a JSON `null`, which previously fell through to
  // the red class although the rest of the page treats it as "unknown".
  if (conf == null) return ''
  if (conf >= 80) return 'bg-emerald-50'
  if (conf >= 50) return 'bg-amber-50'
  return 'bg-red-50'
}
/**
 * Maps an OCR confidence value to a Tailwind border-color class.
 *
 * @param conf Confidence value; `undefined` or `null` means "unknown".
 * @returns The neutral slate border for unknown confidence, otherwise the
 *          emerald (>= 80), amber (>= 50) or red (< 50) border class.
 */
function confidenceBorder(conf: number | null | undefined): string {
  // `== null` also catches a JSON `null`, which previously fell through to
  // the red border although the rest of the page treats it as "unknown".
  if (conf == null) return 'border-slate-200'
  if (conf >= 80) return 'border-emerald-200'
  if (conf >= 50) return 'border-amber-300'
  return 'border-red-300'
}
// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------
export default function GroundTruthReviewPage() {
// Session list & queue
const [allSessions, setAllSessions] = useState<Session[]>([])
const [filter, setFilter] = useState<'all' | 'unreviewed' | 'reviewed'>('unreviewed')
const [currentIdx, setCurrentIdx] = useState(0)
const [loading, setLoading] = useState(true)
// Current session data
const [grid, setGrid] = useState<GridResult | null>(null)
const [loadingGrid, setLoadingGrid] = useState(false)
const [editingCell, setEditingCell] = useState<string | null>(null)
const [editText, setEditText] = useState('')
const [acceptedRows, setAcceptedRows] = useState<Set<string>>(new Set())
const [zoom, setZoom] = useState(100)
// Batch operations
const [selectedSessions, setSelectedSessions] = useState<Set<string>>(new Set())
const [marking, setMarking] = useState(false)
const [markResult, setMarkResult] = useState<string | null>(null)
// Stats
const [reviewedCount, setReviewedCount] = useState(0)
const [totalCount, setTotalCount] = useState(0)
const imageRef = useRef<HTMLDivElement>(null)
// Load all sessions
/**
 * Fetches up to 200 OCR sessions, normalizes them into `Session` records and
 * refreshes the progress counters. Failures are logged and leave the current
 * list untouched; the loading flag is always cleared.
 */
const loadSessions = useCallback(async () => {
  // Only the fields read below; the full API schema is not visible from this
  // file, so everything except `id` is treated as optional.
  type RawSession = {
    id: string
    name?: string | null
    filename?: string | null
    status?: string | null
    created_at?: string | null
    document_category?: string | null
    ground_truth?: { build_grid_reference?: unknown } | null
  }
  setLoading(true)
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions?limit=200`)
    if (!res.ok) {
      // Previously a silent return: an HTTP error looked like an empty queue.
      console.error(`Failed to load sessions: HTTP ${res.status}`)
      return
    }
    const data: { sessions?: RawSession[] | null } = await res.json()
    const sessions: Session[] = (data.sessions || []).map(s => ({
      id: s.id,
      name: s.name || '',
      filename: s.filename || '',
      status: s.status || 'active',
      created_at: s.created_at || '',
      document_category: s.document_category || null,
      // A session counts as reviewed once a grid reference has been stored.
      has_ground_truth: Boolean(s.ground_truth?.build_grid_reference),
    }))
    setAllSessions(sessions)
    setTotalCount(sessions.length)
    setReviewedCount(sessions.filter(s => s.has_ground_truth).length)
  } catch (e) {
    console.error('Failed to load sessions:', e)
  } finally {
    setLoading(false)
  }
}, [])
useEffect(() => { loadSessions() }, [loadSessions])
// Filtered sessions
const filteredSessions = allSessions.filter(s => {
if (filter === 'unreviewed') return !s.has_ground_truth && s.status === 'active'
if (filter === 'reviewed') return s.has_ground_truth
return true
})
const currentSession = filteredSessions[currentIdx] || null
// Load grid for current session
// Sequence number of the most recent grid request. Responses belonging to an
// older request are discarded so that fast navigation cannot show the grid of
// a previously selected session.
const gridRequestSeq = useRef(0)
/**
 * Loads the grid for one session and resets all per-session review state
 * (accepted rows, open cell editor). Errors are logged; the grid then stays
 * `null`, which the UI renders as "no grid available".
 */
const loadGrid = useCallback(async (sessionId: string) => {
  const seq = ++gridRequestSeq.current
  setLoadingGrid(true)
  setGrid(null)
  setAcceptedRows(new Set())
  setEditingCell(null)
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`)
    if (seq !== gridRequestSeq.current) return
    if (res.ok) {
      const data = await res.json()
      if (seq !== gridRequestSeq.current) return
      // The endpoint may wrap the grid in a `grid` property or return it bare.
      setGrid(data.grid || data)
    }
  } catch (e) {
    console.error('Failed to load grid:', e)
  } finally {
    // Only the latest request owns the spinner.
    if (seq === gridRequestSeq.current) setLoadingGrid(false)
  }
}, [])
// Key the reload on the session id rather than on the session object: marking
// a session replaces its object in `allSessions`, which used to trigger a
// needless reload that wiped accepted rows and the open editor.
const currentSessionId = currentSession?.id
useEffect(() => {
  if (currentSessionId) loadGrid(currentSessionId)
}, [currentSessionId, loadGrid])
// Navigation
// Step forward in the filtered queue; a no-op on the last entry.
const goNext = () => {
  const lastIdx = filteredSessions.length - 1
  if (currentIdx >= lastIdx) return
  setCurrentIdx(currentIdx + 1)
}
// Step backward in the filtered queue; a no-op on the first entry.
const goPrev = () => {
  if (currentIdx <= 0) return
  setCurrentIdx(currentIdx - 1)
}
// Accept row
// Remember a row as accepted. Rows are keyed as "<zoneId>-<rowIdx>", the same
// key the row renderer looks up.
const acceptRow = (zoneId: string, rowIdx: number) => {
  setAcceptedRows(previous => {
    const updated = new Set(previous)
    updated.add(`${zoneId}-${rowIdx}`)
    return updated
  })
}
// Edit cell
// Open the inline editor on a cell, pre-filled with the cell's current text.
const startEdit = (cell: GridCell) => {
  const { cell_id: cellId, text } = cell
  setEditText(text)
  setEditingCell(cellId)
}
/**
 * Persists the text of the cell currently being edited and, once the server
 * has accepted it, mirrors the change into the local grid.
 *
 * On a rejected or failed request the error is logged and the local grid is
 * left unchanged, so the cell falls back to its last saved text.
 */
const saveEdit = async () => {
  if (!editingCell || !currentSession) return
  // Snapshot the values this save belongs to; state may move on while the
  // request is in flight.
  const cellId = editingCell
  const text = editText
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${currentSession.id}/update-cell`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ cell_id: cellId, text }),
    })
    if (!res.ok) {
      // fetch() resolves on HTTP errors; without this check a rejected edit
      // was shown locally as if it had been saved.
      throw new Error(`update-cell responded with HTTP ${res.status}`)
    }
    // Rebuild only the affected objects instead of mutating state in place.
    setGrid(prev => {
      if (!prev?.zones) return prev
      return {
        ...prev,
        zones: prev.zones.map(zone => ({
          ...zone,
          cells: zone.cells.map(cell =>
            cell.cell_id === cellId ? { ...cell, text } : cell
          ),
        })),
      }
    })
  } catch (e) {
    console.error('Failed to save cell:', e)
  }
  // Close the editor only if it still belongs to this cell. Clicking straight
  // onto another cell starts a new edit while this save (triggered by blur) is
  // pending, and that new editor must stay open.
  setEditingCell(current => (current === cellId ? null : current))
}
// Mark as ground truth
/**
 * Marks a single session as ground truth on the server and reflects the
 * result in local state. The outcome is reported through `markResult` as
 * 'success' or 'error'.
 *
 * @param sessionId Id of the session to mark.
 */
const markGroundTruth = async (sessionId: string) => {
  // Re-marking is possible ("Markieren & Weiter" is also shown for reviewed
  // sessions); such a session must not be counted a second time.
  const wasReviewed = allSessions.some(s => s.id === sessionId && s.has_ground_truth)
  setMarking(true)
  setMarkResult(null)
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth`, {
      method: 'POST',
    })
    if (res.ok) {
      setMarkResult('success')
      // Update local session state
      setAllSessions(prev => prev.map(s =>
        s.id === sessionId ? { ...s, has_ground_truth: true } : s
      ))
      if (!wasReviewed) setReviewedCount(prev => prev + 1)
    } else {
      setMarkResult('error')
    }
  } catch {
    setMarkResult('error')
  } finally {
    setMarking(false)
  }
}
// Batch mark
/**
 * Marks every selected session as ground truth, one request at a time, and
 * shows a summary message for three seconds.
 *
 * Only sessions whose request succeeded are flagged locally and removed from
 * the selection; failed ones stay selected so they can be retried.
 */
const batchMark = async () => {
  setMarking(true)
  const succeeded = new Set<string>()
  for (const sid of selectedSessions) {
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}/mark-ground-truth`, {
        method: 'POST',
      })
      if (res.ok) succeeded.add(sid)
    } catch (e) {
      // Best effort: keep going with the remaining sessions.
      console.error(`Failed to mark session ${sid} as ground truth:`, e)
    }
  }
  // Sessions that already were ground truth must not inflate the progress.
  const newlyReviewed = allSessions.filter(
    s => succeeded.has(s.id) && !s.has_ground_truth
  ).length
  setAllSessions(prev => prev.map(s =>
    succeeded.has(s.id) ? { ...s, has_ground_truth: true } : s
  ))
  setReviewedCount(prev => prev + newlyReviewed)
  setSelectedSessions(prev => new Set([...prev].filter(id => !succeeded.has(id))))
  setMarking(false)
  setMarkResult(`${succeeded.size} Sessions als Ground Truth markiert`)
  setTimeout(() => setMarkResult(null), 3000)
}
// All cells for current grid
const allCells = grid?.zones?.flatMap(z => z.cells) || []
const lowConfCells = allCells.filter(c => (c.confidence ?? 100) < 50)
const imageUrl = currentSession
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${currentSession.id}/image/original`
: null
return (
<AIToolsSidebarResponsive>
<div className="max-w-[1600px] mx-auto p-4 space-y-4">
<PagePurpose moduleId="ocr-ground-truth" />
{/* Progress Bar */}
<div className="bg-white rounded-lg border border-slate-200 p-4">
<div className="flex items-center justify-between mb-2">
<h2 className="text-lg font-bold text-slate-900">Ground Truth Review</h2>
<span className="text-sm text-slate-500">
{reviewedCount} von {totalCount} geprueft ({totalCount > 0 ? Math.round(reviewedCount / totalCount * 100) : 0}%)
</span>
</div>
<div className="w-full bg-slate-100 rounded-full h-2.5">
<div
className="bg-teal-500 h-2.5 rounded-full transition-all duration-500"
style={{ width: `${totalCount > 0 ? (reviewedCount / totalCount) * 100 : 0}%` }}
/>
</div>
</div>
{/* Filter + Queue */}
<div className="flex items-center gap-4">
<div className="flex gap-1 bg-slate-100 rounded-lg p-1">
{(['unreviewed', 'reviewed', 'all'] as const).map(f => (
<button
key={f}
onClick={() => { setFilter(f); setCurrentIdx(0) }}
className={`px-3 py-1.5 text-sm rounded-md transition-colors ${
filter === f
? 'bg-white text-slate-900 shadow-sm font-medium'
: 'text-slate-500 hover:text-slate-700'
}`}
>
{f === 'unreviewed' ? 'Offen' : f === 'reviewed' ? 'Geprueft' : 'Alle'}
<span className="ml-1 text-xs text-slate-400">
({allSessions.filter(s =>
f === 'unreviewed' ? !s.has_ground_truth && s.status === 'active'
: f === 'reviewed' ? s.has_ground_truth
: true
).length})
</span>
</button>
))}
</div>
{/* Navigation */}
<div className="flex items-center gap-2 ml-auto">
<button onClick={goPrev} disabled={currentIdx === 0}
className="p-2 rounded hover:bg-slate-100 disabled:opacity-30 disabled:cursor-not-allowed">
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 19l-7-7 7-7" />
</svg>
</button>
<span className="text-sm text-slate-500 min-w-[80px] text-center">
{filteredSessions.length > 0 ? `${currentIdx + 1} / ${filteredSessions.length}` : '—'}
</span>
<button onClick={goNext} disabled={currentIdx >= filteredSessions.length - 1}
className="p-2 rounded hover:bg-slate-100 disabled:opacity-30 disabled:cursor-not-allowed">
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
</svg>
</button>
</div>
{/* Batch mark button */}
{selectedSessions.size > 0 && (
<button
onClick={batchMark}
disabled={marking}
className="px-3 py-1.5 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700 disabled:opacity-50"
>
{selectedSessions.size} markieren
</button>
)}
</div>
{/* Toast */}
{markResult && (
<div className={`p-3 rounded-lg text-sm ${
markResult === 'error' ? 'bg-red-50 text-red-700 border border-red-200'
: markResult === 'success' ? 'bg-emerald-50 text-emerald-700 border border-emerald-200'
: 'bg-blue-50 text-blue-700 border border-blue-200'
}`}>
{markResult === 'success' ? 'Als Ground Truth markiert!' : markResult === 'error' ? 'Fehler beim Markieren' : markResult}
</div>
)}
{/* Main Content: Split View */}
{loading ? (
<div className="text-center py-12 text-slate-400">Lade Sessions...</div>
) : !currentSession ? (
<div className="text-center py-12 text-slate-400">
<p className="text-lg">Keine Sessions in dieser Ansicht</p>
</div>
) : (
<div className="grid grid-cols-2 gap-4" style={{ minHeight: '70vh' }}>
{/* Left: Original Image */}
<div className="bg-white rounded-lg border border-slate-200 overflow-hidden flex flex-col">
<div className="flex items-center justify-between px-3 py-2 border-b border-slate-100 bg-slate-50">
<span className="text-sm font-medium text-slate-700 truncate">
{currentSession.name || currentSession.filename}
</span>
<div className="flex items-center gap-2">
<button onClick={() => setZoom(z => Math.max(50, z - 25))}
className="px-2 py-0.5 text-xs bg-slate-200 rounded hover:bg-slate-300">-</button>
<span className="text-xs text-slate-500 w-10 text-center">{zoom}%</span>
<button onClick={() => setZoom(z => Math.min(300, z + 25))}
className="px-2 py-0.5 text-xs bg-slate-200 rounded hover:bg-slate-300">+</button>
</div>
</div>
<div ref={imageRef} className="flex-1 overflow-auto p-2">
{imageUrl && (
<img
src={imageUrl}
alt="Original scan"
style={{ width: `${zoom}%`, maxWidth: 'none' }}
className="block"
/>
)}
</div>
</div>
{/* Right: Grid Review */}
<div className="bg-white rounded-lg border border-slate-200 overflow-hidden flex flex-col">
<div className="flex items-center justify-between px-3 py-2 border-b border-slate-100 bg-slate-50">
<div className="flex items-center gap-3">
<span className="text-sm font-medium text-slate-700">
{allCells.length} Zellen
</span>
{lowConfCells.length > 0 && (
<span className="text-xs bg-red-100 text-red-700 px-2 py-0.5 rounded-full">
{lowConfCells.length} niedrige Konfidenz
</span>
)}
</div>
<div className="flex items-center gap-2">
{!currentSession.has_ground_truth && (
<button
onClick={() => markGroundTruth(currentSession.id)}
disabled={marking}
className="px-3 py-1 bg-teal-600 text-white text-xs rounded hover:bg-teal-700 disabled:opacity-50"
>
{marking ? 'Markiere...' : 'Als Ground Truth markieren'}
</button>
)}
{currentSession.has_ground_truth && (
<span className="text-xs bg-emerald-100 text-emerald-700 px-2 py-0.5 rounded-full">
Ground Truth
</span>
)}
<button
  onClick={async () => {
    await markGroundTruth(currentSession.id)
    // In the "unreviewed" filter a successfully marked session drops out
    // of the list, so the next one slides into the current index by
    // itself; advancing as well would skip a session. In the other
    // filters the list is unchanged and we step forward explicitly.
    if (filter !== 'unreviewed') goNext()
  }}
  disabled={marking}
  className="px-3 py-1 bg-slate-600 text-white text-xs rounded hover:bg-slate-700 disabled:opacity-50"
>
  Markieren & Weiter
</button>
</div>
</div>
{/* Grid Content */}
<div className="flex-1 overflow-auto">
{loadingGrid ? (
<div className="flex items-center justify-center h-full text-slate-400">
<svg className="animate-spin h-6 w-6 mr-2" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
Lade Grid...
</div>
) : !grid || !grid.zones ? (
<div className="text-center py-8 text-slate-400 text-sm">
Kein Grid vorhanden. Bitte zuerst die Pipeline ausfuehren.
</div>
) : (
<div className="p-3 space-y-4">
{grid.zones.map((zone, zi) => (
<div key={zone.zone_id || zi} className="space-y-1">
{/* Zone header */}
<div className="text-xs text-slate-400 uppercase tracking-wide">
Zone {zi + 1} ({zone.zone_type})
{zone.columns?.length > 0 && (
<span className="ml-2">
{zone.columns.map(c => c.col_type.replace('column_', '')).join(' | ')}
</span>
)}
</div>
{/* Group cells by row */}
{Array.from(new Set(zone.cells.map(c => c.row_index)))
.sort((a, b) => a - b)
.map(rowIdx => {
const rowCells = zone.cells
.filter(c => c.row_index === rowIdx)
.sort((a, b) => a.col_index - b.col_index)
const rowKey = `${zone.zone_id || zi}-${rowIdx}`
const isAccepted = acceptedRows.has(rowKey)
return (
<div
key={rowKey}
className={`flex items-start gap-1 group ${isAccepted ? 'opacity-60' : ''}`}
>
{/* Quick accept button */}
<button
onClick={() => acceptRow(zone.zone_id || String(zi), rowIdx)}
className={`flex-shrink-0 w-6 h-6 rounded flex items-center justify-center mt-0.5 transition-colors ${
isAccepted
? 'bg-emerald-100 text-emerald-600'
: 'bg-slate-100 text-slate-400 hover:bg-emerald-100 hover:text-emerald-600'
}`}
title="Zeile als korrekt markieren"
>
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
</button>
{/* Cells */}
<div className="flex-1 flex gap-1 flex-wrap">
{rowCells.map(cell => (
<div
key={cell.cell_id}
className={`flex-1 min-w-[80px] px-2 py-1 rounded text-sm border cursor-pointer transition-colors
${confidenceColor(cell.confidence)}
${confidenceBorder(cell.confidence)}
${editingCell === cell.cell_id ? 'ring-2 ring-teal-400' : 'hover:border-teal-300'}
${cell.is_bold ? 'font-bold' : ''}
`}
onClick={() => !isAccepted && startEdit(cell)}
title={`Konfidenz: ${cell.confidence ?? '?'}% | ${cell.col_type}`}
>
{editingCell === cell.cell_id ? (
<input
autoFocus
value={editText}
onChange={e => setEditText(e.target.value)}
onBlur={saveEdit}
onKeyDown={e => {
if (e.key === 'Enter') saveEdit()
if (e.key === 'Escape') setEditingCell(null)
}}
className="w-full bg-transparent outline-none text-sm"
/>
) : (
<span className={cell.text ? '' : 'text-slate-300 italic'}>
{cell.text || '(leer)'}
</span>
)}
</div>
))}
</div>
</div>
)
})}
</div>
))}
</div>
)}
</div>
</div>
</div>
)}
{/* Session List (collapsed) */}
{filteredSessions.length > 1 && (
<details className="bg-white rounded-lg border border-slate-200">
<summary className="px-4 py-3 cursor-pointer text-sm font-medium text-slate-700 hover:bg-slate-50">
Session-Liste ({filteredSessions.length})
</summary>
<div className="border-t border-slate-100 max-h-60 overflow-y-auto">
{filteredSessions.map((s, idx) => (
<div
key={s.id}
className={`flex items-center gap-3 px-4 py-2 text-sm cursor-pointer hover:bg-slate-50 border-b border-slate-50 ${
idx === currentIdx ? 'bg-teal-50' : ''
}`}
onClick={() => setCurrentIdx(idx)}
>
<input
type="checkbox"
checked={selectedSessions.has(s.id)}
onChange={e => {
e.stopPropagation()
setSelectedSessions(prev => {
const next = new Set(prev)
if (next.has(s.id)) next.delete(s.id)
else next.add(s.id)
return next
})
}}
className="rounded border-slate-300"
/>
<span className={`w-2 h-2 rounded-full flex-shrink-0 ${s.has_ground_truth ? 'bg-emerald-400' : 'bg-slate-300'}`} />
<span className="truncate flex-1">{s.name || s.filename || s.id}</span>
{s.document_category && (
<span className="text-xs bg-slate-100 px-1.5 py-0.5 rounded text-slate-500">{s.document_category}</span>
)}
</div>
))}
</div>
</details>
)}
</div>
</AIToolsSidebarResponsive>
)
}

View File

@@ -0,0 +1,391 @@
'use client'
/**
* OCR Regression Dashboard
*
* Shows all ground-truth sessions, runs regression tests,
* displays pass/fail results with diff details, and shows history.
*/
import { useState, useEffect, useCallback } from 'react'
import { PagePurpose } from '@/components/common/PagePurpose'
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
const KLAUSUR_API = '/klausur-api'
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * A ground-truth session as returned in the `sessions` array of the
 * `/api/v1/ocr-pipeline/ground-truth-sessions` endpoint.
 */
interface GTSession {
session_id: string
name: string
filename: string
document_category: string | null
/** Shown as a small tag next to the session when present. */
pipeline: string | null
saved_at: string | null
/** Aggregate counts; `total_cells` and `total_zones` are displayed in the overview. */
summary: {
total_zones: number
total_columns: number
total_rows: number
total_cells: number
}
}
/**
 * Per-session difference counters of a regression result. The dashboard shows
 * `text_changes` and `structural_changes` in the result row; the exact
 * counting rules are defined by the backend and not visible here.
 */
interface DiffSummary {
structural_changes: number
cells_missing: number
cells_added: number
text_changes: number
col_type_changes: number
}
/** Outcome of re-running the pipeline for one ground-truth session. */
interface RegressionResult {
session_id: string
name: string
/** Diff details are only expandable in the UI for 'fail'. */
status: 'pass' | 'fail' | 'error'
/** Error message, rendered in red in the result row when present. */
error?: string
diff_summary?: DiffSummary
reference_summary?: Record<string, number>
current_summary?: Record<string, number>
/** Changed structural fields with their reference and current values. */
structural_diffs?: Array<{ field: string; reference: number; current: number }>
/**
 * Per-cell differences. The UI distinguishes the `type` values 'text_change'
 * and 'cell_missing'; every other value gets the same neutral styling.
 */
cell_diffs?: Array<{ type: string; cell_id: string; reference?: string; current?: string }>
}
/**
 * One persisted regression run as listed in the `runs` array of the
 * `/api/v1/ocr-pipeline/regression/history` endpoint.
 */
interface RegressionRun {
id: string
/** Timestamp string, formatted with `formatDate` for display. */
run_at: string
status: string
total: number
passed: number
/** Shown together with `errors` as a single "Fail" figure in the history table. */
failed: number
errors: number
/** Run duration in milliseconds; displayed in seconds. */
duration_ms: number
triggered_by: string
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Small pill showing a regression status. 'pass' is green, 'fail' is red;
 * every other value is rendered as an amber "Error" badge.
 */
function StatusBadge({ status }: { status: string }) {
  let cls = 'bg-amber-100 text-amber-800 border-amber-200'
  let label = 'Error'
  if (status === 'pass') {
    cls = 'bg-emerald-100 text-emerald-800 border-emerald-200'
    label = 'Pass'
  } else if (status === 'fail') {
    cls = 'bg-red-100 text-red-800 border-red-200'
    label = 'Fail'
  }
  return (
    <span className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium border ${cls}`}>
      {label}
    </span>
  )
}
/**
 * Formats an ISO timestamp for display in German locale (dd.mm.yyyy, hh:mm).
 *
 * @param iso Timestamp string, or `null` when none is available.
 * @returns The formatted date, or '—' when the input is missing, empty or
 *          cannot be parsed (instead of the literal text "Invalid Date").
 */
function formatDate(iso: string | null): string {
  if (!iso) return '—'
  const parsed = new Date(iso)
  if (Number.isNaN(parsed.getTime())) return '—'
  return parsed.toLocaleString('de-DE', {
    day: '2-digit', month: '2-digit', year: 'numeric',
    hour: '2-digit', minute: '2-digit',
  })
}
// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------
export default function OCRRegressionPage() {
const [sessions, setSessions] = useState<GTSession[]>([])
const [results, setResults] = useState<RegressionResult[]>([])
const [history, setHistory] = useState<RegressionRun[]>([])
const [running, setRunning] = useState(false)
const [overallStatus, setOverallStatus] = useState<string | null>(null)
const [durationMs, setDurationMs] = useState<number | null>(null)
const [expandedSession, setExpandedSession] = useState<string | null>(null)
const [tab, setTab] = useState<'current' | 'history'>('current')
// Load ground-truth sessions
// Fetch the list of ground-truth sessions; a non-OK response leaves the
// current list untouched, network errors are logged.
const loadSessions = useCallback(async () => {
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/ground-truth-sessions`)
    if (!response.ok) return
    const payload = await response.json()
    setSessions(payload.sessions || [])
  } catch (err) {
    console.error('Failed to load GT sessions:', err)
  }
}, [])
// Load history
// Fetch the 20 most recent regression runs; a non-OK response leaves the
// current history untouched, network errors are logged.
const loadHistory = useCallback(async () => {
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/regression/history?limit=20`)
    if (!response.ok) return
    const payload = await response.json()
    setHistory(payload.runs || [])
  } catch (err) {
    console.error('Failed to load history:', err)
  }
}, [])
useEffect(() => {
loadSessions()
loadHistory()
}, [loadSessions, loadHistory])
// Run all regressions
/**
 * Triggers a regression run over all ground-truth sessions and publishes the
 * results, overall status and duration. The run history is refreshed after a
 * successful run. Any failure (network or HTTP) is shown as status 'error'.
 */
const runAll = async () => {
  setRunning(true)
  setResults([])
  setOverallStatus(null)
  setDurationMs(null)
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/regression/run?triggered_by=manual`, {
      method: 'POST',
    })
    if (!res.ok) {
      // Previously ignored: the spinner stopped and nothing else happened.
      console.error(`Regression run failed: HTTP ${res.status}`)
      setOverallStatus('error')
      return
    }
    const data = await res.json()
    setResults(data.results || [])
    setOverallStatus(data.status ?? null)
    // A missing duration would otherwise be rendered as "NaNs".
    setDurationMs(typeof data.duration_ms === 'number' ? data.duration_ms : null)
    // Fire-and-forget: loadHistory handles its own errors.
    void loadHistory()
  } catch (e) {
    console.error('Regression run failed:', e)
    setOverallStatus('error')
  } finally {
    setRunning(false)
  }
}
const totalPass = results.filter(r => r.status === 'pass').length
const totalFail = results.filter(r => r.status === 'fail').length
const totalError = results.filter(r => r.status === 'error').length
return (
<AIToolsSidebarResponsive>
<div className="max-w-7xl mx-auto p-6 space-y-6">
<PagePurpose moduleId="ocr-regression" />
{/* Header + Run Button */}
<div className="flex items-center justify-between">
<div>
<h1 className="text-2xl font-bold text-slate-900">OCR Regression Tests</h1>
<p className="text-sm text-slate-500 mt-1">
{sessions.length} Ground-Truth Session{sessions.length !== 1 ? 's' : ''}
</p>
</div>
<button
onClick={runAll}
disabled={running || sessions.length === 0}
className="inline-flex items-center gap-2 px-4 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed font-medium transition-colors"
>
{running ? (
<>
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
Laeuft...
</>
) : (
'Alle Tests starten'
)}
</button>
</div>
{/* Overall Result Banner */}
{overallStatus && (
<div className={`rounded-lg p-4 border ${
overallStatus === 'pass'
? 'bg-emerald-50 border-emerald-200'
: 'bg-red-50 border-red-200'
}`}>
<div className="flex items-center justify-between">
<div className="flex items-center gap-3">
<StatusBadge status={overallStatus} />
<span className="font-medium text-slate-900">
{totalPass} bestanden, {totalFail} fehlgeschlagen, {totalError} Fehler
</span>
</div>
{durationMs !== null && (
<span className="text-sm text-slate-500">{(durationMs / 1000).toFixed(1)}s</span>
)}
</div>
</div>
)}
{/* Tabs */}
<div className="border-b border-slate-200">
<nav className="flex gap-4">
{(['current', 'history'] as const).map(t => (
<button
key={t}
onClick={() => setTab(t)}
className={`pb-3 px-1 text-sm font-medium border-b-2 transition-colors ${
tab === t
? 'border-teal-500 text-teal-600'
: 'border-transparent text-slate-500 hover:text-slate-700'
}`}
>
{t === 'current' ? 'Aktuelle Ergebnisse' : 'Verlauf'}
</button>
))}
</nav>
</div>
{/* Current Results Tab */}
{tab === 'current' && (
<div className="space-y-3">
{results.length === 0 && !running && (
<div className="text-center py-12 text-slate-400">
<p className="text-lg">Keine Ergebnisse</p>
<p className="text-sm mt-1">Klicken Sie &quot;Alle Tests starten&quot; um die Regression zu laufen.</p>
</div>
)}
{results.map(r => (
<div
key={r.session_id}
className="bg-white rounded-lg border border-slate-200 overflow-hidden"
>
<div
className="flex items-center justify-between px-4 py-3 cursor-pointer hover:bg-slate-50 transition-colors"
onClick={() => setExpandedSession(expandedSession === r.session_id ? null : r.session_id)}
>
<div className="flex items-center gap-3 min-w-0">
<StatusBadge status={r.status} />
<span className="font-medium text-slate-900 truncate">{r.name || r.session_id}</span>
</div>
<div className="flex items-center gap-4 text-sm text-slate-500">
{r.diff_summary && (
<span>
{r.diff_summary.text_changes} Text, {r.diff_summary.structural_changes} Struktur
</span>
)}
{r.error && <span className="text-red-500">{r.error}</span>}
<svg className={`w-4 h-4 transition-transform ${expandedSession === r.session_id ? 'rotate-180' : ''}`} fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
</svg>
</div>
</div>
{/* Expanded Details */}
{expandedSession === r.session_id && r.status === 'fail' && (
<div className="border-t border-slate-100 px-4 py-3 bg-slate-50 space-y-3">
{/* Structural Diffs */}
{r.structural_diffs && r.structural_diffs.length > 0 && (
<div>
<h4 className="text-xs font-medium text-slate-500 uppercase mb-1">Strukturelle Aenderungen</h4>
<div className="space-y-1">
{r.structural_diffs.map((d, i) => (
<div key={i} className="text-sm">
<span className="font-mono text-slate-600">{d.field}</span>: {d.reference} {d.current}
</div>
))}
</div>
</div>
)}
{/* Cell Diffs */}
{r.cell_diffs && r.cell_diffs.length > 0 && (
<div>
<h4 className="text-xs font-medium text-slate-500 uppercase mb-1">
Zellen-Aenderungen ({r.cell_diffs.length})
</h4>
<div className="max-h-60 overflow-y-auto space-y-1">
{r.cell_diffs.slice(0, 50).map((d, i) => (
<div key={i} className="text-sm font-mono bg-white rounded px-2 py-1 border border-slate-100">
<span className={`text-xs px-1 rounded ${
d.type === 'text_change' ? 'bg-amber-100 text-amber-700'
: d.type === 'cell_missing' ? 'bg-red-100 text-red-700'
: 'bg-blue-100 text-blue-700'
}`}>
{d.type}
</span>{' '}
<span className="text-slate-500">{d.cell_id}</span>
{d.reference && (
<>
{' '}<span className="line-through text-red-400">{d.reference}</span>
</>
)}
{d.current && (
<>
{' '}<span className="text-emerald-600">{d.current}</span>
</>
)}
</div>
))}
{r.cell_diffs.length > 50 && (
<p className="text-xs text-slate-400">... und {r.cell_diffs.length - 50} weitere</p>
)}
</div>
</div>
)}
</div>
)}
</div>
))}
{/* Ground Truth Sessions Overview (when no results yet) */}
{results.length === 0 && sessions.length > 0 && (
<div>
<h3 className="text-sm font-medium text-slate-700 mb-2">Ground-Truth Sessions</h3>
<div className="grid gap-2">
{sessions.map(s => (
<div key={s.session_id} className="bg-white rounded-lg border border-slate-200 px-4 py-3 flex items-center justify-between">
<div>
<span className="font-medium text-slate-900">{s.name || s.session_id}</span>
<span className="text-sm text-slate-400 ml-2">{s.filename}</span>
</div>
<div className="text-sm text-slate-500">
{s.summary.total_cells} Zellen, {s.summary.total_zones} Zonen
{s.pipeline && <span className="ml-2 text-xs bg-slate-100 px-1.5 py-0.5 rounded">{s.pipeline}</span>}
</div>
</div>
))}
</div>
</div>
)}
</div>
)}
{/* History Tab */}
{tab === 'history' && (
<div className="space-y-2">
{history.length === 0 ? (
<p className="text-center py-8 text-slate-400">Noch keine Laeufe aufgezeichnet.</p>
) : (
<table className="w-full text-sm">
<thead>
<tr className="border-b border-slate-200 text-left text-slate-500">
<th className="pb-2 font-medium">Datum</th>
<th className="pb-2 font-medium">Status</th>
<th className="pb-2 font-medium text-right">Gesamt</th>
<th className="pb-2 font-medium text-right">Pass</th>
<th className="pb-2 font-medium text-right">Fail</th>
<th className="pb-2 font-medium text-right">Dauer</th>
<th className="pb-2 font-medium">Trigger</th>
</tr>
</thead>
<tbody>
{history.map(run => (
<tr key={run.id} className="border-b border-slate-100 hover:bg-slate-50">
<td className="py-2">{formatDate(run.run_at)}</td>
<td className="py-2"><StatusBadge status={run.status} /></td>
<td className="py-2 text-right">{run.total}</td>
<td className="py-2 text-right text-emerald-600">{run.passed}</td>
<td className="py-2 text-right text-red-600">{run.failed + run.errors}</td>
<td className="py-2 text-right text-slate-500">{(run.duration_ms / 1000).toFixed(1)}s</td>
<td className="py-2 text-slate-400">{run.triggered_by}</td>
</tr>
))}
</tbody>
</table>
)}
</div>
)}
</div>
</AIToolsSidebarResponsive>
)
}

View File

@@ -182,6 +182,24 @@ export const navigation: NavCategory[] = [
// ----------------------------------------------------------------------- // -----------------------------------------------------------------------
// KI-Anwendungen: Endnutzer-orientierte KI-Module // KI-Anwendungen: Endnutzer-orientierte KI-Module
// ----------------------------------------------------------------------- // -----------------------------------------------------------------------
{
id: 'ocr-regression',
name: 'OCR Regression',
href: '/ai/ocr-regression',
description: 'Regressions-Tests & Ground Truth',
purpose: 'Regressions-Tests fuer die OCR-Pipeline ausfuehren. Zeigt Pass/Fail pro Ground-Truth Session, Diff-Details und Verlauf vergangener Laeufe.',
audience: ['Entwickler', 'QA'],
subgroup: 'KI-Werkzeuge',
},
{
id: 'ocr-ground-truth',
name: 'Ground Truth Review',
href: '/ai/ocr-ground-truth',
description: 'Ground Truth pruefen & markieren',
purpose: 'Effiziente Massenpruefung von OCR-Sessions. Split-View mit Confidence-Highlighting, Quick-Accept und Batch-Markierung als Ground Truth.',
audience: ['Entwickler', 'QA'],
subgroup: 'KI-Werkzeuge',
},
{ {
id: 'agents', id: 'agents',
name: 'Agent Management', name: 'Agent Management',

View File

@@ -0,0 +1,166 @@
# OCR Pipeline Regression Testing
**Stand:** 2026-03-23
---
## Uebersicht
Das Regression Framework stellt sicher, dass Aenderungen an der OCR-Pipeline keine bestehenden
Ergebnisse verschlechtern. Ground-Truth Sessions dienen als Referenz — nach jeder Code-Aenderung
wird die Pipeline neu ausgefuehrt und das Ergebnis mit der Referenz verglichen.
---
## Ground Truth markieren
### Via Admin-UI (empfohlen)
1. Oeffne die OCR Pipeline: [/ai/ocr-pipeline](https://macmini:3002/ai/ocr-pipeline)
2. Lade eine Session und fuehre alle Pipeline-Schritte aus
3. Pruefe das Ergebnis im Grid Editor (Schritt 10)
4. Korrigiere Fehler falls noetig (Inline-Edit)
5. Klicke **"Als Ground Truth markieren"**
### Via API
```bash
# Bestehende Session als Ground Truth markieren
curl -X POST "http://macmini:8086/api/v1/ocr-pipeline/sessions/{session_id}/mark-ground-truth"
# Ground Truth entfernen
curl -X DELETE "http://macmini:8086/api/v1/ocr-pipeline/sessions/{session_id}/mark-ground-truth"
# Alle Ground-Truth Sessions auflisten
curl "http://macmini:8086/api/v1/ocr-pipeline/ground-truth-sessions"
```
### Via Ground-Truth Review UI
Fuer die Massenpruefung von 50-100 Sessions:
1. Oeffne [/ai/ocr-ground-truth](https://macmini:3002/ai/ocr-ground-truth)
2. Filter auf "Offen" (ungeprueft)
3. Split-View: Bild links, Grid rechts pruefen
4. Korrekte Zeilen mit Haekchen bestaetigen
5. Fehler inline korrigieren
6. "Markieren & Weiter" fuer naechste Session
---
## Regression ausfuehren
### Via Shell-Script (CI/CD)
```bash
# Standard: macmini:8086
./scripts/run-regression.sh
# Custom URL
./scripts/run-regression.sh http://localhost:8086
# Exit-Codes:
# 0 = alle bestanden
# 1 = Fehler gefunden
# 2 = Verbindungsfehler
```
### Via Admin-UI
1. Oeffne [/ai/ocr-regression](https://macmini:3002/ai/ocr-regression)
2. Klicke **"Alle Tests starten"**
3. Ergebnis: Pass/Fail pro Session mit Diff-Details
### Via API
```bash
# Alle Ground-Truth Sessions testen
curl -X POST "http://macmini:8086/api/v1/ocr-pipeline/regression/run?triggered_by=script"
# Einzelne Session testen
curl -X POST "http://macmini:8086/api/v1/ocr-pipeline/sessions/{session_id}/regression/run"
# Verlauf abrufen
curl "http://macmini:8086/api/v1/ocr-pipeline/regression/history?limit=20"
```
---
## Ergebnisse lesen
### Diff-Typen
| Typ | Beschreibung |
|-----|-------------|
| `structural_changes` | Anzahl Zonen, Spalten oder Zeilen hat sich geaendert |
| `text_change` | Text einer Zelle hat sich geaendert |
| `cell_missing` | Zelle war in der Referenz, fehlt jetzt |
| `cell_added` | Neue Zelle die in der Referenz nicht existierte |
| `col_type_change` | Spaltentyp einer Zelle hat sich geaendert |
### Status-Bewertung
- **pass**: Keine Diffs → Code-Aenderung hat keine Auswirkung
- **fail**: Diffs gefunden → pruefen ob gewollt (Feature) oder ungewollt (Regression)
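- **error**: Pipeline-Fehler → Build oder Config-Problem

Der Gesamtstatus eines Laufs ist `pass`, wenn keine Session `fail` oder `error` liefert, andernfalls `fail`.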
### Verlauf
Alle Laeufe werden in der Tabelle `regression_runs` persistiert:
```sql
SELECT id, run_at, status, total, passed, failed, errors, duration_ms, triggered_by
FROM regression_runs
ORDER BY run_at DESC
LIMIT 10;
```
---
## Best Practices
### Ground-Truth Sessions waehlen
Decke verschiedene Seitentypen ab:
- Woerterbuchseiten (2-3 Spalten, IPA-Klammern)
- Uebungsseiten (Tabellen, Checkboxen)
- Seiten mit Illustrationen
- Seiten ohne IPA (reines Deutsch-Vokabular)
- Verschiedene Verlage und Layouts
### Workflow vor jedem Commit
```bash
# 1. Regression laufen lassen
./scripts/run-regression.sh
# 2. Bei Failure: Diff pruefen
# - Gewollte Aenderung? → Ground Truth aktualisieren
# - Ungewollte Regression? → Code fixen
# 3. Bei Pass: Commit
git add . && git commit -m "fix: ..."
```
---
## Datenbank-Schema
```sql
CREATE TABLE regression_runs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
run_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
status VARCHAR(20) NOT NULL, -- pass, fail, error
total INT NOT NULL DEFAULT 0,
passed INT NOT NULL DEFAULT 0,
failed INT NOT NULL DEFAULT 0,
errors INT NOT NULL DEFAULT 0,
duration_ms INT,
results JSONB NOT NULL DEFAULT '[]', -- Detail-Ergebnisse pro Session
triggered_by VARCHAR(50) DEFAULT 'manual'
);
```
Ground-Truth Referenzen werden im `ground_truth` JSONB-Feld der
`ocr_pipeline_sessions` Tabelle gespeichert.

View File

@@ -1,7 +1,7 @@
# OCR Pipeline - Schrittweise Seitenrekonstruktion # OCR Pipeline - Schrittweise Seitenrekonstruktion
**Version:** 4.7.0 **Version:** 5.0.0
**Status:** Produktiv (Schritte 1–10 + Grid Editor implementiert) **Status:** Produktiv (Schritte 1–10 + Grid Editor + Regression Framework)
**URL:** https://macmini:3002/ai/ocr-pipeline **URL:** https://macmini:3002/ai/ocr-pipeline
## Uebersicht ## Uebersicht
@@ -1197,6 +1197,62 @@ des Headwords der vorherigen Zeile). Diese werden von PaddleOCR als garbled Text
4. Schlaegt IPA im Britfone-Woerterbuch nach 4. Schlaegt IPA im Britfone-Woerterbuch nach
5. Beruecksichtigt alle Wortteile (z.B. "close sth. down" → `[klˈəʊz dˈaʊn]`) 5. Beruecksichtigt alle Wortteile (z.B. "close sth. down" → `[klˈəʊz dˈaʊn]`)
### Compound Word IPA Decomposition (Step 5e)
Zusammengesetzte Woerter wie "schoolbag" oder "blackbird" haben oft keinen eigenen
IPA-Eintrag im Woerterbuch. Die Funktion `_decompose_compound()` zerlegt sie:
1. Probiere jede Teilungsposition (min. 3 Zeichen pro Teil)
2. Wenn beide Teile im Woerterbuch stehen → IPA verketten
3. Waehle die Teilung mit dem laengsten ersten Teil
**Beispiele:**
| Eingabe | Zerlegung | IPA |
|---------|-----------|-----|
| schoolbag | school + bag | skˈuːl + bæɡ |
| blackbird | black + bird | blæk + bˈɜːd |
| ice-cream | ice + cream | aɪs + kɹˈiːm |
### Trailing Garbled Fragment Removal (Step 5f)
Nach korrekt erkanntem IPA (z.B. `seat [sˈiːt]`) haengt OCR manchmal
eine garbled Kopie der IPA-Transkription an: `seat [sˈiːt] belt si:t belt`.
**`_strip_post_bracket_garbled()`** erkennt und entfernt diese:
1. Alles nach dem letzten `]` scannen
2. Woerter mit IPA-Markern (`:`, `ə`, `ɪ` etc.) → garbled, entfernen
3. Echte Woerter (Woerterbuch, Deutsch, Delimiter) → behalten
4. **Multi-Wort-Headword:** "belt" ist ein echtes Wort, aber wenn danach
garbled IPA kommt, wird nur "belt" behalten, der Rest entfernt
### Regression Framework (Step 5g)
Ground-Truth Sessions koennen als Referenz markiert werden. Nach jeder
Code-Aenderung vergleicht `POST /regression/run` die aktuelle Pipeline-Ausgabe
mit den gespeicherten Referenzen:
- **Strukturelle Diffs:** Zonen, Spalten, Zeilen (Anzahl-Aenderungen)
- **Zellen-Diffs:** Text-Aenderungen, fehlende/neue Zellen, col_type-Aenderungen
- **Persistenz:** Ergebnisse in `regression_runs` Tabelle fuer Trend-Analyse
- **Shell-Script:** `scripts/run-regression.sh` fuer CI-Integration
Admin-UI: [/ai/ocr-regression](https://macmini:3002/ai/ocr-regression)
### Ground Truth Review Workflow (Step 5h)
Admin-UI fuer effiziente Massenpruefung von Sessions:
- **Split-View:** Original-Bild links, erkannter Grid rechts
- **Confidence-Highlighting:** Niedrige Konfidenz rot hervorgehoben
- **Quick-Accept:** Korrekte Zeilen mit einem Klick bestaetigen
- **Inline-Edit:** Text direkt im Grid korrigieren
- **Session-Queue:** Automatisch naechste Session laden
- **Batch-Mark:** Mehrere Sessions gleichzeitig als Ground Truth markieren
Admin-UI: [/ai/ocr-ground-truth](https://macmini:3002/ai/ocr-ground-truth)
### `en_col_type` Erkennung ### `en_col_type` Erkennung
Die Erkennung der Englisch-Headword-Spalte nutzt **Bracket-IPA-Pattern-Count** Die Erkennung der Englisch-Headword-Spalte nutzt **Bracket-IPA-Pattern-Count**
@@ -1536,6 +1592,7 @@ cd klausur-service/backend && pytest tests/test_paddle_kombi.py -v # 36 Tests
| Datum | Version | Aenderung | | Datum | Version | Aenderung |
|-------|---------|----------| |-------|---------|----------|
| 2026-03-23 | 5.0.0 | **Phase 1 Sprint 1:** Compound-IPA-Zerlegung (`_decompose_compound`), Trailing-Garbled-Fragment-Entfernung (Multi-Wort-Headwords), Regression Framework mit DB-Persistenz + History + Shell-Script, Ground-Truth Review Workflow UI, Page-Crop Determinismus verifiziert. Admin-Seiten: `/ai/ocr-regression`, `/ai/ocr-ground-truth`. |
| 2026-03-20 | 4.7.0 | Grid Editor: Zone Merging ueber Bilder (`image_overlays`), Heading Detection (Farbe + Hoehe), Ghost-Filter (borderless-aware), Oversized Word Box Removal, IPA Phonetic Correction (Britfone), IPA Continuation Detection, `en_col_type` via Bracket-Count. 27 Tests. | | 2026-03-20 | 4.7.0 | Grid Editor: Zone Merging ueber Bilder (`image_overlays`), Heading Detection (Farbe + Hoehe), Ghost-Filter (borderless-aware), Oversized Word Box Removal, IPA Phonetic Correction (Britfone), IPA Continuation Detection, `en_col_type` via Bracket-Count. 27 Tests. |
| 2026-03-16 | 4.6.0 | Strukturerkennung (Schritt 8): Region-basierte Grafikerkennung (`cv_graphic_detect.py`) mit Zwei-Pass-Verfahren (Farbregionen + schwarze Illustrationen), Wort-Ueberlappungs-Filter, Box/Zonen/Farb-Analyse. Schritt laeuft nach Worterkennung. | | 2026-03-16 | 4.6.0 | Strukturerkennung (Schritt 8): Region-basierte Grafikerkennung (`cv_graphic_detect.py`) mit Zwei-Pass-Verfahren (Farbregionen + schwarze Illustrationen), Wort-Ueberlappungs-Filter, Box/Zonen/Farb-Analyse. Schritt laeuft nach Worterkennung. |
| 2026-03-12 | 4.5.0 | Kombi-Modus (PaddleOCR + Tesseract): Beide Engines laufen parallel, Koordinaten werden IoU-basiert gematcht und confidence-gewichtet gemittelt. Ungematchte Tesseract-Woerter (Bullets, Symbole) werden hinzugefuegt. 3er-Toggle in OCR Overlay. | | 2026-03-12 | 4.5.0 | Kombi-Modus (PaddleOCR + Tesseract): Beide Engines laufen parallel, Koordinaten werden IoU-basiert gematcht und confidence-gewichtet gemittelt. Ungematchte Tesseract-Woerter (Bullets, Symbole) werden hinzugefuegt. 3er-Toggle in OCR Overlay. |

View File

@@ -1032,6 +1032,37 @@ def _text_has_garbled_ipa(text: str) -> bool:
return False return False
def _decompose_compound(word: str, pronunciation: str = 'british') -> Optional[str]:
    """Try to decompose a compound word and concatenate the IPA of both parts.

    E.g. "schoolbag" -> "school" + "bag" -> IPA of both parts concatenated.
    Hyphenated compounds such as "ice-cream" are split at the hyphen
    ("ice" + "cream"); a hyphen adjacent to the split point is never part of
    a looked-up half.

    Only returns IPA if BOTH parts are found in the dictionary. Every split
    position is considered (min 3 characters per part) and the split with the
    longest first part wins.

    Args:
        word: Candidate compound. Lower-cased and stripped before matching.
        pronunciation: Passed through unchanged to ``_lookup_ipa``.

    Returns:
        The two IPA strings concatenated without a separator, or ``None``
        when IPA lookup is unavailable, the word has fewer than 6 characters,
        or no split yields two dictionary hits.
    """
    if not IPA_AVAILABLE:
        return None

    lower = word.lower().strip()
    if len(lower) < 6:
        return None  # too short for a compound

    # Walk split positions from right to left: the first hit is then
    # automatically the split with the longest first part.
    for split_pos in range(len(lower) - 3, 2, -1):
        first = lower[:split_pos].rstrip('-')
        second = lower[split_pos:].lstrip('-')
        # Re-check the minimum length after dropping a boundary hyphen.
        if len(first) < 3 or len(second) < 3:
            continue
        ipa_first = _lookup_ipa(first, pronunciation)
        if not ipa_first:
            continue
        ipa_second = _lookup_ipa(second, pronunciation)
        if ipa_second:
            return ipa_first + ipa_second

    return None
def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str: def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str:
"""Insert IPA pronunciation for English words that have no brackets at all. """Insert IPA pronunciation for English words that have no brackets at all.
@@ -1077,6 +1108,10 @@ def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str:
# Fallback: try without hyphens (e.g. "second-hand" → "secondhand") # Fallback: try without hyphens (e.g. "second-hand" → "secondhand")
if not ipa and '-' in clean: if not ipa and '-' in clean:
ipa = _lookup_ipa(clean.replace('-', ''), pronunciation) ipa = _lookup_ipa(clean.replace('-', ''), pronunciation)
# Fallback 0b: compound word decomposition
# E.g. "schoolbag" → "school"+"bag" → concatenated IPA
if not ipa:
ipa = _decompose_compound(clean, pronunciation)
# Fallback 1: IPA-marker split for merged tokens where OCR # Fallback 1: IPA-marker split for merged tokens where OCR
# joined headword with its IPA (e.g. "schoolbagsku:lbæg"). # joined headword with its IPA (e.g. "schoolbagsku:lbæg").
# Find the first IPA marker character (:, æ, ɪ, etc.), walk # Find the first IPA marker character (:, æ, ɪ, etc.), walk
@@ -1098,6 +1133,9 @@ def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str:
headword = w[:split] headword = w[:split]
ocr_ipa = w[split:] ocr_ipa = w[split:]
hw_ipa = _lookup_ipa(headword, pronunciation) hw_ipa = _lookup_ipa(headword, pronunciation)
if not hw_ipa:
# Try compound decomposition for the headword part
hw_ipa = _decompose_compound(headword, pronunciation)
if hw_ipa: if hw_ipa:
words[i] = f"{headword} [{hw_ipa}]" words[i] = f"{headword} [{hw_ipa}]"
else: else:
@@ -1197,6 +1235,12 @@ def _strip_post_bracket_garbled(
E.g. ``sea [sˈiː] si:`` → ``sea [sˈiː]`` E.g. ``sea [sˈiː] si:`` → ``sea [sˈiː]``
``seat [sˈiːt] si:t`` → ``seat [sˈiːt]`` ``seat [sˈiːt] si:t`` → ``seat [sˈiːt]``
``seat [sˈiːt] belt si:t belt`` → ``seat [sˈiːt] belt``
For multi-word headwords like "seat belt", a real English word ("belt")
may be followed by garbled IPA duplicates. We detect this by checking
whether the sequence after a real word contains IPA markers (`:`, `ə`,
etc.) — if so, everything from the first garbled token onward is stripped.
""" """
if ']' not in text: if ']' not in text:
return text return text
@@ -1207,6 +1251,8 @@ def _strip_post_bracket_garbled(
after = text[last_bracket + 1:].strip() after = text[last_bracket + 1:].strip()
if not after: if not after:
return text return text
_IPA_MARKER_CHARS = set(':əɪɛɒʊʌæɑɔʃʒθðŋˈˌ')
after_words = after.split() after_words = after.split()
kept: List[str] = [] kept: List[str] = []
for idx, w in enumerate(after_words): for idx, w in enumerate(after_words):
@@ -1215,17 +1261,42 @@ def _strip_post_bracket_garbled(
kept.extend(after_words[idx:]) kept.extend(after_words[idx:])
break break
# Contains IPA markers (length mark, IPA chars) — garbled, skip # Contains IPA markers (length mark, IPA chars) — garbled, skip
if ':' in w or any(c in w for c in 'əɪɛɒʊʌæɑɔʃʒθðŋˈˌ'): if any(c in w for c in _IPA_MARKER_CHARS):
# Everything from here is garbled IPA — stop scanning
# but look ahead: if any remaining words are real English
# words WITHOUT IPA markers, they might be a different headword
# following. Only skip the contiguous garbled run.
continue continue
clean = re.sub(r'[^a-zA-Z]', '', w) clean = re.sub(r'[^a-zA-Z]', '', w)
# Uppercase — likely German, keep rest # Uppercase — likely German, keep rest
if clean and clean[0].isupper(): if clean and clean[0].isupper():
kept.extend(after_words[idx:]) kept.extend(after_words[idx:])
break break
# Known English word — keep rest # Known English word — keep it, but check if followed by garbled IPA
# (multi-word headword case like "seat [siːt] belt si:t belt")
if clean and len(clean) >= 2 and _lookup_ipa(clean, pronunciation): if clean and len(clean) >= 2 and _lookup_ipa(clean, pronunciation):
kept.extend(after_words[idx:]) # Peek ahead: if next word has IPA markers, the rest is garbled
break remaining = after_words[idx + 1:]
has_garbled_after = any(
any(c in rw for c in _IPA_MARKER_CHARS)
for rw in remaining
)
if has_garbled_after:
# Keep this real word but stop — rest is garbled duplication
kept.append(w)
# Still scan for delimiters/German in the remaining words
for ridx, rw in enumerate(remaining):
if rw in ('', '', '-', '/', '|', ',', ';'):
kept.extend(remaining[ridx:])
break
rclean = re.sub(r'[^a-zA-Z]', '', rw)
if rclean and rclean[0].isupper():
kept.extend(remaining[ridx:])
break
break
else:
kept.extend(after_words[idx:])
break
# Unknown short word — likely garbled, skip # Unknown short word — likely garbled, skip
if kept: if kept:
return before + ' ' + ' '.join(kept) return before + ' ' + ' '.join(kept)

View File

@@ -0,0 +1,18 @@
-- Migration 008: Regression test run history
-- Stores results of regression test runs for trend analysis.
-- One row per run of the regression suite. Both statements use IF NOT EXISTS,
-- so executing this file repeatedly is harmless.
-- NOTE(review): gen_random_uuid() is built in from PostgreSQL 13 on; older
-- servers need the pgcrypto extension — confirm the target server version.
CREATE TABLE IF NOT EXISTS regression_runs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
run_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
status VARCHAR(20) NOT NULL, -- 'pass', 'fail', 'error' (the suite endpoint itself only writes 'pass' or 'fail')
total INT NOT NULL DEFAULT 0,
passed INT NOT NULL DEFAULT 0,
failed INT NOT NULL DEFAULT 0,
errors INT NOT NULL DEFAULT 0,
duration_ms INT,
results JSONB NOT NULL DEFAULT '[]',
triggered_by VARCHAR(50) DEFAULT 'manual' -- 'manual', 'script', 'ci'
);
-- History queries order by run_at DESC, so index in that direction.
CREATE INDEX IF NOT EXISTS idx_regression_runs_run_at
ON regression_runs (run_at DESC);

View File

@@ -8,7 +8,11 @@ Lizenz: Apache 2.0
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
""" """
import json
import logging import logging
import os
import time
import uuid
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
@@ -16,6 +20,7 @@ from fastapi import APIRouter, HTTPException, Query
from grid_editor_api import _build_grid_core from grid_editor_api import _build_grid_core
from ocr_pipeline_session_store import ( from ocr_pipeline_session_store import (
get_pool,
get_session_db, get_session_db,
list_ground_truth_sessions_db, list_ground_truth_sessions_db,
update_session_db, update_session_db,
@@ -26,6 +31,60 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["regression"]) router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["regression"])
# ---------------------------------------------------------------------------
# DB persistence for regression runs
# ---------------------------------------------------------------------------
# True once the migration has been applied successfully in this process, so
# later calls skip the file read and the DDL round-trip.
_regression_table_ready = False


async def _init_regression_table():
    """Ensure the regression_runs table exists (idempotent).

    Executes ``migrations/008_regression_runs.sql`` (located next to this
    module) once per process. If the migration file is missing, a warning is
    logged and nothing is executed; the next call tries again.
    """
    global _regression_table_ready
    if _regression_table_ready:
        return

    migration_path = os.path.join(
        os.path.dirname(__file__),
        "migrations/008_regression_runs.sql",
    )
    if not os.path.exists(migration_path):
        logger.warning("Regression migration file not found: %s", migration_path)
        return

    with open(migration_path, "r", encoding="utf-8") as f:
        sql = f.read()

    pool = await get_pool()
    async with pool.acquire() as conn:
        await conn.execute(sql)

    # Only mark as done after the DDL succeeded, so a failure is retried.
    _regression_table_ready = True
async def _persist_regression_run(
    status: str,
    summary: dict,
    results: list,
    duration_ms: int,
    triggered_by: str = "manual",
) -> str:
    """Write one regression run into ``regression_runs`` (best effort).

    Args:
        status: Overall run status stored in the ``status`` column.
        summary: Dict read for the keys ``total``, ``passed``, ``failed`` and
            ``errors``; a missing key is stored as 0.
        results: Per-session results, JSON-encoded into the ``results`` column.
        duration_ms: Run duration in milliseconds.
        triggered_by: Label of whoever started the run.

    Returns:
        The new run ID, or ``""`` if anything failed. Failures are logged as a
        warning and never raised.
    """
    try:
        await _init_regression_table()
        db_pool = await get_pool()
        new_id = str(uuid.uuid4())
        # Counter columns in the order the INSERT expects them.
        counters = [
            summary.get(key, 0)
            for key in ("total", "passed", "failed", "errors")
        ]
        payload = json.dumps(results)
        async with db_pool.acquire() as connection:
            await connection.execute(
                """
                INSERT INTO regression_runs
                (id, status, total, passed, failed, errors, duration_ms, results, triggered_by)
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8::jsonb, $9)
                """,
                new_id,
                status,
                *counters,
                duration_ms,
                payload,
                triggered_by,
            )
    except Exception as e:
        logger.warning("Failed to persist regression run: %s", e)
        return ""

    logger.info("Regression run %s persisted: %s", new_id, status)
    return new_id
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Helpers # Helpers
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -299,8 +358,11 @@ async def run_single_regression(session_id: str):
@router.post("/regression/run") @router.post("/regression/run")
async def run_all_regressions(): async def run_all_regressions(
triggered_by: str = Query("manual", description="Who triggered: manual, script, ci"),
):
"""Re-run build_grid for ALL ground-truth sessions and compare.""" """Re-run build_grid for ALL ground-truth sessions and compare."""
start_time = time.monotonic()
sessions = await list_ground_truth_sessions_db() sessions = await list_ground_truth_sessions_db()
if not sessions: if not sessions:
@@ -370,19 +432,105 @@ async def run_all_regressions():
results.append(entry) results.append(entry)
overall = "pass" if failed == 0 and errors == 0 else "fail" overall = "pass" if failed == 0 and errors == 0 else "fail"
duration_ms = int((time.monotonic() - start_time) * 1000)
summary = {
"total": len(results),
"passed": passed,
"failed": failed,
"errors": errors,
}
logger.info( logger.info(
"Regression suite: %s%d passed, %d failed, %d errors (of %d)", "Regression suite: %s%d passed, %d failed, %d errors (of %d) in %dms",
overall, passed, failed, errors, len(results), overall, passed, failed, errors, len(results), duration_ms,
)
# Persist to DB
run_id = await _persist_regression_run(
status=overall,
summary=summary,
results=results,
duration_ms=duration_ms,
triggered_by=triggered_by,
) )
return { return {
"status": overall, "status": overall,
"run_id": run_id,
"duration_ms": duration_ms,
"results": results, "results": results,
"summary": { "summary": summary,
"total": len(results),
"passed": passed,
"failed": failed,
"errors": errors,
},
} }
@router.get("/regression/history")
async def get_regression_history(
    limit: int = Query(20, ge=1, le=100),
):
    """Return the most recent regression runs, newest first.

    The response is ``{"runs": [...], "count": n}``. Each run carries the
    summary columns only, not the per-session ``results``. On any failure the
    error is logged and an empty list plus an ``error`` string is returned
    instead of raising.
    """
    # Columns copied verbatim from the row into the response entry.
    plain_columns = (
        "status", "total", "passed", "failed", "errors",
        "duration_ms", "triggered_by",
    )
    try:
        await _init_regression_table()
        db_pool = await get_pool()
        async with db_pool.acquire() as connection:
            records = await connection.fetch(
                """
                SELECT id, run_at, status, total, passed, failed, errors,
                       duration_ms, triggered_by
                FROM regression_runs
                ORDER BY run_at DESC
                LIMIT $1
                """,
                limit,
            )

        runs = []
        for record in records:
            started = record["run_at"]
            entry = {
                "id": str(record["id"]),
                "run_at": started.isoformat() if started else None,
            }
            for column in plain_columns:
                entry[column] = record[column]
            runs.append(entry)

        return {"runs": runs, "count": len(records)}
    except Exception as e:
        logger.warning("Failed to fetch regression history: %s", e)
        return {"runs": [], "count": 0, "error": str(e)}
@router.get("/regression/history/{run_id}")
async def get_regression_run_detail(run_id: str):
    """Get detailed results of a specific regression run.

    Args:
        run_id: UUID of the run, as returned by the history endpoint.

    Returns:
        The run's summary columns plus the decoded per-session ``results``.

    Raises:
        HTTPException: 400 if ``run_id`` is not a valid UUID, 404 if no such
            run exists, 500 on any other failure.
    """
    # Reject malformed IDs up front; otherwise the driver's type error for the
    # UUID column would surface as a 500.
    try:
        run_uuid = uuid.UUID(run_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid run ID")

    try:
        await _init_regression_table()
        pool = await get_pool()
        async with pool.acquire() as conn:
            row = await conn.fetchrow(
                "SELECT * FROM regression_runs WHERE id = $1",
                run_uuid,
            )
        if not row:
            raise HTTPException(status_code=404, detail="Run not found")

        # The jsonb column arrives as text unless the pool registers a JSON
        # codec; accept both text and an already-decoded value.
        raw_results = row["results"]
        if isinstance(raw_results, (str, bytes, bytearray)):
            results = json.loads(raw_results) if raw_results else []
        else:
            results = raw_results or []

        return {
            "id": str(row["id"]),
            "run_at": row["run_at"].isoformat() if row["run_at"] else None,
            "status": row["status"],
            "total": row["total"],
            "passed": row["passed"],
            "failed": row["failed"],
            "errors": row["errors"],
            "duration_ms": row["duration_ms"],
            "triggered_by": row["triggered_by"],
            "results": results,
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.warning("Failed to fetch regression run %s: %s", run_id, e)
        raise HTTPException(status_code=500, detail=str(e))

View File

@@ -57,6 +57,63 @@ class TestInsertMissingIpa:
result = _insert_missing_ipa("Anstecknadel", "british") result = _insert_missing_ipa("Anstecknadel", "british")
assert result == "Anstecknadel" assert result == "Anstecknadel"
def test_compound_word_schoolbag_gets_ipa(self):
    """R07: 'schoolbag' has to come back annotated with bracketed IPA (school+bag)."""
    from cv_ocr_engines import _insert_missing_ipa

    annotated = _insert_missing_ipa("schoolbag", "british")

    # Both bracket characters must be present ...
    for bracket in ("[", "]"):
        assert bracket in annotated
    # ... and the IPA must directly follow the untouched headword.
    assert annotated.startswith("schoolbag [")
def test_compound_word_blackbird(self):
    """'blackbird' has to come back annotated with bracketed IPA."""
    from cv_ocr_engines import _insert_missing_ipa

    annotated = _insert_missing_ipa("blackbird", "british")

    for bracket in ("[", "]"):
        assert bracket in annotated
def test_compound_word_too_short(self):
    """Words shorter than 6 chars should not attempt compound decomposition."""
    from cv_ocr_engines import _decompose_compound

    assert _decompose_compound("bag", "british") is None
    # Boundary: 5 characters is the longest input still below the minimum.
    assert _decompose_compound("inbox", "british") is None
def test_decompose_compound_direct(self):
    """Calling _decompose_compound directly yields IPA for a known compound."""
    from cv_ocr_engines import _decompose_compound

    # "schoolbag" splits into "school" + "bag"; both are expected dictionary entries.
    ipa = _decompose_compound("schoolbag", "british")

    assert ipa is not None
class TestStripPostBracketGarbled:
    """Tests for _strip_post_bracket_garbled — trailing garbled IPA removal."""

    def test_simple_trailing_garbled(self):
        """R21-simple: 'sea [sˈiː] si:' → trailing IPA marker removed."""
        from cv_ocr_engines import _strip_post_bracket_garbled
        result = _strip_post_bracket_garbled("sea [sˈiː] si:")
        assert "si:" not in result
        assert result.startswith("sea [sˈiː]")

    def test_multi_word_trailing_garbled(self):
        """R21: 'seat [sˈiːt] belt si:t belt' → keep 'belt', remove garbled."""
        from cv_ocr_engines import _strip_post_bracket_garbled
        result = _strip_post_bracket_garbled("seat [sˈiːt] belt si:t belt")
        assert "belt" in result  # real word kept
        assert "si:t" not in result  # garbled removed
        # Should contain "seat [sˈiːt] belt" but not the garbled duplication
        assert result.count("belt") == 1

    def test_delimiter_after_bracket_kept(self):
        """Delimiters after IPA bracket are kept, together with what follows."""
        from cv_ocr_engines import _strip_post_bracket_garbled
        # The dash is the delimiter under test; without it "tanzen" is just an
        # unknown lowercase token and would be dropped.
        result = _strip_post_bracket_garbled("dance [dˈɑːns] – tanzen")
        assert "– tanzen" in result

    def test_german_after_bracket_kept(self):
        """German words (uppercase) after IPA bracket are kept."""
        from cv_ocr_engines import _strip_post_bracket_garbled
        result = _strip_post_bracket_garbled("badge [bædʒ] Abzeichen")
        assert "Abzeichen" in result
class TestFixCellPhonetics: class TestFixCellPhonetics:
"""Tests for fix_cell_phonetics function.""" """Tests for fix_cell_phonetics function."""

View File

@@ -415,3 +415,53 @@ class TestDetectAndCropPage:
assert 0 <= pct["y"] <= 100 assert 0 <= pct["y"] <= 100
assert 0 < pct["width"] <= 100 assert 0 < pct["width"] <= 100
assert 0 < pct["height"] <= 100 assert 0 < pct["height"] <= 100
class TestCropDeterminism:
    """A3: Verify that page crop produces identical results across N runs."""

    @pytest.mark.parametrize("image_factory,desc", [
        (
            lambda: _make_image_with_content(800, 600, (100, 700, 80, 520)),
            "standard content",
        ),
        (
            lambda: _make_book_scan(1000, 800),
            "book scan with spine shadow",
        ),
    ])
    def test_determinism_10_runs(self, image_factory, desc):
        """Same image must produce identical crops in 10 consecutive runs."""
        img = image_factory()
        results = []
        for _ in range(10):
            # Copy per run so a run that mutates its input cannot affect the next one.
            cropped, result = detect_and_crop_page(img.copy())
            results.append({
                "crop_applied": result["crop_applied"],
                "cropped_size": result["cropped_size"],
                "border_fractions": result["border_fractions"],
                "shape": cropped.shape,
            })

        first = results[0]
        for i, r in enumerate(results[1:], 1):
            assert r["crop_applied"] == first["crop_applied"], (
                f"Run {i} crop_applied differs from run 0 ({desc})"
            )
            assert r["cropped_size"] == first["cropped_size"], (
                f"Run {i} cropped_size differs from run 0 ({desc})"
            )
            # Collected above, so compare it too.
            assert r["border_fractions"] == first["border_fractions"], (
                f"Run {i} border_fractions differs from run 0 ({desc})"
            )
            assert r["shape"] == first["shape"], (
                f"Run {i} output shape differs from run 0 ({desc})"
            )

    def test_determinism_pixel_identical(self):
        """Crop output pixels must be identical across runs."""
        img = _make_image_with_content(800, 600, (100, 700, 80, 520))
        ref_crop, _ = detect_and_crop_page(img.copy())

        for i in range(5):
            crop, _ = detect_and_crop_page(img.copy())
            assert np.array_equal(ref_crop, crop), (
                f"Run {i} produced different pixel output"
            )

View File

@@ -84,5 +84,6 @@ nav:
- Zeugnis-System: architecture/zeugnis-system.md - Zeugnis-System: architecture/zeugnis-system.md
- Entwicklung: - Entwicklung:
- Testing: development/testing.md - Testing: development/testing.md
- Regression Testing: development/regression-testing.md
- Dokumentation: development/documentation.md - Dokumentation: development/documentation.md
- CI/CD Pipeline: development/ci-cd-pipeline.md - CI/CD Pipeline: development/ci-cd-pipeline.md

163
scripts/benchmark-trocr.py Executable file
View File

@@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""
TrOCR Baseline Benchmark — measures PyTorch TrOCR performance.
Metrics:
- RAM usage (RSS) before and after model load
- Inference time per line (min, max, mean, p50, p95)
- Model size on disk
Output: JSON report to stdout (redirect to file for Sprint 2 comparison).
Usage:
python scripts/benchmark-trocr.py [--model trocr-base-printed] [--runs 10]
python scripts/benchmark-trocr.py > benchmark-trocr-baseline.json
"""
import argparse
import json
import os
import sys
import time
from datetime import datetime
# Add backend to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'klausur-service', 'backend'))
def get_rss_mb():
    """Return the process's peak resident set size (``ru_maxrss``) in MB.

    Note that this is the high-water mark since process start, not the
    momentary RSS, so the value never decreases between calls.
    """
    import resource

    peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    # ru_maxrss is reported in bytes on macOS and in kilobytes on Linux.
    divisor = 1024 * 1024 if sys.platform == 'darwin' else 1024
    return peak / divisor
def get_model_size_mb(model_name):
    """Estimate a model's on-disk size in MB from the local HuggingFace cache.

    Walks ``~/.cache/huggingface/hub`` and sums the size of every regular file
    below a directory whose path contains ``model_name`` (with ``/`` replaced
    by ``--``).

    Symlinks are skipped: the hub cache normally exposes each stored blob a
    second time through a symlink in ``snapshots/``, and following those links
    would count every file twice.

    Args:
        model_name: Model identifier, e.g. ``"microsoft/trocr-base-printed"``.

    Returns:
        Size in MB; 0 if nothing matches or the cache directory is missing.
    """
    cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
    model_dir_pattern = model_name.replace('/', '--')

    total = 0
    for root, _dirs, files in os.walk(cache_dir):
        if model_dir_pattern not in root:
            continue
        for name in files:
            path = os.path.join(root, name)
            if os.path.islink(path):
                continue  # alias of a blob that is counted on its own
            try:
                total += os.path.getsize(path)
            except OSError:
                # File vanished or is unreadable — ignore it for the estimate.
                continue
    return total / (1024 * 1024)  # bytes to MB
def benchmark_trocr(model_name: str = "microsoft/trocr-base-printed", num_runs: int = 10):
    """Run the TrOCR benchmark and return a JSON-serialisable report dict.

    Loads the model, performs one uncounted warm-up inference and then times
    ``num_runs`` inferences on synthetic line images. Progress is written to
    stderr so stdout stays free for the JSON report.

    Args:
        model_name: HuggingFace model identifier to load.
        num_runs: Number of timed inference passes; must be at least 1.

    Returns:
        The report dict, or ``{"error": ...}`` when ``num_runs`` is invalid or
        the model cannot be loaded.
    """
    if num_runs < 1:
        # Warm-up and statistics both need at least one image.
        return {"error": f"num_runs must be >= 1, got {num_runs}"}

    from PIL import Image

    rss_before = get_rss_mb()

    # Load model
    print(f"Loading model: {model_name}", file=sys.stderr)
    load_start = time.monotonic()
    try:
        from transformers import TrOCRProcessor, VisionEncoderDecoderModel
        processor = TrOCRProcessor.from_pretrained(model_name)
        model = VisionEncoderDecoderModel.from_pretrained(model_name)
        model.eval()
    except Exception as e:
        return {"error": f"Failed to load model: {e}"}
    load_time = time.monotonic() - load_start

    rss_after_load = get_rss_mb()
    model_size = get_model_size_mb(model_name)
    print(f"Model loaded in {load_time:.1f}s, RSS: {rss_after_load:.0f}MB", file=sys.stderr)

    # Create synthetic test images (white line image with one dark block).
    # In production, these would be real cropped text lines.
    w, h = 384, 48  # typical TrOCR input size
    test_images = []
    for i in range(num_runs):
        img = Image.new('RGB', (w, h), 'white')
        pixels = img.load()
        # Vary the block per run. The offsets wrap so the block never becomes
        # empty for large i; for the first 15 runs the geometry matches the
        # unwrapped 50 + i*10 .. 200 + i*5.
        x_start = 50 + (i * 10) % 150
        x_end = 200 + (i * 5) % 150
        for x in range(x_start, x_end):
            for y in range(10, 38):
                pixels[x, y] = (30, 30, 30)
        test_images.append(img)

    # Warm-up run (not counted)
    print("Warm-up...", file=sys.stderr)
    import torch
    with torch.no_grad():
        pixel_values = processor(images=test_images[0], return_tensors="pt").pixel_values
        _ = model.generate(pixel_values, max_new_tokens=50)

    # Benchmark runs: preprocessing, generation and decoding are all timed.
    print(f"Running {num_runs} inference passes...", file=sys.stderr)
    times_ms = []
    for i, img in enumerate(test_images):
        start = time.monotonic()
        with torch.no_grad():
            pixel_values = processor(images=img, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values, max_new_tokens=50)
            text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        elapsed_ms = (time.monotonic() - start) * 1000
        times_ms.append(elapsed_ms)
        print(f" Run {i+1}/{num_runs}: {elapsed_ms:.0f}ms -> '{text[:30]}'", file=sys.stderr)

    rss_after_inference = get_rss_mb()

    # Compute stats (percentile = element at that rank of the sorted timings)
    times_sorted = sorted(times_ms)
    p50_idx = len(times_sorted) // 2
    p95_idx = min(int(len(times_sorted) * 0.95), len(times_sorted) - 1)

    report = {
        "benchmark": "trocr-baseline",
        "timestamp": datetime.utcnow().isoformat() + "Z",
        "model": model_name,
        "backend": "pytorch",
        "quantization": "float32",
        "num_runs": num_runs,
        "model_size_mb": round(model_size, 1),
        "ram_mb": {
            "before_load": round(rss_before, 1),
            "after_load": round(rss_after_load, 1),
            "after_inference": round(rss_after_inference, 1),
            "model_delta": round(rss_after_load - rss_before, 1),
        },
        "load_time_seconds": round(load_time, 2),
        "inference_ms": {
            "min": round(min(times_ms), 1),
            "max": round(max(times_ms), 1),
            "mean": round(sum(times_ms) / len(times_ms), 1),
            "p50": round(times_sorted[p50_idx], 1),
            "p95": round(times_sorted[p95_idx], 1),
        },
        "times_ms": [round(t, 1) for t in times_ms],
        "platform": {
            "python": sys.version.split()[0],
            "os": sys.platform,
        },
    }
    return report
def _positive_int(value):
    """argparse ``type=`` callable that accepts only integers >= 1.

    Args:
        value: Raw command-line string supplied for the option.

    Returns:
        The parsed integer.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not an integer or is
            smaller than 1. argparse turns this into a usage error
            (exit status 2).
    """
    try:
        parsed = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if parsed < 1:
        raise argparse.ArgumentTypeError(f"must be at least 1, got {parsed}")
    return parsed


def main():
    """Command-line entry point: run the TrOCR benchmark and print its report.

    Parses ``--model`` (HuggingFace model name) and ``--runs`` (number of
    inference passes), calls ``benchmark_trocr`` with them and writes the
    returned report to stdout as indented JSON.

    ``--runs`` must be at least 1: the benchmark's statistics take
    ``min()``/``max()`` of the collected timings and divide by their count,
    so an empty run list would otherwise end in a traceback rather than a
    usage message.
    """
    parser = argparse.ArgumentParser(description="TrOCR Baseline Benchmark")
    parser.add_argument("--model", default="microsoft/trocr-base-printed",
                        help="HuggingFace model name")
    # Validated up front so a bad value fails fast with a usage error.
    parser.add_argument("--runs", type=_positive_int, default=10,
                        help="Number of inference runs")
    args = parser.parse_args()

    report = benchmark_trocr(model_name=args.model, num_runs=args.runs)
    print(json.dumps(report, indent=2))


if __name__ == "__main__":
    main()

61
scripts/run-regression.sh Executable file
View File

@@ -0,0 +1,61 @@
#!/usr/bin/env bash
# Run OCR pipeline regression tests and exit non-zero on failure.
#
# Usage:
#   ./scripts/run-regression.sh                        # default: macmini:8086
#   ./scripts/run-regression.sh http://localhost:8086
#
# Exit codes:
#   0 = all pass
#   1 = failures or errors (including a response body that cannot be parsed)
#   2 = connection error (curl could not complete the request, or the server
#       answered with an HTTP error status, which -f turns into a failure)

set -euo pipefail

BASE_URL="${1:-http://macmini:8086}"
ENDPOINT="${BASE_URL}/api/v1/ocr-pipeline/regression/run?triggered_by=script"

echo "=== OCR Pipeline Regression Suite ==="
echo "Endpoint: ${ENDPOINT}"
echo ""

# -sS hides the progress meter but still lets curl report WHY a request
# failed on stderr. stderr is deliberately not merged into RESPONSE so the
# captured text is only ever the response body.
RESPONSE=$(curl -sS -f -X POST "${ENDPOINT}" -H "Content-Type: application/json") || {
  echo "ERROR: Could not reach ${ENDPOINT}"
  exit 2
}

# Parse the body once and emit the six summary fields, one per line.
# A non-JSON body or a missing key yields a single-line error on stderr
# instead of a Python traceback.
SUMMARY=$(printf '%s' "${RESPONSE}" | python3 -c '
import json
import sys

try:
    data = json.load(sys.stdin)
    summary = data["summary"]
    fields = [
        data["status"],
        summary["total"],
        summary["passed"],
        summary["failed"],
        summary["errors"],
        data.get("duration_ms", "?"),
    ]
except (ValueError, KeyError, TypeError, AttributeError) as exc:
    sys.exit("ERROR: Unexpected response from regression endpoint: %r" % (exc,))

for field in fields:
    # Keep every field on exactly one line so the shell can read them back.
    print(str(field).replace("\n", " "))
') || exit 1

# Pre-initialise so `set -u` is satisfied even if a trailing field was empty
# (command substitution strips trailing newlines, which would end the input
# early); `|| true` keeps `set -e` from aborting on that early EOF.
STATUS="" TOTAL="" PASSED="" FAILED="" ERRORS="" DURATION=""
{
  IFS= read -r STATUS && IFS= read -r TOTAL && IFS= read -r PASSED &&
    IFS= read -r FAILED && IFS= read -r ERRORS && IFS= read -r DURATION
} <<< "${SUMMARY}" || true

echo "Status: ${STATUS}"
echo "Total: ${TOTAL}"
echo "Passed: ${PASSED}"
echo "Failed: ${FAILED}"
echo "Errors: ${ERRORS}"
echo "Duration: ${DURATION}ms"
echo ""

if [[ "${STATUS}" == "pass" ]]; then
  echo "PASS — All regression tests passed."
  exit 0
fi

echo "FAIL — Regression failures detected!"
# Print one entry per non-passing result, with its error text and diff
# counters when the response provides them.
printf '%s' "${RESPONSE}" | python3 -c '
import json
import sys

data = json.load(sys.stdin)
for r in data.get("results", []):
    status = str(r.get("status", "unknown"))
    if status == "pass":
        continue
    # Fall back to session_id only when name is absent, so a result that has
    # a name but no session_id does not raise KeyError.
    label = r["name"] if "name" in r else r.get("session_id", "?")
    print(" {}: {}".format(status.upper(), label))
    if "error" in r:
        print(" Error: {}".format(r["error"]))
    ds = r.get("diff_summary", {})
    if ds:
        print(
            " Structural: {}, Text: {}, Missing: {}, Added: {}".format(
                ds.get("structural_changes", 0),
                ds.get("text_changes", 0),
                ds.get("cells_missing", 0),
                ds.get("cells_added", 0),
            )
        )
'
exit 1