feat: Sprint 1 — IPA hardening, regression framework, ground-truth review
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 19s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 19s
Track A (Backend): - Compound word IPA decomposition (schoolbag→school+bag) - Trailing garbled IPA fragment removal after brackets (R21 fix) - Regression runner with DB persistence, history endpoints - Page crop determinism verified with tests Track B (Frontend): - OCR Regression dashboard (/ai/ocr-regression) - Ground Truth Review workflow (/ai/ocr-ground-truth) with split-view, confidence highlighting, inline edit, batch mark, progress tracking Track C (Docs): - OCR-Pipeline.md v5.0 (Steps 5e-5h) - Regression testing guide - mkdocs.yml nav update Track D (Infra): - TrOCR baseline benchmark script - run-regression.sh shell script - Migration 008: regression_runs table Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
580
admin-lehrer/app/(admin)/ai/ocr-ground-truth/page.tsx
Normal file
580
admin-lehrer/app/(admin)/ai/ocr-ground-truth/page.tsx
Normal file
@@ -0,0 +1,580 @@
|
||||
'use client'
|
||||
|
||||
/**
|
||||
* Ground-Truth Review Workflow
|
||||
*
|
||||
* Efficient mass-review of OCR sessions:
|
||||
* - Session queue with auto-advance
|
||||
* - Split-view: original image left, grid right
|
||||
* - Confidence highlighting on cells
|
||||
* - Quick-accept per row
|
||||
* - Inline cell editing
|
||||
* - Batch mark as ground truth
|
||||
* - Progress tracking
|
||||
*/
|
||||
|
||||
import { useState, useEffect, useCallback, useRef } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * One OCR session as returned by the sessions list endpoint.
 */
interface Session {
  id: string
  // Display name; may be empty — the UI falls back to filename.
  name: string
  filename: string
  // e.g. 'active'; only active sessions appear in the 'unreviewed' filter.
  status: string
  created_at: string
  document_category: string | null
  // True when the API payload carries ground_truth.build_grid_reference,
  // i.e. a grid reference has actually been saved for this session.
  has_ground_truth: boolean
}
|
||||
|
||||
/**
 * A detected table zone within the OCR grid result.
 */
interface GridZone {
  zone_id: string
  zone_type: string
  // Column descriptors; col_type values carry a 'column_' prefix that the
  // UI strips for display.
  columns: Array<{ col_index: number; col_type: string; header: string }>
  rows: Array<{ row_index: number; is_header: boolean }>
  // Flat cell list; the review UI groups these by row_index.
  cells: GridCell[]
}
|
||||
|
||||
/**
 * A single OCR-recognized table cell.
 */
interface GridCell {
  cell_id: string
  row_index: number
  col_index: number
  col_type: string
  text: string
  // OCR confidence in percent (0-100); drives the cell highlighting
  // (>= 80 green, >= 50 amber, below red). Undefined means unknown.
  confidence?: number
  is_bold?: boolean
}
|
||||
|
||||
/**
 * Grid payload from the grid-editor endpoint.
 */
interface GridResult {
  zones: GridZone[]
  // Aggregate counts over all zones; optional — not every response includes it.
  summary?: {
    total_zones: number
    total_columns: number
    total_rows: number
    total_cells: number
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function confidenceColor(conf: number | undefined): string {
|
||||
if (conf === undefined) return ''
|
||||
if (conf >= 80) return 'bg-emerald-50'
|
||||
if (conf >= 50) return 'bg-amber-50'
|
||||
return 'bg-red-50'
|
||||
}
|
||||
|
||||
function confidenceBorder(conf: number | undefined): string {
|
||||
if (conf === undefined) return 'border-slate-200'
|
||||
if (conf >= 80) return 'border-emerald-200'
|
||||
if (conf >= 50) return 'border-amber-300'
|
||||
return 'border-red-300'
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Component
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export default function GroundTruthReviewPage() {
|
||||
// Session list & queue
|
||||
const [allSessions, setAllSessions] = useState<Session[]>([])
|
||||
const [filter, setFilter] = useState<'all' | 'unreviewed' | 'reviewed'>('unreviewed')
|
||||
const [currentIdx, setCurrentIdx] = useState(0)
|
||||
const [loading, setLoading] = useState(true)
|
||||
|
||||
// Current session data
|
||||
const [grid, setGrid] = useState<GridResult | null>(null)
|
||||
const [loadingGrid, setLoadingGrid] = useState(false)
|
||||
const [editingCell, setEditingCell] = useState<string | null>(null)
|
||||
const [editText, setEditText] = useState('')
|
||||
const [acceptedRows, setAcceptedRows] = useState<Set<string>>(new Set())
|
||||
const [zoom, setZoom] = useState(100)
|
||||
|
||||
// Batch operations
|
||||
const [selectedSessions, setSelectedSessions] = useState<Set<string>>(new Set())
|
||||
const [marking, setMarking] = useState(false)
|
||||
const [markResult, setMarkResult] = useState<string | null>(null)
|
||||
|
||||
// Stats
|
||||
const [reviewedCount, setReviewedCount] = useState(0)
|
||||
const [totalCount, setTotalCount] = useState(0)
|
||||
|
||||
const imageRef = useRef<HTMLDivElement>(null)
|
||||
|
||||
// Load all sessions
|
||||
const loadSessions = useCallback(async () => {
|
||||
setLoading(true)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions?limit=200`)
|
||||
if (!res.ok) return
|
||||
const data = await res.json()
|
||||
const sessions: Session[] = (data.sessions || []).map((s: any) => ({
|
||||
id: s.id,
|
||||
name: s.name || '',
|
||||
filename: s.filename || '',
|
||||
status: s.status || 'active',
|
||||
created_at: s.created_at || '',
|
||||
document_category: s.document_category || null,
|
||||
has_ground_truth: !!(s.ground_truth && s.ground_truth.build_grid_reference),
|
||||
}))
|
||||
setAllSessions(sessions)
|
||||
setTotalCount(sessions.length)
|
||||
setReviewedCount(sessions.filter(s => s.has_ground_truth).length)
|
||||
} catch (e) {
|
||||
console.error('Failed to load sessions:', e)
|
||||
} finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}, [])
|
||||
|
||||
useEffect(() => { loadSessions() }, [loadSessions])
|
||||
|
||||
// Filtered sessions
|
||||
const filteredSessions = allSessions.filter(s => {
|
||||
if (filter === 'unreviewed') return !s.has_ground_truth && s.status === 'active'
|
||||
if (filter === 'reviewed') return s.has_ground_truth
|
||||
return true
|
||||
})
|
||||
|
||||
const currentSession = filteredSessions[currentIdx] || null
|
||||
|
||||
// Load grid for current session
|
||||
const loadGrid = useCallback(async (sessionId: string) => {
|
||||
setLoadingGrid(true)
|
||||
setGrid(null)
|
||||
setAcceptedRows(new Set())
|
||||
setEditingCell(null)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`)
|
||||
if (res.ok) {
|
||||
const data = await res.json()
|
||||
setGrid(data.grid || data)
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to load grid:', e)
|
||||
} finally {
|
||||
setLoadingGrid(false)
|
||||
}
|
||||
}, [])
|
||||
|
||||
useEffect(() => {
|
||||
if (currentSession) loadGrid(currentSession.id)
|
||||
}, [currentSession, loadGrid])
|
||||
|
||||
// Navigation
|
||||
const goNext = () => {
|
||||
if (currentIdx < filteredSessions.length - 1) setCurrentIdx(currentIdx + 1)
|
||||
}
|
||||
const goPrev = () => {
|
||||
if (currentIdx > 0) setCurrentIdx(currentIdx - 1)
|
||||
}
|
||||
|
||||
// Accept row
|
||||
const acceptRow = (zoneId: string, rowIdx: number) => {
|
||||
const key = `${zoneId}-${rowIdx}`
|
||||
setAcceptedRows(prev => new Set([...prev, key]))
|
||||
}
|
||||
|
||||
// Edit cell
|
||||
const startEdit = (cell: GridCell) => {
|
||||
setEditingCell(cell.cell_id)
|
||||
setEditText(cell.text)
|
||||
}
|
||||
|
||||
const saveEdit = async () => {
|
||||
if (!editingCell || !currentSession) return
|
||||
try {
|
||||
await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${currentSession.id}/update-cell`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ cell_id: editingCell, text: editText }),
|
||||
})
|
||||
// Update local state
|
||||
if (grid) {
|
||||
const newGrid = { ...grid }
|
||||
for (const zone of newGrid.zones) {
|
||||
for (const cell of zone.cells) {
|
||||
if (cell.cell_id === editingCell) {
|
||||
cell.text = editText
|
||||
}
|
||||
}
|
||||
}
|
||||
setGrid(newGrid)
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to save cell:', e)
|
||||
}
|
||||
setEditingCell(null)
|
||||
}
|
||||
|
||||
// Mark as ground truth
|
||||
const markGroundTruth = async (sessionId: string) => {
|
||||
setMarking(true)
|
||||
setMarkResult(null)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth`, {
|
||||
method: 'POST',
|
||||
})
|
||||
if (res.ok) {
|
||||
setMarkResult('success')
|
||||
// Update local session state
|
||||
setAllSessions(prev => prev.map(s =>
|
||||
s.id === sessionId ? { ...s, has_ground_truth: true } : s
|
||||
))
|
||||
setReviewedCount(prev => prev + 1)
|
||||
} else {
|
||||
setMarkResult('error')
|
||||
}
|
||||
} catch {
|
||||
setMarkResult('error')
|
||||
} finally {
|
||||
setMarking(false)
|
||||
}
|
||||
}
|
||||
|
||||
// Batch mark
|
||||
const batchMark = async () => {
|
||||
setMarking(true)
|
||||
let success = 0
|
||||
for (const sid of selectedSessions) {
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}/mark-ground-truth`, {
|
||||
method: 'POST',
|
||||
})
|
||||
if (res.ok) success++
|
||||
} catch { /* skip */ }
|
||||
}
|
||||
setAllSessions(prev => prev.map(s =>
|
||||
selectedSessions.has(s.id) ? { ...s, has_ground_truth: true } : s
|
||||
))
|
||||
setReviewedCount(prev => prev + success)
|
||||
setSelectedSessions(new Set())
|
||||
setMarking(false)
|
||||
setMarkResult(`${success} Sessions als Ground Truth markiert`)
|
||||
setTimeout(() => setMarkResult(null), 3000)
|
||||
}
|
||||
|
||||
// All cells for current grid
|
||||
const allCells = grid?.zones?.flatMap(z => z.cells) || []
|
||||
const lowConfCells = allCells.filter(c => (c.confidence ?? 100) < 50)
|
||||
|
||||
const imageUrl = currentSession
|
||||
? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${currentSession.id}/image/original`
|
||||
: null
|
||||
|
||||
return (
|
||||
<AIToolsSidebarResponsive>
|
||||
<div className="max-w-[1600px] mx-auto p-4 space-y-4">
|
||||
<PagePurpose moduleId="ocr-ground-truth" />
|
||||
|
||||
{/* Progress Bar */}
|
||||
<div className="bg-white rounded-lg border border-slate-200 p-4">
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<h2 className="text-lg font-bold text-slate-900">Ground Truth Review</h2>
|
||||
<span className="text-sm text-slate-500">
|
||||
{reviewedCount} von {totalCount} geprueft ({totalCount > 0 ? Math.round(reviewedCount / totalCount * 100) : 0}%)
|
||||
</span>
|
||||
</div>
|
||||
<div className="w-full bg-slate-100 rounded-full h-2.5">
|
||||
<div
|
||||
className="bg-teal-500 h-2.5 rounded-full transition-all duration-500"
|
||||
style={{ width: `${totalCount > 0 ? (reviewedCount / totalCount) * 100 : 0}%` }}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Filter + Queue */}
|
||||
<div className="flex items-center gap-4">
|
||||
<div className="flex gap-1 bg-slate-100 rounded-lg p-1">
|
||||
{(['unreviewed', 'reviewed', 'all'] as const).map(f => (
|
||||
<button
|
||||
key={f}
|
||||
onClick={() => { setFilter(f); setCurrentIdx(0) }}
|
||||
className={`px-3 py-1.5 text-sm rounded-md transition-colors ${
|
||||
filter === f
|
||||
? 'bg-white text-slate-900 shadow-sm font-medium'
|
||||
: 'text-slate-500 hover:text-slate-700'
|
||||
}`}
|
||||
>
|
||||
{f === 'unreviewed' ? 'Offen' : f === 'reviewed' ? 'Geprueft' : 'Alle'}
|
||||
<span className="ml-1 text-xs text-slate-400">
|
||||
({allSessions.filter(s =>
|
||||
f === 'unreviewed' ? !s.has_ground_truth && s.status === 'active'
|
||||
: f === 'reviewed' ? s.has_ground_truth
|
||||
: true
|
||||
).length})
|
||||
</span>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Navigation */}
|
||||
<div className="flex items-center gap-2 ml-auto">
|
||||
<button onClick={goPrev} disabled={currentIdx === 0}
|
||||
className="p-2 rounded hover:bg-slate-100 disabled:opacity-30 disabled:cursor-not-allowed">
|
||||
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 19l-7-7 7-7" />
|
||||
</svg>
|
||||
</button>
|
||||
<span className="text-sm text-slate-500 min-w-[80px] text-center">
|
||||
{filteredSessions.length > 0 ? `${currentIdx + 1} / ${filteredSessions.length}` : '—'}
|
||||
</span>
|
||||
<button onClick={goNext} disabled={currentIdx >= filteredSessions.length - 1}
|
||||
className="p-2 rounded hover:bg-slate-100 disabled:opacity-30 disabled:cursor-not-allowed">
|
||||
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Batch mark button */}
|
||||
{selectedSessions.size > 0 && (
|
||||
<button
|
||||
onClick={batchMark}
|
||||
disabled={marking}
|
||||
className="px-3 py-1.5 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700 disabled:opacity-50"
|
||||
>
|
||||
{selectedSessions.size} markieren
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Toast */}
|
||||
{markResult && (
|
||||
<div className={`p-3 rounded-lg text-sm ${
|
||||
markResult === 'error' ? 'bg-red-50 text-red-700 border border-red-200'
|
||||
: markResult === 'success' ? 'bg-emerald-50 text-emerald-700 border border-emerald-200'
|
||||
: 'bg-blue-50 text-blue-700 border border-blue-200'
|
||||
}`}>
|
||||
{markResult === 'success' ? 'Als Ground Truth markiert!' : markResult === 'error' ? 'Fehler beim Markieren' : markResult}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Main Content: Split View */}
|
||||
{loading ? (
|
||||
<div className="text-center py-12 text-slate-400">Lade Sessions...</div>
|
||||
) : !currentSession ? (
|
||||
<div className="text-center py-12 text-slate-400">
|
||||
<p className="text-lg">Keine Sessions in dieser Ansicht</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="grid grid-cols-2 gap-4" style={{ minHeight: '70vh' }}>
|
||||
{/* Left: Original Image */}
|
||||
<div className="bg-white rounded-lg border border-slate-200 overflow-hidden flex flex-col">
|
||||
<div className="flex items-center justify-between px-3 py-2 border-b border-slate-100 bg-slate-50">
|
||||
<span className="text-sm font-medium text-slate-700 truncate">
|
||||
{currentSession.name || currentSession.filename}
|
||||
</span>
|
||||
<div className="flex items-center gap-2">
|
||||
<button onClick={() => setZoom(z => Math.max(50, z - 25))}
|
||||
className="px-2 py-0.5 text-xs bg-slate-200 rounded hover:bg-slate-300">-</button>
|
||||
<span className="text-xs text-slate-500 w-10 text-center">{zoom}%</span>
|
||||
<button onClick={() => setZoom(z => Math.min(300, z + 25))}
|
||||
className="px-2 py-0.5 text-xs bg-slate-200 rounded hover:bg-slate-300">+</button>
|
||||
</div>
|
||||
</div>
|
||||
<div ref={imageRef} className="flex-1 overflow-auto p-2">
|
||||
{imageUrl && (
|
||||
<img
|
||||
src={imageUrl}
|
||||
alt="Original scan"
|
||||
style={{ width: `${zoom}%`, maxWidth: 'none' }}
|
||||
className="block"
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Right: Grid Review */}
|
||||
<div className="bg-white rounded-lg border border-slate-200 overflow-hidden flex flex-col">
|
||||
<div className="flex items-center justify-between px-3 py-2 border-b border-slate-100 bg-slate-50">
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="text-sm font-medium text-slate-700">
|
||||
{allCells.length} Zellen
|
||||
</span>
|
||||
{lowConfCells.length > 0 && (
|
||||
<span className="text-xs bg-red-100 text-red-700 px-2 py-0.5 rounded-full">
|
||||
{lowConfCells.length} niedrige Konfidenz
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
{!currentSession.has_ground_truth && (
|
||||
<button
|
||||
onClick={() => markGroundTruth(currentSession.id)}
|
||||
disabled={marking}
|
||||
className="px-3 py-1 bg-teal-600 text-white text-xs rounded hover:bg-teal-700 disabled:opacity-50"
|
||||
>
|
||||
{marking ? 'Markiere...' : 'Als Ground Truth markieren'}
|
||||
</button>
|
||||
)}
|
||||
{currentSession.has_ground_truth && (
|
||||
<span className="text-xs bg-emerald-100 text-emerald-700 px-2 py-0.5 rounded-full">
|
||||
Ground Truth
|
||||
</span>
|
||||
)}
|
||||
<button
|
||||
onClick={() => { markGroundTruth(currentSession.id); setTimeout(goNext, 500) }}
|
||||
disabled={marking}
|
||||
className="px-3 py-1 bg-slate-600 text-white text-xs rounded hover:bg-slate-700 disabled:opacity-50"
|
||||
>
|
||||
Markieren & Weiter
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Grid Content */}
|
||||
<div className="flex-1 overflow-auto">
|
||||
{loadingGrid ? (
|
||||
<div className="flex items-center justify-center h-full text-slate-400">
|
||||
<svg className="animate-spin h-6 w-6 mr-2" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
Lade Grid...
|
||||
</div>
|
||||
) : !grid || !grid.zones ? (
|
||||
<div className="text-center py-8 text-slate-400 text-sm">
|
||||
Kein Grid vorhanden. Bitte zuerst die Pipeline ausfuehren.
|
||||
</div>
|
||||
) : (
|
||||
<div className="p-3 space-y-4">
|
||||
{grid.zones.map((zone, zi) => (
|
||||
<div key={zone.zone_id || zi} className="space-y-1">
|
||||
{/* Zone header */}
|
||||
<div className="text-xs text-slate-400 uppercase tracking-wide">
|
||||
Zone {zi + 1} ({zone.zone_type})
|
||||
{zone.columns?.length > 0 && (
|
||||
<span className="ml-2">
|
||||
{zone.columns.map(c => c.col_type.replace('column_', '')).join(' | ')}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Group cells by row */}
|
||||
{Array.from(new Set(zone.cells.map(c => c.row_index)))
|
||||
.sort((a, b) => a - b)
|
||||
.map(rowIdx => {
|
||||
const rowCells = zone.cells
|
||||
.filter(c => c.row_index === rowIdx)
|
||||
.sort((a, b) => a.col_index - b.col_index)
|
||||
const rowKey = `${zone.zone_id || zi}-${rowIdx}`
|
||||
const isAccepted = acceptedRows.has(rowKey)
|
||||
|
||||
return (
|
||||
<div
|
||||
key={rowKey}
|
||||
className={`flex items-start gap-1 group ${isAccepted ? 'opacity-60' : ''}`}
|
||||
>
|
||||
{/* Quick accept button */}
|
||||
<button
|
||||
onClick={() => acceptRow(zone.zone_id || String(zi), rowIdx)}
|
||||
className={`flex-shrink-0 w-6 h-6 rounded flex items-center justify-center mt-0.5 transition-colors ${
|
||||
isAccepted
|
||||
? 'bg-emerald-100 text-emerald-600'
|
||||
: 'bg-slate-100 text-slate-400 hover:bg-emerald-100 hover:text-emerald-600'
|
||||
}`}
|
||||
title="Zeile als korrekt markieren"
|
||||
>
|
||||
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
|
||||
</svg>
|
||||
</button>
|
||||
|
||||
{/* Cells */}
|
||||
<div className="flex-1 flex gap-1 flex-wrap">
|
||||
{rowCells.map(cell => (
|
||||
<div
|
||||
key={cell.cell_id}
|
||||
className={`flex-1 min-w-[80px] px-2 py-1 rounded text-sm border cursor-pointer transition-colors
|
||||
${confidenceColor(cell.confidence)}
|
||||
${confidenceBorder(cell.confidence)}
|
||||
${editingCell === cell.cell_id ? 'ring-2 ring-teal-400' : 'hover:border-teal-300'}
|
||||
${cell.is_bold ? 'font-bold' : ''}
|
||||
`}
|
||||
onClick={() => !isAccepted && startEdit(cell)}
|
||||
title={`Konfidenz: ${cell.confidence ?? '?'}% | ${cell.col_type}`}
|
||||
>
|
||||
{editingCell === cell.cell_id ? (
|
||||
<input
|
||||
autoFocus
|
||||
value={editText}
|
||||
onChange={e => setEditText(e.target.value)}
|
||||
onBlur={saveEdit}
|
||||
onKeyDown={e => {
|
||||
if (e.key === 'Enter') saveEdit()
|
||||
if (e.key === 'Escape') setEditingCell(null)
|
||||
}}
|
||||
className="w-full bg-transparent outline-none text-sm"
|
||||
/>
|
||||
) : (
|
||||
<span className={cell.text ? '' : 'text-slate-300 italic'}>
|
||||
{cell.text || '(leer)'}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Session List (collapsed) */}
|
||||
{filteredSessions.length > 1 && (
|
||||
<details className="bg-white rounded-lg border border-slate-200">
|
||||
<summary className="px-4 py-3 cursor-pointer text-sm font-medium text-slate-700 hover:bg-slate-50">
|
||||
Session-Liste ({filteredSessions.length})
|
||||
</summary>
|
||||
<div className="border-t border-slate-100 max-h-60 overflow-y-auto">
|
||||
{filteredSessions.map((s, idx) => (
|
||||
<div
|
||||
key={s.id}
|
||||
className={`flex items-center gap-3 px-4 py-2 text-sm cursor-pointer hover:bg-slate-50 border-b border-slate-50 ${
|
||||
idx === currentIdx ? 'bg-teal-50' : ''
|
||||
}`}
|
||||
onClick={() => setCurrentIdx(idx)}
|
||||
>
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={selectedSessions.has(s.id)}
|
||||
onChange={e => {
|
||||
e.stopPropagation()
|
||||
setSelectedSessions(prev => {
|
||||
const next = new Set(prev)
|
||||
if (next.has(s.id)) next.delete(s.id)
|
||||
else next.add(s.id)
|
||||
return next
|
||||
})
|
||||
}}
|
||||
className="rounded border-slate-300"
|
||||
/>
|
||||
<span className={`w-2 h-2 rounded-full flex-shrink-0 ${s.has_ground_truth ? 'bg-emerald-400' : 'bg-slate-300'}`} />
|
||||
<span className="truncate flex-1">{s.name || s.filename || s.id}</span>
|
||||
{s.document_category && (
|
||||
<span className="text-xs bg-slate-100 px-1.5 py-0.5 rounded text-slate-500">{s.document_category}</span>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</details>
|
||||
)}
|
||||
</div>
|
||||
</AIToolsSidebarResponsive>
|
||||
)
|
||||
}
|
||||
391
admin-lehrer/app/(admin)/ai/ocr-regression/page.tsx
Normal file
391
admin-lehrer/app/(admin)/ai/ocr-regression/page.tsx
Normal file
@@ -0,0 +1,391 @@
|
||||
'use client'
|
||||
|
||||
/**
|
||||
* OCR Regression Dashboard
|
||||
*
|
||||
* Shows all ground-truth sessions, runs regression tests,
|
||||
* displays pass/fail results with diff details, and shows history.
|
||||
*/
|
||||
|
||||
import { useState, useEffect, useCallback } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * A session that has saved ground-truth data and serves as a regression baseline.
 */
interface GTSession {
  session_id: string
  name: string
  filename: string
  document_category: string | null
  pipeline: string | null
  // When the ground-truth reference was saved; null if unknown.
  saved_at: string | null
  // Size of the saved reference grid.
  summary: {
    total_zones: number
    total_columns: number
    total_rows: number
    total_cells: number
  }
}
|
||||
|
||||
/**
 * Counts of differences between the reference grid and the current
 * pipeline output for one session.
 */
interface DiffSummary {
  structural_changes: number
  cells_missing: number
  cells_added: number
  text_changes: number
  col_type_changes: number
}
|
||||
|
||||
/**
 * Per-session outcome of one regression run.
 */
interface RegressionResult {
  session_id: string
  name: string
  status: 'pass' | 'fail' | 'error'
  // Error description; presumably present when status is 'error' — TODO confirm.
  error?: string
  diff_summary?: DiffSummary
  reference_summary?: Record<string, number>
  current_summary?: Record<string, number>
  // Aggregate-count fields whose reference and current values disagree.
  structural_diffs?: Array<{ field: string; reference: number; current: number }>
  // Individual cell-level differences (missing / added / changed cells).
  cell_diffs?: Array<{ type: string; cell_id: string; reference?: string; current?: string }>
}
|
||||
|
||||
/**
 * One persisted regression run, as returned by the history endpoint.
 */
interface RegressionRun {
  id: string
  run_at: string
  status: string
  // Session counts for this run.
  total: number
  passed: number
  failed: number
  errors: number
  duration_ms: number
  // e.g. 'manual' when started from this dashboard.
  triggered_by: string
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function StatusBadge({ status }: { status: string }) {
|
||||
const cls =
|
||||
status === 'pass'
|
||||
? 'bg-emerald-100 text-emerald-800 border-emerald-200'
|
||||
: status === 'fail'
|
||||
? 'bg-red-100 text-red-800 border-red-200'
|
||||
: 'bg-amber-100 text-amber-800 border-amber-200'
|
||||
return (
|
||||
<span className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium border ${cls}`}>
|
||||
{status === 'pass' ? 'Pass' : status === 'fail' ? 'Fail' : 'Error'}
|
||||
</span>
|
||||
)
|
||||
}
|
||||
|
||||
function formatDate(iso: string | null) {
|
||||
if (!iso) return '—'
|
||||
return new Date(iso).toLocaleString('de-DE', {
|
||||
day: '2-digit', month: '2-digit', year: 'numeric',
|
||||
hour: '2-digit', minute: '2-digit',
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Component
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export default function OCRRegressionPage() {
|
||||
const [sessions, setSessions] = useState<GTSession[]>([])
|
||||
const [results, setResults] = useState<RegressionResult[]>([])
|
||||
const [history, setHistory] = useState<RegressionRun[]>([])
|
||||
const [running, setRunning] = useState(false)
|
||||
const [overallStatus, setOverallStatus] = useState<string | null>(null)
|
||||
const [durationMs, setDurationMs] = useState<number | null>(null)
|
||||
const [expandedSession, setExpandedSession] = useState<string | null>(null)
|
||||
const [tab, setTab] = useState<'current' | 'history'>('current')
|
||||
|
||||
// Load ground-truth sessions
|
||||
const loadSessions = useCallback(async () => {
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/ground-truth-sessions`)
|
||||
if (res.ok) {
|
||||
const data = await res.json()
|
||||
setSessions(data.sessions || [])
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to load GT sessions:', e)
|
||||
}
|
||||
}, [])
|
||||
|
||||
// Load history
|
||||
const loadHistory = useCallback(async () => {
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/regression/history?limit=20`)
|
||||
if (res.ok) {
|
||||
const data = await res.json()
|
||||
setHistory(data.runs || [])
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to load history:', e)
|
||||
}
|
||||
}, [])
|
||||
|
||||
useEffect(() => {
|
||||
loadSessions()
|
||||
loadHistory()
|
||||
}, [loadSessions, loadHistory])
|
||||
|
||||
// Run all regressions
|
||||
const runAll = async () => {
|
||||
setRunning(true)
|
||||
setResults([])
|
||||
setOverallStatus(null)
|
||||
setDurationMs(null)
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/regression/run?triggered_by=manual`, {
|
||||
method: 'POST',
|
||||
})
|
||||
if (res.ok) {
|
||||
const data = await res.json()
|
||||
setResults(data.results || [])
|
||||
setOverallStatus(data.status)
|
||||
setDurationMs(data.duration_ms)
|
||||
loadHistory()
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Regression run failed:', e)
|
||||
setOverallStatus('error')
|
||||
} finally {
|
||||
setRunning(false)
|
||||
}
|
||||
}
|
||||
|
||||
const totalPass = results.filter(r => r.status === 'pass').length
|
||||
const totalFail = results.filter(r => r.status === 'fail').length
|
||||
const totalError = results.filter(r => r.status === 'error').length
|
||||
|
||||
return (
|
||||
<AIToolsSidebarResponsive>
|
||||
<div className="max-w-7xl mx-auto p-6 space-y-6">
|
||||
<PagePurpose moduleId="ocr-regression" />
|
||||
|
||||
{/* Header + Run Button */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-slate-900">OCR Regression Tests</h1>
|
||||
<p className="text-sm text-slate-500 mt-1">
|
||||
{sessions.length} Ground-Truth Session{sessions.length !== 1 ? 's' : ''}
|
||||
</p>
|
||||
</div>
|
||||
<button
|
||||
onClick={runAll}
|
||||
disabled={running || sessions.length === 0}
|
||||
className="inline-flex items-center gap-2 px-4 py-2.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed font-medium transition-colors"
|
||||
>
|
||||
{running ? (
|
||||
<>
|
||||
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
Laeuft...
|
||||
</>
|
||||
) : (
|
||||
'Alle Tests starten'
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Overall Result Banner */}
|
||||
{overallStatus && (
|
||||
<div className={`rounded-lg p-4 border ${
|
||||
overallStatus === 'pass'
|
||||
? 'bg-emerald-50 border-emerald-200'
|
||||
: 'bg-red-50 border-red-200'
|
||||
}`}>
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex items-center gap-3">
|
||||
<StatusBadge status={overallStatus} />
|
||||
<span className="font-medium text-slate-900">
|
||||
{totalPass} bestanden, {totalFail} fehlgeschlagen, {totalError} Fehler
|
||||
</span>
|
||||
</div>
|
||||
{durationMs !== null && (
|
||||
<span className="text-sm text-slate-500">{(durationMs / 1000).toFixed(1)}s</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Tabs */}
|
||||
<div className="border-b border-slate-200">
|
||||
<nav className="flex gap-4">
|
||||
{(['current', 'history'] as const).map(t => (
|
||||
<button
|
||||
key={t}
|
||||
onClick={() => setTab(t)}
|
||||
className={`pb-3 px-1 text-sm font-medium border-b-2 transition-colors ${
|
||||
tab === t
|
||||
? 'border-teal-500 text-teal-600'
|
||||
: 'border-transparent text-slate-500 hover:text-slate-700'
|
||||
}`}
|
||||
>
|
||||
{t === 'current' ? 'Aktuelle Ergebnisse' : 'Verlauf'}
|
||||
</button>
|
||||
))}
|
||||
</nav>
|
||||
</div>
|
||||
|
||||
{/* Current Results Tab */}
|
||||
{tab === 'current' && (
|
||||
<div className="space-y-3">
|
||||
{results.length === 0 && !running && (
|
||||
<div className="text-center py-12 text-slate-400">
|
||||
<p className="text-lg">Keine Ergebnisse</p>
|
||||
<p className="text-sm mt-1">Klicken Sie "Alle Tests starten" um die Regression zu laufen.</p>
|
||||
</div>
|
||||
)}
|
||||
{results.map(r => (
|
||||
<div
|
||||
key={r.session_id}
|
||||
className="bg-white rounded-lg border border-slate-200 overflow-hidden"
|
||||
>
|
||||
<div
|
||||
className="flex items-center justify-between px-4 py-3 cursor-pointer hover:bg-slate-50 transition-colors"
|
||||
onClick={() => setExpandedSession(expandedSession === r.session_id ? null : r.session_id)}
|
||||
>
|
||||
<div className="flex items-center gap-3 min-w-0">
|
||||
<StatusBadge status={r.status} />
|
||||
<span className="font-medium text-slate-900 truncate">{r.name || r.session_id}</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-4 text-sm text-slate-500">
|
||||
{r.diff_summary && (
|
||||
<span>
|
||||
{r.diff_summary.text_changes} Text, {r.diff_summary.structural_changes} Struktur
|
||||
</span>
|
||||
)}
|
||||
{r.error && <span className="text-red-500">{r.error}</span>}
|
||||
<svg className={`w-4 h-4 transition-transform ${expandedSession === r.session_id ? 'rotate-180' : ''}`} fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
|
||||
</svg>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Expanded Details */}
|
||||
{expandedSession === r.session_id && r.status === 'fail' && (
|
||||
<div className="border-t border-slate-100 px-4 py-3 bg-slate-50 space-y-3">
|
||||
{/* Structural Diffs */}
|
||||
{r.structural_diffs && r.structural_diffs.length > 0 && (
|
||||
<div>
|
||||
<h4 className="text-xs font-medium text-slate-500 uppercase mb-1">Strukturelle Aenderungen</h4>
|
||||
<div className="space-y-1">
|
||||
{r.structural_diffs.map((d, i) => (
|
||||
<div key={i} className="text-sm">
|
||||
<span className="font-mono text-slate-600">{d.field}</span>: {d.reference} → {d.current}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{/* Cell Diffs */}
|
||||
{r.cell_diffs && r.cell_diffs.length > 0 && (
|
||||
<div>
|
||||
<h4 className="text-xs font-medium text-slate-500 uppercase mb-1">
|
||||
Zellen-Aenderungen ({r.cell_diffs.length})
|
||||
</h4>
|
||||
<div className="max-h-60 overflow-y-auto space-y-1">
|
||||
{r.cell_diffs.slice(0, 50).map((d, i) => (
|
||||
<div key={i} className="text-sm font-mono bg-white rounded px-2 py-1 border border-slate-100">
|
||||
<span className={`text-xs px-1 rounded ${
|
||||
d.type === 'text_change' ? 'bg-amber-100 text-amber-700'
|
||||
: d.type === 'cell_missing' ? 'bg-red-100 text-red-700'
|
||||
: 'bg-blue-100 text-blue-700'
|
||||
}`}>
|
||||
{d.type}
|
||||
</span>{' '}
|
||||
<span className="text-slate-500">{d.cell_id}</span>
|
||||
{d.reference && (
|
||||
<>
|
||||
{' '}<span className="line-through text-red-400">{d.reference}</span>
|
||||
</>
|
||||
)}
|
||||
{d.current && (
|
||||
<>
|
||||
{' '}<span className="text-emerald-600">{d.current}</span>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
{r.cell_diffs.length > 50 && (
|
||||
<p className="text-xs text-slate-400">... und {r.cell_diffs.length - 50} weitere</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
|
||||
{/* Ground Truth Sessions Overview (when no results yet) */}
|
||||
{results.length === 0 && sessions.length > 0 && (
|
||||
<div>
|
||||
<h3 className="text-sm font-medium text-slate-700 mb-2">Ground-Truth Sessions</h3>
|
||||
<div className="grid gap-2">
|
||||
{sessions.map(s => (
|
||||
<div key={s.session_id} className="bg-white rounded-lg border border-slate-200 px-4 py-3 flex items-center justify-between">
|
||||
<div>
|
||||
<span className="font-medium text-slate-900">{s.name || s.session_id}</span>
|
||||
<span className="text-sm text-slate-400 ml-2">{s.filename}</span>
|
||||
</div>
|
||||
<div className="text-sm text-slate-500">
|
||||
{s.summary.total_cells} Zellen, {s.summary.total_zones} Zonen
|
||||
{s.pipeline && <span className="ml-2 text-xs bg-slate-100 px-1.5 py-0.5 rounded">{s.pipeline}</span>}
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* History Tab */}
|
||||
{tab === 'history' && (
|
||||
<div className="space-y-2">
|
||||
{history.length === 0 ? (
|
||||
<p className="text-center py-8 text-slate-400">Noch keine Laeufe aufgezeichnet.</p>
|
||||
) : (
|
||||
<table className="w-full text-sm">
|
||||
<thead>
|
||||
<tr className="border-b border-slate-200 text-left text-slate-500">
|
||||
<th className="pb-2 font-medium">Datum</th>
|
||||
<th className="pb-2 font-medium">Status</th>
|
||||
<th className="pb-2 font-medium text-right">Gesamt</th>
|
||||
<th className="pb-2 font-medium text-right">Pass</th>
|
||||
<th className="pb-2 font-medium text-right">Fail</th>
|
||||
<th className="pb-2 font-medium text-right">Dauer</th>
|
||||
<th className="pb-2 font-medium">Trigger</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{history.map(run => (
|
||||
<tr key={run.id} className="border-b border-slate-100 hover:bg-slate-50">
|
||||
<td className="py-2">{formatDate(run.run_at)}</td>
|
||||
<td className="py-2"><StatusBadge status={run.status} /></td>
|
||||
<td className="py-2 text-right">{run.total}</td>
|
||||
<td className="py-2 text-right text-emerald-600">{run.passed}</td>
|
||||
<td className="py-2 text-right text-red-600">{run.failed + run.errors}</td>
|
||||
<td className="py-2 text-right text-slate-500">{(run.duration_ms / 1000).toFixed(1)}s</td>
|
||||
<td className="py-2 text-slate-400">{run.triggered_by}</td>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</AIToolsSidebarResponsive>
|
||||
)
|
||||
}
|
||||
@@ -182,6 +182,24 @@ export const navigation: NavCategory[] = [
|
||||
// -----------------------------------------------------------------------
|
||||
// KI-Anwendungen: Endnutzer-orientierte KI-Module
|
||||
// -----------------------------------------------------------------------
|
||||
{
|
||||
id: 'ocr-regression',
|
||||
name: 'OCR Regression',
|
||||
href: '/ai/ocr-regression',
|
||||
description: 'Regressions-Tests & Ground Truth',
|
||||
purpose: 'Regressions-Tests fuer die OCR-Pipeline ausfuehren. Zeigt Pass/Fail pro Ground-Truth Session, Diff-Details und Verlauf vergangener Laeufe.',
|
||||
audience: ['Entwickler', 'QA'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'ocr-ground-truth',
|
||||
name: 'Ground Truth Review',
|
||||
href: '/ai/ocr-ground-truth',
|
||||
description: 'Ground Truth pruefen & markieren',
|
||||
purpose: 'Effiziente Massenpruefung von OCR-Sessions. Split-View mit Confidence-Highlighting, Quick-Accept und Batch-Markierung als Ground Truth.',
|
||||
audience: ['Entwickler', 'QA'],
|
||||
subgroup: 'KI-Werkzeuge',
|
||||
},
|
||||
{
|
||||
id: 'agents',
|
||||
name: 'Agent Management',
|
||||
|
||||
166
docs-src/development/regression-testing.md
Normal file
166
docs-src/development/regression-testing.md
Normal file
@@ -0,0 +1,166 @@
|
||||
# OCR Pipeline Regression Testing
|
||||
|
||||
**Stand:** 2026-03-23
|
||||
|
||||
---
|
||||
|
||||
## Uebersicht
|
||||
|
||||
Das Regression Framework stellt sicher, dass Aenderungen an der OCR-Pipeline keine bestehenden
|
||||
Ergebnisse verschlechtern. Ground-Truth Sessions dienen als Referenz — nach jeder Code-Aenderung
|
||||
wird die Pipeline neu ausgefuehrt und das Ergebnis mit der Referenz verglichen.
|
||||
|
||||
---
|
||||
|
||||
## Ground Truth markieren
|
||||
|
||||
### Via Admin-UI (empfohlen)
|
||||
|
||||
1. Oeffne die OCR Pipeline: [/ai/ocr-pipeline](https://macmini:3002/ai/ocr-pipeline)
|
||||
2. Lade eine Session und fuehre alle Pipeline-Schritte aus
|
||||
3. Pruefe das Ergebnis im Grid Editor (Schritt 10)
|
||||
4. Korrigiere Fehler falls noetig (Inline-Edit)
|
||||
5. Klicke **"Als Ground Truth markieren"**
|
||||
|
||||
### Via API
|
||||
|
||||
```bash
|
||||
# Bestehende Session als Ground Truth markieren
|
||||
curl -X POST "http://macmini:8086/api/v1/ocr-pipeline/sessions/{session_id}/mark-ground-truth"
|
||||
|
||||
# Ground Truth entfernen
|
||||
curl -X DELETE "http://macmini:8086/api/v1/ocr-pipeline/sessions/{session_id}/mark-ground-truth"
|
||||
|
||||
# Alle Ground-Truth Sessions auflisten
|
||||
curl "http://macmini:8086/api/v1/ocr-pipeline/ground-truth-sessions"
|
||||
```
|
||||
|
||||
### Via Ground-Truth Review UI
|
||||
|
||||
Fuer die Massenpruefung von 50-100 Sessions:
|
||||
|
||||
1. Oeffne [/ai/ocr-ground-truth](https://macmini:3002/ai/ocr-ground-truth)
|
||||
2. Filter auf "Offen" (ungeprueft)
|
||||
3. Split-View: Bild links, Grid rechts pruefen
|
||||
4. Korrekte Zeilen mit Haekchen bestaetigen
|
||||
5. Fehler inline korrigieren
|
||||
6. "Markieren & Weiter" fuer naechste Session
|
||||
|
||||
---
|
||||
|
||||
## Regression ausfuehren
|
||||
|
||||
### Via Shell-Script (CI/CD)
|
||||
|
||||
```bash
|
||||
# Standard: macmini:8086
|
||||
./scripts/run-regression.sh
|
||||
|
||||
# Custom URL
|
||||
./scripts/run-regression.sh http://localhost:8086
|
||||
|
||||
# Exit-Codes:
|
||||
# 0 = alle bestanden
|
||||
# 1 = Fehler gefunden
|
||||
# 2 = Verbindungsfehler
|
||||
```
|
||||
|
||||
### Via Admin-UI
|
||||
|
||||
1. Oeffne [/ai/ocr-regression](https://macmini:3002/ai/ocr-regression)
|
||||
2. Klicke **"Alle Tests starten"**
|
||||
3. Ergebnis: Pass/Fail pro Session mit Diff-Details
|
||||
|
||||
### Via API
|
||||
|
||||
```bash
|
||||
# Alle Ground-Truth Sessions testen
|
||||
curl -X POST "http://macmini:8086/api/v1/ocr-pipeline/regression/run?triggered_by=script"
|
||||
|
||||
# Einzelne Session testen
|
||||
curl -X POST "http://macmini:8086/api/v1/ocr-pipeline/sessions/{session_id}/regression/run"
|
||||
|
||||
# Verlauf abrufen
|
||||
curl "http://macmini:8086/api/v1/ocr-pipeline/regression/history?limit=20"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Ergebnisse lesen
|
||||
|
||||
### Diff-Typen
|
||||
|
||||
| Typ | Beschreibung |
|
||||
|-----|-------------|
|
||||
| `structural_changes` | Anzahl Zonen, Spalten oder Zeilen hat sich geaendert |
|
||||
| `text_change` | Text einer Zelle hat sich geaendert |
|
||||
| `cell_missing` | Zelle war in der Referenz, fehlt jetzt |
|
||||
| `cell_added` | Neue Zelle die in der Referenz nicht existierte |
|
||||
| `col_type_change` | Spaltentyp einer Zelle hat sich geaendert |
|
||||
|
||||
### Status-Bewertung
|
||||
|
||||
- **pass**: Keine Diffs → Code-Aenderung hat keine Auswirkung
|
||||
- **fail**: Diffs gefunden → pruefen ob gewollt (Feature) oder ungewollt (Regression)
|
||||
- **error**: Pipeline-Fehler → Build oder Config-Problem
|
||||
|
||||
### Verlauf
|
||||
|
||||
Alle Laeufe werden in der Tabelle `regression_runs` persistiert:
|
||||
|
||||
```sql
|
||||
SELECT id, run_at, status, total, passed, failed, errors, duration_ms, triggered_by
|
||||
FROM regression_runs
|
||||
ORDER BY run_at DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Ground-Truth Sessions waehlen
|
||||
|
||||
Decke verschiedene Seitentypen ab:
|
||||
|
||||
- Woerterbuchseiten (2-3 Spalten, IPA-Klammern)
|
||||
- Uebungsseiten (Tabellen, Checkboxen)
|
||||
- Seiten mit Illustrationen
|
||||
- Seiten ohne IPA (reines Deutsch-Vokabular)
|
||||
- Verschiedene Verlage und Layouts
|
||||
|
||||
### Workflow vor jedem Commit
|
||||
|
||||
```bash
|
||||
# 1. Regression laufen lassen
|
||||
./scripts/run-regression.sh
|
||||
|
||||
# 2. Bei Failure: Diff pruefen
|
||||
# - Gewollte Aenderung? → Ground Truth aktualisieren
|
||||
# - Ungewollte Regression? → Code fixen
|
||||
|
||||
# 3. Bei Pass: Commit
|
||||
git add . && git commit -m "fix: ..."
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Datenbank-Schema
|
||||
|
||||
```sql
|
||||
CREATE TABLE regression_runs (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
run_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
||||
status VARCHAR(20) NOT NULL, -- pass, fail, error
|
||||
total INT NOT NULL DEFAULT 0,
|
||||
passed INT NOT NULL DEFAULT 0,
|
||||
failed INT NOT NULL DEFAULT 0,
|
||||
errors INT NOT NULL DEFAULT 0,
|
||||
duration_ms INT,
|
||||
results JSONB NOT NULL DEFAULT '[]', -- Detail-Ergebnisse pro Session
|
||||
triggered_by VARCHAR(50) DEFAULT 'manual'
|
||||
);
|
||||
```
|
||||
|
||||
Ground-Truth Referenzen werden im `ground_truth` JSONB-Feld der
|
||||
`ocr_pipeline_sessions` Tabelle gespeichert.
|
||||
@@ -1,7 +1,7 @@
|
||||
# OCR Pipeline - Schrittweise Seitenrekonstruktion
|
||||
|
||||
**Version:** 4.7.0
|
||||
**Status:** Produktiv (Schritte 1–10 + Grid Editor implementiert)
|
||||
**Version:** 5.0.0
|
||||
**Status:** Produktiv (Schritte 1–10 + Grid Editor + Regression Framework)
|
||||
**URL:** https://macmini:3002/ai/ocr-pipeline
|
||||
|
||||
## Uebersicht
|
||||
@@ -1197,6 +1197,62 @@ des Headwords der vorherigen Zeile). Diese werden von PaddleOCR als garbled Text
|
||||
4. Schlaegt IPA im Britfone-Woerterbuch nach
|
||||
5. Beruecksichtigt alle Wortteile (z.B. "close sth. down" → `[klˈəʊz dˈaʊn]`)
|
||||
|
||||
### Compound Word IPA Decomposition (Step 5e)
|
||||
|
||||
Zusammengesetzte Woerter wie "schoolbag" oder "blackbird" haben oft keinen eigenen
|
||||
IPA-Eintrag im Woerterbuch. Die Funktion `_decompose_compound()` zerlegt sie:
|
||||
|
||||
1. Probiere jede Teilungsposition (min. 3 Zeichen pro Teil)
|
||||
2. Wenn beide Teile im Woerterbuch stehen → IPA verketten
|
||||
3. Waehle die Teilung mit dem laengsten ersten Teil
|
||||
|
||||
**Beispiele:**
|
||||
|
||||
| Eingabe | Zerlegung | IPA |
|
||||
|---------|-----------|-----|
|
||||
| schoolbag | school + bag | skˈuːl + bæɡ |
|
||||
| blackbird | black + bird | blæk + bˈɜːd |
|
||||
| ice-cream | ice + cream | aɪs + kɹˈiːm |
|
||||
|
||||
### Trailing Garbled Fragment Removal (Step 5f)
|
||||
|
||||
Nach korrekt erkanntem IPA (z.B. `seat [sˈiːt]`) haengt OCR manchmal
|
||||
eine garbled Kopie der IPA-Transkription an: `seat [sˈiːt] belt si:t belt`.
|
||||
|
||||
**`_strip_post_bracket_garbled()`** erkennt und entfernt diese:
|
||||
|
||||
1. Alles nach dem letzten `]` scannen
|
||||
2. Woerter mit IPA-Markern (`:`, `ə`, `ɪ` etc.) → garbled, entfernen
|
||||
3. Echte Woerter (Woerterbuch, Deutsch, Delimiter) → behalten
|
||||
4. **Multi-Wort-Headword:** "belt" ist ein echtes Wort, aber wenn danach
|
||||
garbled IPA kommt, wird nur "belt" behalten, der Rest entfernt
|
||||
|
||||
### Regression Framework (Step 5g)
|
||||
|
||||
Ground-Truth Sessions koennen als Referenz markiert werden. Nach jeder
|
||||
Code-Aenderung vergleicht `POST /regression/run` die aktuelle Pipeline-Ausgabe
|
||||
mit den gespeicherten Referenzen:
|
||||
|
||||
- **Strukturelle Diffs:** Zonen, Spalten, Zeilen (Anzahl-Aenderungen)
|
||||
- **Zellen-Diffs:** Text-Aenderungen, fehlende/neue Zellen, col_type-Aenderungen
|
||||
- **Persistenz:** Ergebnisse in `regression_runs` Tabelle fuer Trend-Analyse
|
||||
- **Shell-Script:** `scripts/run-regression.sh` fuer CI-Integration
|
||||
|
||||
Admin-UI: [/ai/ocr-regression](https://macmini:3002/ai/ocr-regression)
|
||||
|
||||
### Ground Truth Review Workflow (Step 5h)
|
||||
|
||||
Admin-UI fuer effiziente Massenpruefung von Sessions:
|
||||
|
||||
- **Split-View:** Original-Bild links, erkannter Grid rechts
|
||||
- **Confidence-Highlighting:** Niedrige Konfidenz rot hervorgehoben
|
||||
- **Quick-Accept:** Korrekte Zeilen mit einem Klick bestaetigen
|
||||
- **Inline-Edit:** Text direkt im Grid korrigieren
|
||||
- **Session-Queue:** Automatisch naechste Session laden
|
||||
- **Batch-Mark:** Mehrere Sessions gleichzeitig als Ground Truth markieren
|
||||
|
||||
Admin-UI: [/ai/ocr-ground-truth](https://macmini:3002/ai/ocr-ground-truth)
|
||||
|
||||
### `en_col_type` Erkennung
|
||||
|
||||
Die Erkennung der Englisch-Headword-Spalte nutzt **Bracket-IPA-Pattern-Count**
|
||||
@@ -1536,6 +1592,7 @@ cd klausur-service/backend && pytest tests/test_paddle_kombi.py -v # 36 Tests
|
||||
|
||||
| Datum | Version | Aenderung |
|
||||
|-------|---------|----------|
|
||||
| 2026-03-23 | 5.0.0 | **Phase 1 Sprint 1:** Compound-IPA-Zerlegung (`_decompose_compound`), Trailing-Garbled-Fragment-Entfernung (Multi-Wort-Headwords), Regression Framework mit DB-Persistenz + History + Shell-Script, Ground-Truth Review Workflow UI, Page-Crop Determinismus verifiziert. Admin-Seiten: `/ai/ocr-regression`, `/ai/ocr-ground-truth`. |
|
||||
| 2026-03-20 | 4.7.0 | Grid Editor: Zone Merging ueber Bilder (`image_overlays`), Heading Detection (Farbe + Hoehe), Ghost-Filter (borderless-aware), Oversized Word Box Removal, IPA Phonetic Correction (Britfone), IPA Continuation Detection, `en_col_type` via Bracket-Count. 27 Tests. |
|
||||
| 2026-03-16 | 4.6.0 | Strukturerkennung (Schritt 8): Region-basierte Grafikerkennung (`cv_graphic_detect.py`) mit Zwei-Pass-Verfahren (Farbregionen + schwarze Illustrationen), Wort-Ueberlappungs-Filter, Box/Zonen/Farb-Analyse. Schritt laeuft nach Worterkennung. |
|
||||
| 2026-03-12 | 4.5.0 | Kombi-Modus (PaddleOCR + Tesseract): Beide Engines laufen parallel, Koordinaten werden IoU-basiert gematcht und confidence-gewichtet gemittelt. Ungematchte Tesseract-Woerter (Bullets, Symbole) werden hinzugefuegt. 3er-Toggle in OCR Overlay. |
|
||||
|
||||
@@ -1032,6 +1032,37 @@ def _text_has_garbled_ipa(text: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _decompose_compound(word: str, pronunciation: str = 'british') -> Optional[str]:
|
||||
"""Try to decompose a compound word and concatenate IPA for each part.
|
||||
|
||||
E.g. "schoolbag" → "school"+"bag" → IPA for both concatenated.
|
||||
Only returns IPA if ALL parts are found in the dictionary.
|
||||
|
||||
Tries splits at every position (min 3 chars per part) and picks the
|
||||
split where the first part is longest.
|
||||
"""
|
||||
if not IPA_AVAILABLE:
|
||||
return None
|
||||
lower = word.lower().strip()
|
||||
if len(lower) < 6:
|
||||
return None # too short for a compound
|
||||
|
||||
best_ipa = None
|
||||
best_first_len = 0
|
||||
|
||||
for split_pos in range(3, len(lower) - 2): # min 3 chars each part
|
||||
first = lower[:split_pos]
|
||||
second = lower[split_pos:]
|
||||
ipa_first = _lookup_ipa(first, pronunciation)
|
||||
ipa_second = _lookup_ipa(second, pronunciation)
|
||||
if ipa_first and ipa_second:
|
||||
if split_pos > best_first_len:
|
||||
best_first_len = split_pos
|
||||
best_ipa = ipa_first + ipa_second
|
||||
|
||||
return best_ipa
|
||||
|
||||
|
||||
def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str:
|
||||
"""Insert IPA pronunciation for English words that have no brackets at all.
|
||||
|
||||
@@ -1077,6 +1108,10 @@ def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str:
|
||||
# Fallback: try without hyphens (e.g. "second-hand" → "secondhand")
|
||||
if not ipa and '-' in clean:
|
||||
ipa = _lookup_ipa(clean.replace('-', ''), pronunciation)
|
||||
# Fallback 0b: compound word decomposition
|
||||
# E.g. "schoolbag" → "school"+"bag" → concatenated IPA
|
||||
if not ipa:
|
||||
ipa = _decompose_compound(clean, pronunciation)
|
||||
# Fallback 1: IPA-marker split for merged tokens where OCR
|
||||
# joined headword with its IPA (e.g. "schoolbagsku:lbæg").
|
||||
# Find the first IPA marker character (:, æ, ɪ, etc.), walk
|
||||
@@ -1098,6 +1133,9 @@ def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str:
|
||||
headword = w[:split]
|
||||
ocr_ipa = w[split:]
|
||||
hw_ipa = _lookup_ipa(headword, pronunciation)
|
||||
if not hw_ipa:
|
||||
# Try compound decomposition for the headword part
|
||||
hw_ipa = _decompose_compound(headword, pronunciation)
|
||||
if hw_ipa:
|
||||
words[i] = f"{headword} [{hw_ipa}]"
|
||||
else:
|
||||
@@ -1197,6 +1235,12 @@ def _strip_post_bracket_garbled(
|
||||
|
||||
E.g. ``sea [sˈiː] si:`` → ``sea [sˈiː]``
|
||||
``seat [sˈiːt] si:t`` → ``seat [sˈiːt]``
|
||||
``seat [sˈiːt] belt si:t belt`` → ``seat [sˈiːt] belt``
|
||||
|
||||
For multi-word headwords like "seat belt", a real English word ("belt")
|
||||
may be followed by garbled IPA duplicates. We detect this by checking
|
||||
whether the sequence after a real word contains IPA markers (`:`, `ə`,
|
||||
etc.) — if so, everything from the first garbled token onward is stripped.
|
||||
"""
|
||||
if ']' not in text:
|
||||
return text
|
||||
@@ -1207,6 +1251,8 @@ def _strip_post_bracket_garbled(
|
||||
after = text[last_bracket + 1:].strip()
|
||||
if not after:
|
||||
return text
|
||||
|
||||
_IPA_MARKER_CHARS = set(':əɪɛɒʊʌæɑɔʃʒθðŋˈˌ')
|
||||
after_words = after.split()
|
||||
kept: List[str] = []
|
||||
for idx, w in enumerate(after_words):
|
||||
@@ -1215,15 +1261,40 @@ def _strip_post_bracket_garbled(
|
||||
kept.extend(after_words[idx:])
|
||||
break
|
||||
# Contains IPA markers (length mark, IPA chars) — garbled, skip
|
||||
if ':' in w or any(c in w for c in 'əɪɛɒʊʌæɑɔʃʒθðŋˈˌ'):
|
||||
if any(c in w for c in _IPA_MARKER_CHARS):
|
||||
# Everything from here is garbled IPA — stop scanning
|
||||
# but look ahead: if any remaining words are real English
|
||||
# words WITHOUT IPA markers, they might be a different headword
|
||||
# following. Only skip the contiguous garbled run.
|
||||
continue
|
||||
clean = re.sub(r'[^a-zA-Z]', '', w)
|
||||
# Uppercase — likely German, keep rest
|
||||
if clean and clean[0].isupper():
|
||||
kept.extend(after_words[idx:])
|
||||
break
|
||||
# Known English word — keep rest
|
||||
# Known English word — keep it, but check if followed by garbled IPA
|
||||
# (multi-word headword case like "seat [siːt] belt si:t belt")
|
||||
if clean and len(clean) >= 2 and _lookup_ipa(clean, pronunciation):
|
||||
# Peek ahead: if next word has IPA markers, the rest is garbled
|
||||
remaining = after_words[idx + 1:]
|
||||
has_garbled_after = any(
|
||||
any(c in rw for c in _IPA_MARKER_CHARS)
|
||||
for rw in remaining
|
||||
)
|
||||
if has_garbled_after:
|
||||
# Keep this real word but stop — rest is garbled duplication
|
||||
kept.append(w)
|
||||
# Still scan for delimiters/German in the remaining words
|
||||
for ridx, rw in enumerate(remaining):
|
||||
if rw in ('–', '—', '-', '/', '|', ',', ';'):
|
||||
kept.extend(remaining[ridx:])
|
||||
break
|
||||
rclean = re.sub(r'[^a-zA-Z]', '', rw)
|
||||
if rclean and rclean[0].isupper():
|
||||
kept.extend(remaining[ridx:])
|
||||
break
|
||||
break
|
||||
else:
|
||||
kept.extend(after_words[idx:])
|
||||
break
|
||||
# Unknown short word — likely garbled, skip
|
||||
|
||||
18
klausur-service/backend/migrations/008_regression_runs.sql
Normal file
18
klausur-service/backend/migrations/008_regression_runs.sql
Normal file
@@ -0,0 +1,18 @@
|
||||
-- Migration 008: Regression test run history
|
||||
-- Stores results of regression test runs for trend analysis.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS regression_runs (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
run_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
||||
status VARCHAR(20) NOT NULL, -- 'pass', 'fail', 'error'
|
||||
total INT NOT NULL DEFAULT 0,
|
||||
passed INT NOT NULL DEFAULT 0,
|
||||
failed INT NOT NULL DEFAULT 0,
|
||||
errors INT NOT NULL DEFAULT 0,
|
||||
duration_ms INT,
|
||||
results JSONB NOT NULL DEFAULT '[]',
|
||||
triggered_by VARCHAR(50) DEFAULT 'manual' -- 'manual', 'script', 'ci'
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_regression_runs_run_at
|
||||
ON regression_runs (run_at DESC);
|
||||
@@ -8,7 +8,11 @@ Lizenz: Apache 2.0
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
@@ -16,6 +20,7 @@ from fastapi import APIRouter, HTTPException, Query
|
||||
|
||||
from grid_editor_api import _build_grid_core
|
||||
from ocr_pipeline_session_store import (
|
||||
get_pool,
|
||||
get_session_db,
|
||||
list_ground_truth_sessions_db,
|
||||
update_session_db,
|
||||
@@ -26,6 +31,60 @@ logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["regression"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DB persistence for regression runs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _init_regression_table():
|
||||
"""Ensure regression_runs table exists (idempotent)."""
|
||||
pool = await get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
migration_path = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"migrations/008_regression_runs.sql",
|
||||
)
|
||||
if os.path.exists(migration_path):
|
||||
with open(migration_path, "r") as f:
|
||||
sql = f.read()
|
||||
await conn.execute(sql)
|
||||
|
||||
|
||||
async def _persist_regression_run(
|
||||
status: str,
|
||||
summary: dict,
|
||||
results: list,
|
||||
duration_ms: int,
|
||||
triggered_by: str = "manual",
|
||||
) -> str:
|
||||
"""Save a regression run to the database. Returns the run ID."""
|
||||
try:
|
||||
await _init_regression_table()
|
||||
pool = await get_pool()
|
||||
run_id = str(uuid.uuid4())
|
||||
async with pool.acquire() as conn:
|
||||
await conn.execute(
|
||||
"""
|
||||
INSERT INTO regression_runs
|
||||
(id, status, total, passed, failed, errors, duration_ms, results, triggered_by)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8::jsonb, $9)
|
||||
""",
|
||||
run_id,
|
||||
status,
|
||||
summary.get("total", 0),
|
||||
summary.get("passed", 0),
|
||||
summary.get("failed", 0),
|
||||
summary.get("errors", 0),
|
||||
duration_ms,
|
||||
json.dumps(results),
|
||||
triggered_by,
|
||||
)
|
||||
logger.info("Regression run %s persisted: %s", run_id, status)
|
||||
return run_id
|
||||
except Exception as e:
|
||||
logger.warning("Failed to persist regression run: %s", e)
|
||||
return ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -299,8 +358,11 @@ async def run_single_regression(session_id: str):
|
||||
|
||||
|
||||
@router.post("/regression/run")
|
||||
async def run_all_regressions():
|
||||
async def run_all_regressions(
|
||||
triggered_by: str = Query("manual", description="Who triggered: manual, script, ci"),
|
||||
):
|
||||
"""Re-run build_grid for ALL ground-truth sessions and compare."""
|
||||
start_time = time.monotonic()
|
||||
sessions = await list_ground_truth_sessions_db()
|
||||
|
||||
if not sessions:
|
||||
@@ -370,19 +432,105 @@ async def run_all_regressions():
|
||||
results.append(entry)
|
||||
|
||||
overall = "pass" if failed == 0 and errors == 0 else "fail"
|
||||
duration_ms = int((time.monotonic() - start_time) * 1000)
|
||||
|
||||
logger.info(
|
||||
"Regression suite: %s — %d passed, %d failed, %d errors (of %d)",
|
||||
overall, passed, failed, errors, len(results),
|
||||
)
|
||||
|
||||
return {
|
||||
"status": overall,
|
||||
"results": results,
|
||||
"summary": {
|
||||
summary = {
|
||||
"total": len(results),
|
||||
"passed": passed,
|
||||
"failed": failed,
|
||||
"errors": errors,
|
||||
},
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"Regression suite: %s — %d passed, %d failed, %d errors (of %d) in %dms",
|
||||
overall, passed, failed, errors, len(results), duration_ms,
|
||||
)
|
||||
|
||||
# Persist to DB
|
||||
run_id = await _persist_regression_run(
|
||||
status=overall,
|
||||
summary=summary,
|
||||
results=results,
|
||||
duration_ms=duration_ms,
|
||||
triggered_by=triggered_by,
|
||||
)
|
||||
|
||||
return {
|
||||
"status": overall,
|
||||
"run_id": run_id,
|
||||
"duration_ms": duration_ms,
|
||||
"results": results,
|
||||
"summary": summary,
|
||||
}
|
||||
|
||||
|
||||
@router.get("/regression/history")
|
||||
async def get_regression_history(
|
||||
limit: int = Query(20, ge=1, le=100),
|
||||
):
|
||||
"""Get recent regression run history from the database."""
|
||||
try:
|
||||
await _init_regression_table()
|
||||
pool = await get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch(
|
||||
"""
|
||||
SELECT id, run_at, status, total, passed, failed, errors,
|
||||
duration_ms, triggered_by
|
||||
FROM regression_runs
|
||||
ORDER BY run_at DESC
|
||||
LIMIT $1
|
||||
""",
|
||||
limit,
|
||||
)
|
||||
return {
|
||||
"runs": [
|
||||
{
|
||||
"id": str(row["id"]),
|
||||
"run_at": row["run_at"].isoformat() if row["run_at"] else None,
|
||||
"status": row["status"],
|
||||
"total": row["total"],
|
||||
"passed": row["passed"],
|
||||
"failed": row["failed"],
|
||||
"errors": row["errors"],
|
||||
"duration_ms": row["duration_ms"],
|
||||
"triggered_by": row["triggered_by"],
|
||||
}
|
||||
for row in rows
|
||||
],
|
||||
"count": len(rows),
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning("Failed to fetch regression history: %s", e)
|
||||
return {"runs": [], "count": 0, "error": str(e)}
|
||||
|
||||
|
||||
@router.get("/regression/history/{run_id}")
async def get_regression_run_detail(run_id: str):
    """Get detailed results of a specific regression run.

    Args:
        run_id: Primary key of the run row (serialized as string).

    Raises:
        HTTPException: 404 if no run with ``run_id`` exists; 500 on any
            database or deserialization failure.
    """
    try:
        # Idempotent; guarantees the table exists on a fresh database.
        await _init_regression_table()
        pool = await get_pool()
        async with pool.acquire() as conn:
            row = await conn.fetchrow(
                "SELECT * FROM regression_runs WHERE id = $1",
                run_id,
            )
        if not row:
            raise HTTPException(status_code=404, detail="Run not found")
        return {
            "id": str(row["id"]),
            "run_at": row["run_at"].isoformat() if row["run_at"] else None,
            "status": row["status"],
            "total": row["total"],
            "passed": row["passed"],
            "failed": row["failed"],
            "errors": row["errors"],
            "duration_ms": row["duration_ms"],
            "triggered_by": row["triggered_by"],
            # Full per-session results are stored as a JSON text column.
            "results": json.loads(row["results"]) if row["results"] else [],
        }
    except HTTPException:
        # Re-raise our own 404 untouched.
        raise
    except Exception as e:
        # Fix: log server-side (the sibling history endpoint does) and keep
        # exception chaining so the original traceback is not lost.
        logger.warning("Failed to fetch regression run %s: %s", run_id, e)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
@@ -57,6 +57,63 @@ class TestInsertMissingIpa:
|
||||
result = _insert_missing_ipa("Anstecknadel", "british")
|
||||
assert result == "Anstecknadel"
|
||||
|
||||
def test_compound_word_schoolbag_gets_ipa(self):
    """R07: Compound word 'schoolbag' should get decomposed IPA (school+bag)."""
    from cv_ocr_engines import _insert_missing_ipa

    enriched = _insert_missing_ipa("schoolbag", "british")
    # A bracketed IPA transcription must have been appended after the word.
    assert "[" in enriched
    assert "]" in enriched
    assert enriched.startswith("schoolbag [")
|
||||
|
||||
def test_compound_word_blackbird(self):
    """Compound word 'blackbird' should get decomposed IPA."""
    from cv_ocr_engines import _insert_missing_ipa

    enriched = _insert_missing_ipa("blackbird", "british")
    # Both brackets present ⇒ an IPA transcription was inserted.
    assert "[" in enriched
    assert "]" in enriched
|
||||
|
||||
def test_compound_word_too_short(self):
    """Words shorter than 6 chars should not attempt compound decomposition."""
    from cv_ocr_engines import _decompose_compound

    # "bag" is below the minimum compound length, so no split is attempted.
    assert _decompose_compound("bag", "british") is None
|
||||
|
||||
def test_decompose_compound_direct(self):
    """Direct test of _decompose_compound for known compounds."""
    from cv_ocr_engines import _decompose_compound

    # schoolbag = school + bag — both halves should be in the dictionary,
    # so decomposition must succeed.
    decomposition = _decompose_compound("schoolbag", "british")
    assert decomposition is not None
|
||||
|
||||
|
||||
class TestStripPostBracketGarbled:
    """Tests for _strip_post_bracket_garbled — trailing garbled IPA removal."""

    def test_simple_trailing_garbled(self):
        """R21-simple: 'sea [sˈiː] si:' → trailing IPA marker removed."""
        from cv_ocr_engines import _strip_post_bracket_garbled

        cleaned = _strip_post_bracket_garbled("sea [sˈiː] si:")
        # The garbled ASCII rendering of the IPA must be gone …
        assert "si:" not in cleaned
        # … while the word and its bracketed IPA survive intact.
        assert cleaned.startswith("sea [sˈiː]")

    def test_multi_word_trailing_garbled(self):
        """R21: 'seat [sˈiːt] belt si:t belt' → keep 'belt', remove garbled."""
        from cv_ocr_engines import _strip_post_bracket_garbled

        cleaned = _strip_post_bracket_garbled("seat [sˈiːt] belt si:t belt")
        assert "belt" in cleaned  # real word kept
        assert "si:t" not in cleaned  # garbled removed
        # The garbled duplication ("… si:t belt") must not leave a second
        # copy of the real word behind.
        assert cleaned.count("belt") == 1

    def test_delimiter_after_bracket_kept(self):
        """Delimiters after IPA bracket are kept."""
        from cv_ocr_engines import _strip_post_bracket_garbled

        cleaned = _strip_post_bracket_garbled("dance [dˈɑːns] – tanzen")
        # En-dash separator plus translation must survive stripping.
        assert "– tanzen" in cleaned

    def test_german_after_bracket_kept(self):
        """German words (uppercase) after IPA bracket are kept."""
        from cv_ocr_engines import _strip_post_bracket_garbled

        cleaned = _strip_post_bracket_garbled("badge [bædʒ] Abzeichen")
        # The capitalized German translation is legitimate content, not IPA.
        assert "Abzeichen" in cleaned
|
||||
|
||||
|
||||
class TestFixCellPhonetics:
|
||||
"""Tests for fix_cell_phonetics function."""
|
||||
|
||||
@@ -415,3 +415,53 @@ class TestDetectAndCropPage:
|
||||
assert 0 <= pct["y"] <= 100
|
||||
assert 0 < pct["width"] <= 100
|
||||
assert 0 < pct["height"] <= 100
|
||||
|
||||
|
||||
class TestCropDeterminism:
    """A3: Verify that page crop produces identical results across N runs."""

    @pytest.mark.parametrize("image_factory,desc", [
        (
            lambda: _make_image_with_content(800, 600, (100, 700, 80, 520)),
            "standard content",
        ),
        (
            lambda: _make_book_scan(1000, 800),
            "book scan with spine shadow",
        ),
    ])
    def test_determinism_10_runs(self, image_factory, desc):
        """Same image must produce identical crops in 10 consecutive runs."""
        source = image_factory()

        # Record the crop metadata of every run; .copy() guards against
        # in-place mutation of the input leaking between iterations.
        observations = []
        for _ in range(10):
            cropped, meta = detect_and_crop_page(source.copy())
            observations.append({
                "crop_applied": meta["crop_applied"],
                "cropped_size": meta["cropped_size"],
                "border_fractions": meta["border_fractions"],
                "shape": cropped.shape,
            })

        # Every later run must match the first one exactly.
        baseline = observations[0]
        for i, obs in enumerate(observations[1:], 1):
            assert obs["crop_applied"] == baseline["crop_applied"], (
                f"Run {i} crop_applied differs from run 0 ({desc})"
            )
            assert obs["cropped_size"] == baseline["cropped_size"], (
                f"Run {i} cropped_size differs from run 0 ({desc})"
            )
            assert obs["shape"] == baseline["shape"], (
                f"Run {i} output shape differs from run 0 ({desc})"
            )

    def test_determinism_pixel_identical(self):
        """Crop output pixels must be identical across runs."""
        source = _make_image_with_content(800, 600, (100, 700, 80, 520))
        reference, _ = detect_and_crop_page(source.copy())

        # Beyond matching metadata, the cropped arrays must be equal
        # pixel-for-pixel on every repeat.
        for i in range(5):
            candidate, _ = detect_and_crop_page(source.copy())
            assert np.array_equal(reference, candidate), (
                f"Run {i} produced different pixel output"
            )
|
||||
|
||||
@@ -84,5 +84,6 @@ nav:
|
||||
- Zeugnis-System: architecture/zeugnis-system.md
|
||||
- Entwicklung:
|
||||
- Testing: development/testing.md
|
||||
- Regression Testing: development/regression-testing.md
|
||||
- Dokumentation: development/documentation.md
|
||||
- CI/CD Pipeline: development/ci-cd-pipeline.md
|
||||
|
||||
163
scripts/benchmark-trocr.py
Executable file
163
scripts/benchmark-trocr.py
Executable file
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
TrOCR Baseline Benchmark — measures PyTorch TrOCR performance.
|
||||
|
||||
Metrics:
|
||||
- RAM usage (RSS) before and after model load
|
||||
- Inference time per line (min, max, mean, p50, p95)
|
||||
- Model size on disk
|
||||
|
||||
Output: JSON report to stdout (redirect to file for Sprint 2 comparison).
|
||||
|
||||
Usage:
|
||||
python scripts/benchmark-trocr.py [--model trocr-base-printed] [--runs 10]
|
||||
python scripts/benchmark-trocr.py > benchmark-trocr-baseline.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
# Add backend to path for imports
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'klausur-service', 'backend'))
|
||||
|
||||
|
||||
def get_rss_mb():
    """Return the process *peak* RSS (``ru_maxrss``) in MB.

    NOTE(review): ``ru_maxrss`` is the high-water mark, not the current
    resident set, so successive calls can only report equal or larger
    values — the before/after deltas in the benchmark are upper bounds.
    """
    import resource

    peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    # ru_maxrss is reported in bytes on macOS but kilobytes on Linux.
    if sys.platform == 'darwin':
        divisor = 1024 * 1024
    else:
        divisor = 1024
    return peak / divisor
|
||||
|
||||
|
||||
def get_model_size_mb(model_name, cache_dir=None):
    """Estimate a model's on-disk size from the HuggingFace hub cache.

    Args:
        model_name: HF model id, e.g. "microsoft/trocr-base-printed";
            '/' is mapped to '--' to match the cache directory naming.
        cache_dir: Optional override of the hub cache root (useful for
            tests); defaults to ~/.cache/huggingface/hub.

    Returns:
        Total size in MB of regular files under matching directories,
        or 0.0 if the cache is absent or holds no match.
    """
    if cache_dir is None:
        cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
    model_dir_pattern = model_name.replace('/', '--')
    total = 0
    for root, _dirs, files in os.walk(cache_dir):
        if model_dir_pattern not in root:
            continue
        for f in files:
            path = os.path.join(root, f)
            # The hub cache stores snapshots/ as symlinks into blobs/;
            # skipping links counts each payload once instead of twice.
            if os.path.islink(path):
                continue
            try:
                total += os.path.getsize(path)
            except OSError:
                # Tolerate files vanishing mid-walk or dangling links
                # left behind by interrupted downloads.
                continue
    return total / (1024 * 1024)  # bytes to MB
|
||||
|
||||
|
||||
def benchmark_trocr(model_name: str = "microsoft/trocr-base-printed", num_runs: int = 10):
    """Run TrOCR benchmark and return results dict.

    Loads the given HuggingFace TrOCR model, runs ``num_runs`` single-image
    inference passes over synthetic text-line images, and returns a
    JSON-serializable report with model size, RSS checkpoints (peak RSS —
    see get_rss_mb) and latency statistics.  On model-load failure a
    ``{"error": ...}`` dict is returned instead of raising.
    """
    # NOTE(review): numpy appears unused in this function body.
    import numpy as np
    from PIL import Image

    rss_before = get_rss_mb()

    # Load model (progress goes to stderr so stdout stays pure JSON).
    print(f"Loading model: {model_name}", file=sys.stderr)
    load_start = time.monotonic()

    try:
        from transformers import TrOCRProcessor, VisionEncoderDecoderModel
        processor = TrOCRProcessor.from_pretrained(model_name)
        model = VisionEncoderDecoderModel.from_pretrained(model_name)
        model.eval()
    except Exception as e:
        # Soft-fail: caller prints the report either way.
        return {"error": f"Failed to load model: {e}"}

    load_time = time.monotonic() - load_start
    rss_after_load = get_rss_mb()
    model_size = get_model_size_mb(model_name)

    print(f"Model loaded in {load_time:.1f}s, RSS: {rss_after_load:.0f}MB", file=sys.stderr)

    # Create synthetic test images (text line images)
    test_images = []
    for i in range(num_runs):
        # Create a simple white image with black text-like content
        # In production, these would be real cropped text lines
        w, h = 384, 48  # typical TrOCR input size
        img = Image.new('RGB', (w, h), 'white')
        # Add some variation
        pixels = img.load()
        # Simple dark region to simulate text.
        # NOTE(review): the x range end (200 + i*5) exceeds w=384 for
        # i >= 37, which would raise IndexError for --runs > 37; and for
        # i >= 30 the range is empty (blank image). Confirm intended.
        for x in range(50 + i * 10, 200 + i * 5):
            for y in range(10, 38):
                pixels[x, y] = (30, 30, 30)
        test_images.append(img)

    # Warm-up run (not counted) — pays one-time lazy-init costs so the
    # timed runs measure steady-state latency.
    print("Warm-up...", file=sys.stderr)
    import torch
    with torch.no_grad():
        pixel_values = processor(images=test_images[0], return_tensors="pt").pixel_values
        _ = model.generate(pixel_values, max_new_tokens=50)

    # Benchmark runs: each timing covers preprocessing + generate + decode.
    print(f"Running {num_runs} inference passes...", file=sys.stderr)
    times_ms = []
    for i, img in enumerate(test_images):
        start = time.monotonic()
        with torch.no_grad():
            pixel_values = processor(images=img, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values, max_new_tokens=50)
            text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        elapsed_ms = (time.monotonic() - start) * 1000
        times_ms.append(elapsed_ms)
        print(f"  Run {i+1}/{num_runs}: {elapsed_ms:.0f}ms -> '{text[:30]}'", file=sys.stderr)

    rss_after_inference = get_rss_mb()

    # Compute stats (nearest-rank percentiles over the sorted samples).
    times_sorted = sorted(times_ms)
    p50_idx = len(times_sorted) // 2
    p95_idx = int(len(times_sorted) * 0.95)

    report = {
        "benchmark": "trocr-baseline",
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # consider datetime.now(timezone.utc) when touching this next.
        "timestamp": datetime.utcnow().isoformat() + "Z",
        "model": model_name,
        "backend": "pytorch",
        "quantization": "float32",
        "num_runs": num_runs,
        "model_size_mb": round(model_size, 1),
        # All RSS figures are peak RSS (see get_rss_mb), so deltas are
        # upper bounds rather than exact usage.
        "ram_mb": {
            "before_load": round(rss_before, 1),
            "after_load": round(rss_after_load, 1),
            "after_inference": round(rss_after_inference, 1),
            "model_delta": round(rss_after_load - rss_before, 1),
        },
        "load_time_seconds": round(load_time, 2),
        "inference_ms": {
            "min": round(min(times_ms), 1),
            "max": round(max(times_ms), 1),
            "mean": round(sum(times_ms) / len(times_ms), 1),
            "p50": round(times_sorted[p50_idx], 1),
            # Clamp so p95 stays a valid index for small sample counts.
            "p95": round(times_sorted[min(p95_idx, len(times_sorted) - 1)], 1),
        },
        # Raw per-run samples, for Sprint 2 comparison plots.
        "times_ms": [round(t, 1) for t in times_ms],
        "platform": {
            "python": sys.version.split()[0],
            "os": sys.platform,
        },
    }

    return report
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, run the benchmark, print JSON to stdout."""
    arg_parser = argparse.ArgumentParser(description="TrOCR Baseline Benchmark")
    arg_parser.add_argument(
        "--model",
        default="microsoft/trocr-base-printed",
        help="HuggingFace model name",
    )
    arg_parser.add_argument(
        "--runs",
        type=int,
        default=10,
        help="Number of inference runs",
    )
    options = arg_parser.parse_args()

    # Report goes to stdout; all progress output is on stderr.
    report = benchmark_trocr(model_name=options.model, num_runs=options.runs)
    print(json.dumps(report, indent=2))
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
61
scripts/run-regression.sh
Executable file
61
scripts/run-regression.sh
Executable file
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env bash
# Run OCR pipeline regression tests and exit non-zero on failure.
#
# Usage:
#   ./scripts/run-regression.sh                      # default: macmini:8086
#   ./scripts/run-regression.sh http://localhost:8086
#
# Exit codes:
#   0 = all pass
#   1 = failures or errors
#   2 = connection error

set -euo pipefail

BASE_URL="${1:-http://macmini:8086}"
ENDPOINT="${BASE_URL}/api/v1/ocr-pipeline/regression/run?triggered_by=script"

echo "=== OCR Pipeline Regression Suite ==="
echo "Endpoint: ${ENDPOINT}"
echo ""

RESPONSE=$(curl -sf -X POST "${ENDPOINT}" -H "Content-Type: application/json" 2>&1) || {
    echo "ERROR: Could not reach ${ENDPOINT}"
    exit 2
}

# Parse the JSON exactly once (previously six python3 subprocesses each
# re-parsed the response; a malformed payload also aborted mid-report
# under `set -e` with a cryptic error).
read -r STATUS TOTAL PASSED FAILED ERRORS DURATION < <(echo "${RESPONSE}" | python3 -c "
import sys, json
data = json.load(sys.stdin)
s = data['summary']
print(data['status'], s['total'], s['passed'], s['failed'], s['errors'], data.get('duration_ms', '?'))
")

echo "Status:   ${STATUS}"
echo "Total:    ${TOTAL}"
echo "Passed:   ${PASSED}"
echo "Failed:   ${FAILED}"
echo "Errors:   ${ERRORS}"
echo "Duration: ${DURATION}ms"
echo ""

if [ "${STATUS}" = "pass" ]; then
    echo "PASS — All regression tests passed."
    exit 0
else
    echo "FAIL — Regression failures detected!"
    # Print failure details
    echo "${RESPONSE}" | python3 -c "
import sys, json
data = json.load(sys.stdin)
for r in data.get('results', []):
    if r['status'] != 'pass':
        print(f\"  {r['status'].upper()}: {r.get('name', r['session_id'])}\")
        if 'error' in r:
            print(f\"    Error: {r['error']}\")
        ds = r.get('diff_summary', {})
        if ds:
            print(f\"    Structural: {ds.get('structural_changes', 0)}, Text: {ds.get('text_changes', 0)}, Missing: {ds.get('cells_missing', 0)}, Added: {ds.get('cells_added', 0)}\")
"
    exit 1
fi
|
||||
Reference in New Issue
Block a user