Files
breakpilot-lehrer/admin-lehrer/app/(admin)/ai/ocr-ground-truth/page.tsx
Benjamin Admin c695b659fb
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m53s
CI / test-python-agent-core (push) Successful in 14s
CI / test-nodejs-website (push) Successful in 17s
fix: PagePurpose props on ground-truth and regression pages
Both pages passed `moduleId` which is not a valid prop for PagePurpose.
The component expects explicit title/purpose/audience — calling
audience.join() on undefined caused the client-side crash.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 09:43:36 +01:00

594 lines
24 KiB
TypeScript

'use client'
/**
* Ground-Truth Review Workflow
*
* Efficient mass-review of OCR sessions:
* - Session queue with auto-advance
* - Split-view: original image left, grid right
* - Confidence highlighting on cells
* - Quick-accept per row
* - Inline cell editing
* - Batch mark as ground truth
* - Progress tracking
*/
import { useState, useEffect, useCallback, useRef } from 'react'
import { PagePurpose } from '@/components/common/PagePurpose'
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
const KLAUSUR_API = '/klausur-api'
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * One OCR pipeline session as it appears in the review queue.
 * Instances are built in `loadSessions` from the sessions endpoint; missing
 * string fields are normalised there to '' (and `status` to 'active').
 */
interface Session {
/** Session identifier; interpolated into the per-session API paths. */
id: string
/** Display name; the UI falls back to `filename` (and then `id`) when empty. */
name: string
/** Fallback label used when `name` is empty. */
filename: string
/** The "unreviewed" filter only keeps sessions whose status is 'active'. */
status: string
/** Creation timestamp as delivered by the API, '' when absent. */
created_at: string
/** Optional category shown as a badge in the session list. */
document_category: string | null
/**
 * True when the raw session carried a truthy `ground_truth.build_grid_reference`,
 * or after the session was marked as ground truth from this page.
 */
has_ground_truth: boolean
}
/**
 * One zone of a recognised grid. The review view renders each zone as its own
 * section and groups the zone's cells into rows by `row_index`.
 */
interface GridZone {
/** Used as React key and as prefix of accepted-row keys; the view falls back to the zone's index when it is empty. */
zone_id: string
/** Shown verbatim in the zone header. */
zone_type: string
/** Column descriptors; the zone header lists each `col_type` with the 'column_' prefix removed. */
columns: Array<{ col_index: number; col_type: string; header: string }>
/** Row descriptors. NOTE(review): the rendering code visible here derives rows from the cells instead of reading this list. */
rows: Array<{ row_index: number; is_header: boolean }>
/** All cells of the zone, in no guaranteed order (the view sorts them by row and column). */
cells: GridCell[]
}
/** A single recognised cell inside a grid zone. */
interface GridCell {
/** Identifier sent to the update-cell endpoint and used as React key / edit marker. */
cell_id: string
/** Row the cell belongs to; cells sharing a value are rendered on one line. */
row_index: number
/** Position within the row; cells are sorted ascending by this value. */
col_index: number
/** Shown in the cell tooltip. */
col_type: string
/** Recognised (or manually corrected) text; an empty string is rendered as "(leer)". */
text: string
/**
 * OCR confidence, displayed as a percentage. Values >= 80 are highlighted
 * green, >= 50 amber, lower ones red; a missing value counts as not-low
 * when tallying low-confidence cells.
 */
confidence?: number
/** Renders the cell text in bold when true. */
is_bold?: boolean
}
/**
 * Grid payload for one session as returned by the grid-editor endpoint
 * (either as the `grid` property of the response or as the response itself).
 */
interface GridResult {
/** Zones of the grid; the view shows a "no grid" hint when this is missing at runtime. */
zones: GridZone[]
/** Optional aggregate counts. NOTE(review): not read by the code visible in this file. */
summary?: {
total_zones: number
total_columns: number
total_rows: number
total_cells: number
}
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Maps an OCR confidence (percentage scale) to a Tailwind background class.
 *
 * @param conf Confidence value, or `undefined`/`null` when the cell has none.
 *   `null` is accepted because JSON payloads deliver absent numbers as `null`;
 *   treating it like `undefined` keeps this helper in line with the `??`
 *   checks used elsewhere in this file for the same field.
 * @returns '' when there is no confidence, green for >= 80, amber for >= 50,
 *   red otherwise.
 */
function confidenceColor(conf: number | null | undefined): string {
  if (conf == null) return ''
  if (conf >= 80) return 'bg-emerald-50'
  if (conf >= 50) return 'bg-amber-50'
  return 'bg-red-50'
}
/**
 * Maps an OCR confidence (percentage scale) to a Tailwind border-colour class.
 *
 * @param conf Confidence value, or `undefined`/`null` when the cell has none.
 *   `null` is handled like `undefined` so that a JSON `null` gets the neutral
 *   border instead of the red "low confidence" one.
 * @returns The neutral slate border when there is no confidence, green for
 *   >= 80, amber for >= 50, red otherwise.
 */
function confidenceBorder(conf: number | null | undefined): string {
  if (conf == null) return 'border-slate-200'
  if (conf >= 80) return 'border-emerald-200'
  if (conf >= 50) return 'border-amber-300'
  return 'border-red-300'
}
// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------
export default function GroundTruthReviewPage() {
// Session list & queue
// allSessions: every session returned by the sessions endpoint (unfiltered).
const [allSessions, setAllSessions] = useState<Session[]>([])
// filter: which subset of allSessions forms the review queue; starts on the open ones.
const [filter, setFilter] = useState<'all' | 'unreviewed' | 'reviewed'>('unreviewed')
// currentIdx: position inside the *filtered* list; reset to 0 when the filter changes.
const [currentIdx, setCurrentIdx] = useState(0)
// loading: true while the session list request is in flight.
const [loading, setLoading] = useState(true)
// Current session data
// grid: grid of the session on screen, null while loading or when none could be fetched.
const [grid, setGrid] = useState<GridResult | null>(null)
const [loadingGrid, setLoadingGrid] = useState(false)
// editingCell: cell_id of the cell whose inline editor is open, null when none is.
const [editingCell, setEditingCell] = useState<string | null>(null)
// editText: current content of the inline editor.
const [editText, setEditText] = useState('')
// acceptedRows: keys of the form `${zoneId}-${rowIdx}` for rows ticked as correct; cleared on every grid load.
const [acceptedRows, setAcceptedRows] = useState<Set<string>>(new Set())
// zoom: image width in percent of its container; the +/- buttons move it in steps of 25 between 50 and 300.
const [zoom, setZoom] = useState(100)
// Batch operations
// selectedSessions: ids ticked in the session list for batch marking.
const [selectedSessions, setSelectedSessions] = useState<Set<string>>(new Set())
// marking: true while a single or batch mark request is running; disables the mark buttons.
const [marking, setMarking] = useState(false)
// markResult: 'success', 'error', or a free-text message; drives the toast, null hides it.
const [markResult, setMarkResult] = useState<string | null>(null)
// Stats
// reviewedCount / totalCount feed the progress bar; both are initialised by the session load.
const [reviewedCount, setReviewedCount] = useState(0)
const [totalCount, setTotalCount] = useState(0)
// imageRef: attached to the scrollable container around the original scan.
const imageRef = useRef<HTMLDivElement>(null)
// Load all sessions
/**
 * Fetches up to 200 sessions, normalises them into `Session` objects and
 * initialises the progress counters. A failed request is logged and leaves
 * the previous list in place; `loading` is cleared in every case.
 */
const loadSessions = useCallback(async () => {
  // Shape of one raw session as far as this page reads it.
  type RawSession = {
    id: string
    name?: string | null
    filename?: string | null
    status?: string | null
    created_at?: string | null
    document_category?: string | null
    ground_truth?: { build_grid_reference?: unknown } | null
  }

  setLoading(true)
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions?limit=200`)
    if (!res.ok) {
      // Previously swallowed silently, which made an empty queue indistinguishable from a backend error.
      console.error(`Failed to load sessions: HTTP ${res.status}`)
      return
    }
    const data = (await res.json()) as { sessions?: RawSession[] } | null
    // Tolerate a missing or malformed `sessions` property instead of throwing.
    const rawSessions = Array.isArray(data?.sessions) ? data.sessions : []
    const sessions: Session[] = rawSessions.map(s => ({
      id: s.id,
      name: s.name || '',
      filename: s.filename || '',
      status: s.status || 'active',
      created_at: s.created_at || '',
      // `||` on purpose: an empty category string is treated as "no category".
      document_category: s.document_category || null,
      has_ground_truth: Boolean(s.ground_truth?.build_grid_reference),
    }))
    setAllSessions(sessions)
    setTotalCount(sessions.length)
    setReviewedCount(sessions.filter(s => s.has_ground_truth).length)
  } catch (e) {
    console.error('Failed to load sessions:', e)
  } finally {
    setLoading(false)
  }
}, [])
// Load once on mount (loadSessions is stable).
useEffect(() => { loadSessions() }, [loadSessions])
// Filtered sessions
// The review queue: allSessions narrowed by the active filter tab.
const filteredSessions = allSessions.filter(session => {
  switch (filter) {
    case 'unreviewed':
      // Open work: not yet ground truth and still active.
      return !session.has_ground_truth && session.status === 'active'
    case 'reviewed':
      return session.has_ground_truth
    default:
      return true
  }
})
// Session currently on screen, or null when the index points past the queue.
const currentSession = filteredSessions[currentIdx] ?? null
// Load grid for current session
// Monotonic counter identifying the most recent grid request, so that a slow
// response for a session the user already left cannot overwrite the grid of
// the session now on screen.
const latestGridRequest = useRef(0)
/**
 * Loads the grid for `sessionId` and resets all per-session review state
 * (grid, accepted rows, open cell editor). Only the most recent call is
 * allowed to publish its result or clear the loading flag. A non-OK response
 * leaves the grid at null, which the view renders as "no grid available".
 */
const loadGrid = useCallback(async (sessionId: string) => {
  const requestId = ++latestGridRequest.current
  const isLatest = () => latestGridRequest.current === requestId
  setLoadingGrid(true)
  setGrid(null)
  setAcceptedRows(new Set())
  setEditingCell(null)
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`)
    if (res.ok) {
      const data = await res.json()
      // The endpoint may wrap the grid in `{ grid }` or return it directly.
      if (isLatest()) setGrid(data.grid || data)
    }
  } catch (e) {
    console.error('Failed to load grid:', e)
  } finally {
    // A superseded request must not hide the spinner of the newer one.
    if (isLatest()) setLoadingGrid(false)
  }
}, [])
// Key the effect on the session id rather than the session object: marking a
// session replaces its object in allSessions, and reloading the grid for the
// same session would throw away accepted rows and an open editor.
const currentSessionId = currentSession?.id
useEffect(() => {
  if (currentSessionId) loadGrid(currentSessionId)
}, [currentSessionId, loadGrid])
// Navigation
/** Moves to the following session in the queue; no-op on the last one. */
const goNext = () => {
  const lastIdx = filteredSessions.length - 1
  if (currentIdx >= lastIdx) return
  setCurrentIdx(currentIdx + 1)
}
/** Moves to the preceding session in the queue; no-op on the first one. */
const goPrev = () => {
  if (currentIdx <= 0) return
  setCurrentIdx(currentIdx - 1)
}
// Accept row
/**
 * Flags one row of a zone as reviewed-and-correct by adding its
 * `${zoneId}-${rowIdx}` key to the accepted set.
 */
const acceptRow = (zoneId: string, rowIdx: number) => {
  setAcceptedRows(previous => {
    const next = new Set(previous)
    next.add(`${zoneId}-${rowIdx}`)
    return next
  })
}
// Edit cell
/**
 * Opens the inline editor for `cell`, seeding it with the cell's stored text.
 */
const startEdit = (cell: GridCell) => {
  // The click handler sits on the cell wrapper, so a click inside the already
  // open <input> (e.g. to place the caret) bubbles up to here as well.
  // Re-seeding the editor in that case would discard what the user has typed.
  if (editingCell === cell.cell_id) return
  setEditingCell(cell.cell_id)
  setEditText(cell.text)
}
// Mirrors the id of the session on screen so that a save finishing after the
// user navigated away can tell it no longer belongs to the visible grid.
const activeSessionIdRef = useRef<string | null>(null)
useEffect(() => {
  activeSessionIdRef.current = currentSession?.id ?? null
})
/**
 * Persists the text of the cell being edited and, once the server confirmed
 * it, mirrors the change into the local grid.
 *
 * - A non-OK response is treated as a failure: it is logged and the local
 *   grid keeps the old text, so the display never shows unsaved content.
 * - The grid is updated immutably; cell objects held in state are not mutated.
 * - If another session is on screen by the time the request returns, the
 *   local grid is left alone (the edit is still saved server-side).
 * - The editor is only closed if it still belongs to the saved cell, so an
 *   edit started on another cell while the request was running stays open.
 */
const saveEdit = async () => {
  if (!editingCell || !currentSession) return
  const cellId = editingCell
  const text = editText
  const sessionId = currentSession.id
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/update-cell`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ cell_id: cellId, text }),
    })
    if (!res.ok) throw new Error(`update-cell responded with HTTP ${res.status}`)
    // Update local state
    if (activeSessionIdRef.current === sessionId) {
      setGrid(prev => {
        if (!prev?.zones) return prev
        return {
          ...prev,
          zones: prev.zones.map(zone => ({
            ...zone,
            cells: zone.cells.map(cell => (cell.cell_id === cellId ? { ...cell, text } : cell)),
          })),
        }
      })
    }
  } catch (e) {
    console.error('Failed to save cell:', e)
  }
  setEditingCell(current => (current === cellId ? null : current))
}
// Mark as ground truth
/**
 * Marks one session as ground truth on the server and reflects the result
 * locally: toast state, the session's `has_ground_truth` flag and the
 * reviewed counter.
 *
 * The counter is only incremented when the session was not already ground
 * truth; the "Markieren & Weiter" button is offered for reviewed sessions too
 * and would otherwise push the progress above 100 %.
 *
 * @param sessionId Id of the session to mark.
 */
const markGroundTruth = async (sessionId: string) => {
  const alreadyMarked = allSessions.some(s => s.id === sessionId && s.has_ground_truth)
  setMarking(true)
  setMarkResult(null)
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth`, {
      method: 'POST',
    })
    if (!res.ok) {
      setMarkResult('error')
      return
    }
    setMarkResult('success')
    // Update local session state
    setAllSessions(prev => prev.map(s =>
      s.id === sessionId ? { ...s, has_ground_truth: true } : s
    ))
    if (!alreadyMarked) setReviewedCount(prev => prev + 1)
  } catch (e) {
    console.error('Failed to mark ground truth:', e)
    setMarkResult('error')
  } finally {
    setMarking(false)
  }
}
// Batch mark
/**
 * Marks every selected session as ground truth, one request after another,
 * then reports how many succeeded in a toast that disappears after 3 s.
 *
 * Only sessions whose request succeeded are flagged locally; sessions that
 * failed stay selected so the batch can simply be retried. The reviewed
 * counter grows only by sessions that were not ground truth before, so
 * re-marking reviewed sessions does not inflate the progress.
 */
const batchMark = async () => {
  setMarking(true)
  const marked = new Set<string>()
  try {
    for (const sid of selectedSessions) {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}/mark-ground-truth`, {
          method: 'POST',
        })
        if (res.ok) {
          marked.add(sid)
        } else {
          console.error(`Failed to mark session ${sid}: HTTP ${res.status}`)
        }
      } catch (e) {
        // Best effort: one failing session must not abort the rest of the batch.
        console.error(`Failed to mark session ${sid}:`, e)
      }
    }
    const newlyReviewed = allSessions.filter(s => marked.has(s.id) && !s.has_ground_truth).length
    setAllSessions(prev => prev.map(s =>
      marked.has(s.id) ? { ...s, has_ground_truth: true } : s
    ))
    setReviewedCount(prev => prev + newlyReviewed)
    setSelectedSessions(prev => new Set([...prev].filter(id => !marked.has(id))))
    setMarkResult(`${marked.size} Sessions als Ground Truth markiert`)
    setTimeout(() => setMarkResult(null), 3000)
  } finally {
    setMarking(false)
  }
}
// All cells for current grid
// Flattened across zones; empty while no grid (or no zone list) is loaded.
const allCells = (grid?.zones ?? []).flatMap(zone => zone.cells)
// Cells below 50 % confidence; a cell without a confidence value is not counted.
const lowConfCells = allCells.filter(cell => {
  const confidence = cell.confidence ?? 100
  return confidence < 50
})
// Original scan of the session on screen, null when the queue is empty.
const imageUrl = !currentSession
  ? null
  : `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${currentSession.id}/image/original`
return (
<AIToolsSidebarResponsive>
<div className="max-w-[1600px] mx-auto p-4 space-y-4">
<PagePurpose
title="Ground Truth Review"
purpose="Effiziente Massenpruefung von OCR-Sessions: Bild und Grid nebeneinander pruefen, Fehler inline korrigieren, Sessions als Ground Truth markieren."
audience={['Entwickler', 'QA']}
defaultCollapsed
architecture={{
services: ['klausur-service (FastAPI, Port 8086)'],
databases: ['PostgreSQL (ocr_pipeline_sessions)'],
}}
relatedPages={[
{ name: 'OCR Pipeline', href: '/ai/ocr-pipeline', description: 'OCR-Pipeline ausfuehren' },
{ name: 'OCR Regression', href: '/ai/ocr-regression', description: 'Regressions-Tests' },
]}
/>
{/* Progress Bar */}
<div className="bg-white rounded-lg border border-slate-200 p-4">
<div className="flex items-center justify-between mb-2">
<h2 className="text-lg font-bold text-slate-900">Ground Truth Review</h2>
<span className="text-sm text-slate-500">
{reviewedCount} von {totalCount} geprueft ({totalCount > 0 ? Math.round(reviewedCount / totalCount * 100) : 0}%)
</span>
</div>
<div className="w-full bg-slate-100 rounded-full h-2.5">
<div
className="bg-teal-500 h-2.5 rounded-full transition-all duration-500"
style={{ width: `${totalCount > 0 ? (reviewedCount / totalCount) * 100 : 0}%` }}
/>
</div>
</div>
{/* Filter + Queue */}
<div className="flex items-center gap-4">
<div className="flex gap-1 bg-slate-100 rounded-lg p-1">
{(['unreviewed', 'reviewed', 'all'] as const).map(f => (
<button
key={f}
onClick={() => { setFilter(f); setCurrentIdx(0) }}
className={`px-3 py-1.5 text-sm rounded-md transition-colors ${
filter === f
? 'bg-white text-slate-900 shadow-sm font-medium'
: 'text-slate-500 hover:text-slate-700'
}`}
>
{f === 'unreviewed' ? 'Offen' : f === 'reviewed' ? 'Geprueft' : 'Alle'}
<span className="ml-1 text-xs text-slate-400">
({allSessions.filter(s =>
f === 'unreviewed' ? !s.has_ground_truth && s.status === 'active'
: f === 'reviewed' ? s.has_ground_truth
: true
).length})
</span>
</button>
))}
</div>
{/* Navigation */}
<div className="flex items-center gap-2 ml-auto">
<button onClick={goPrev} disabled={currentIdx === 0}
className="p-2 rounded hover:bg-slate-100 disabled:opacity-30 disabled:cursor-not-allowed">
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 19l-7-7 7-7" />
</svg>
</button>
<span className="text-sm text-slate-500 min-w-[80px] text-center">
{filteredSessions.length > 0 ? `${currentIdx + 1} / ${filteredSessions.length}` : '—'}
</span>
<button onClick={goNext} disabled={currentIdx >= filteredSessions.length - 1}
className="p-2 rounded hover:bg-slate-100 disabled:opacity-30 disabled:cursor-not-allowed">
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
</svg>
</button>
</div>
{/* Batch mark button */}
{selectedSessions.size > 0 && (
<button
onClick={batchMark}
disabled={marking}
className="px-3 py-1.5 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700 disabled:opacity-50"
>
{selectedSessions.size} markieren
</button>
)}
</div>
{/* Toast */}
{markResult && (
<div className={`p-3 rounded-lg text-sm ${
markResult === 'error' ? 'bg-red-50 text-red-700 border border-red-200'
: markResult === 'success' ? 'bg-emerald-50 text-emerald-700 border border-emerald-200'
: 'bg-blue-50 text-blue-700 border border-blue-200'
}`}>
{markResult === 'success' ? 'Als Ground Truth markiert!' : markResult === 'error' ? 'Fehler beim Markieren' : markResult}
</div>
)}
{/* Main Content: Split View */}
{loading ? (
<div className="text-center py-12 text-slate-400">Lade Sessions...</div>
) : !currentSession ? (
<div className="text-center py-12 text-slate-400">
<p className="text-lg">Keine Sessions in dieser Ansicht</p>
</div>
) : (
<div className="grid grid-cols-2 gap-4" style={{ minHeight: '70vh' }}>
{/* Left: Original Image */}
<div className="bg-white rounded-lg border border-slate-200 overflow-hidden flex flex-col">
<div className="flex items-center justify-between px-3 py-2 border-b border-slate-100 bg-slate-50">
<span className="text-sm font-medium text-slate-700 truncate">
{currentSession.name || currentSession.filename}
</span>
<div className="flex items-center gap-2">
<button onClick={() => setZoom(z => Math.max(50, z - 25))}
className="px-2 py-0.5 text-xs bg-slate-200 rounded hover:bg-slate-300">-</button>
<span className="text-xs text-slate-500 w-10 text-center">{zoom}%</span>
<button onClick={() => setZoom(z => Math.min(300, z + 25))}
className="px-2 py-0.5 text-xs bg-slate-200 rounded hover:bg-slate-300">+</button>
</div>
</div>
<div ref={imageRef} className="flex-1 overflow-auto p-2">
{imageUrl && (
<img
src={imageUrl}
alt="Original scan"
style={{ width: `${zoom}%`, maxWidth: 'none' }}
className="block"
/>
)}
</div>
</div>
{/* Right: Grid Review */}
<div className="bg-white rounded-lg border border-slate-200 overflow-hidden flex flex-col">
<div className="flex items-center justify-between px-3 py-2 border-b border-slate-100 bg-slate-50">
<div className="flex items-center gap-3">
<span className="text-sm font-medium text-slate-700">
{allCells.length} Zellen
</span>
{lowConfCells.length > 0 && (
<span className="text-xs bg-red-100 text-red-700 px-2 py-0.5 rounded-full">
{lowConfCells.length} niedrige Konfidenz
</span>
)}
</div>
<div className="flex items-center gap-2">
{!currentSession.has_ground_truth && (
<button
onClick={() => markGroundTruth(currentSession.id)}
disabled={marking}
className="px-3 py-1 bg-teal-600 text-white text-xs rounded hover:bg-teal-700 disabled:opacity-50"
>
{marking ? 'Markiere...' : 'Als Ground Truth markieren'}
</button>
)}
{currentSession.has_ground_truth && (
<span className="text-xs bg-emerald-100 text-emerald-700 px-2 py-0.5 rounded-full">
Ground Truth
</span>
)}
<button
  onClick={() => {
    // Wait for the mark request instead of advancing on a fixed 500 ms timer.
    void markGroundTruth(currentSession.id).then(() => {
      // In the "unreviewed" queue a freshly marked session drops out of the
      // list, so the current index already points at the next session;
      // advancing again would skip one. If marking failed, the session stays
      // in place so the error toast refers to what is on screen.
      if (filter !== 'unreviewed') goNext()
    })
  }}
  disabled={marking}
  className="px-3 py-1 bg-slate-600 text-white text-xs rounded hover:bg-slate-700 disabled:opacity-50"
>
  Markieren & Weiter
</button>
</div>
</div>
{/* Grid Content */}
<div className="flex-1 overflow-auto">
{loadingGrid ? (
<div className="flex items-center justify-center h-full text-slate-400">
<svg className="animate-spin h-6 w-6 mr-2" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
Lade Grid...
</div>
) : !grid || !grid.zones ? (
<div className="text-center py-8 text-slate-400 text-sm">
Kein Grid vorhanden. Bitte zuerst die Pipeline ausfuehren.
</div>
) : (
<div className="p-3 space-y-4">
{grid.zones.map((zone, zi) => (
<div key={zone.zone_id || zi} className="space-y-1">
{/* Zone header */}
<div className="text-xs text-slate-400 uppercase tracking-wide">
Zone {zi + 1} ({zone.zone_type})
{zone.columns?.length > 0 && (
<span className="ml-2">
{zone.columns.map(c => c.col_type.replace('column_', '')).join(' | ')}
</span>
)}
</div>
{/* Group cells by row */}
{Array.from(new Set(zone.cells.map(c => c.row_index)))
.sort((a, b) => a - b)
.map(rowIdx => {
const rowCells = zone.cells
.filter(c => c.row_index === rowIdx)
.sort((a, b) => a.col_index - b.col_index)
const rowKey = `${zone.zone_id || zi}-${rowIdx}`
const isAccepted = acceptedRows.has(rowKey)
return (
<div
key={rowKey}
className={`flex items-start gap-1 group ${isAccepted ? 'opacity-60' : ''}`}
>
{/* Quick accept button */}
<button
onClick={() => acceptRow(zone.zone_id || String(zi), rowIdx)}
className={`flex-shrink-0 w-6 h-6 rounded flex items-center justify-center mt-0.5 transition-colors ${
isAccepted
? 'bg-emerald-100 text-emerald-600'
: 'bg-slate-100 text-slate-400 hover:bg-emerald-100 hover:text-emerald-600'
}`}
title="Zeile als korrekt markieren"
>
<svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
</button>
{/* Cells */}
<div className="flex-1 flex gap-1 flex-wrap">
{rowCells.map(cell => (
<div
key={cell.cell_id}
className={`flex-1 min-w-[80px] px-2 py-1 rounded text-sm border cursor-pointer transition-colors
${confidenceColor(cell.confidence)}
${confidenceBorder(cell.confidence)}
${editingCell === cell.cell_id ? 'ring-2 ring-teal-400' : 'hover:border-teal-300'}
${cell.is_bold ? 'font-bold' : ''}
`}
onClick={() => !isAccepted && startEdit(cell)}
title={`Konfidenz: ${cell.confidence ?? '?'}% | ${cell.col_type}`}
>
{editingCell === cell.cell_id ? (
<input
autoFocus
value={editText}
onChange={e => setEditText(e.target.value)}
onBlur={saveEdit}
onKeyDown={e => {
if (e.key === 'Enter') saveEdit()
if (e.key === 'Escape') setEditingCell(null)
}}
className="w-full bg-transparent outline-none text-sm"
/>
) : (
<span className={cell.text ? '' : 'text-slate-300 italic'}>
{cell.text || '(leer)'}
</span>
)}
</div>
))}
</div>
</div>
)
})}
</div>
))}
</div>
)}
</div>
</div>
</div>
)}
{/* Session List (collapsed) */}
{filteredSessions.length > 1 && (
<details className="bg-white rounded-lg border border-slate-200">
<summary className="px-4 py-3 cursor-pointer text-sm font-medium text-slate-700 hover:bg-slate-50">
Session-Liste ({filteredSessions.length})
</summary>
<div className="border-t border-slate-100 max-h-60 overflow-y-auto">
{filteredSessions.map((s, idx) => (
<div
key={s.id}
className={`flex items-center gap-3 px-4 py-2 text-sm cursor-pointer hover:bg-slate-50 border-b border-slate-50 ${
idx === currentIdx ? 'bg-teal-50' : ''
}`}
onClick={() => setCurrentIdx(idx)}
>
<input
  type="checkbox"
  checked={selectedSessions.has(s.id)}
  // The row's onClick switches the active session. Stopping propagation in
  // onChange does not help because the row listens for the click event, so
  // the click itself has to be stopped here to make ticking a box
  // selection-only.
  onClick={e => e.stopPropagation()}
  onChange={() => {
    setSelectedSessions(prev => {
      const next = new Set(prev)
      if (next.has(s.id)) next.delete(s.id)
      else next.add(s.id)
      return next
    })
  }}
  className="rounded border-slate-300"
/>
<span className={`w-2 h-2 rounded-full flex-shrink-0 ${s.has_ground_truth ? 'bg-emerald-400' : 'bg-slate-300'}`} />
<span className="truncate flex-1">{s.name || s.filename || s.id}</span>
{s.document_category && (
<span className="text-xs bg-slate-100 px-1.5 py-0.5 rounded text-slate-500">{s.document_category}</span>
)}
</div>
))}
</div>
</details>
)}
</div>
</AIToolsSidebarResponsive>
)
}