Files
Benjamin Admin 587b066a40 feat(ocr-pipeline): ground-truth comparison tool for column detection
Side-by-side view: auto result (readonly) vs GT editor where teacher
draws correct columns. Diff table shows Auto vs GT with IoU matching.
GT data persisted per session for algorithm tuning.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 22:48:37 +01:00

321 lines
12 KiB
TypeScript

'use client'
import { useState, useMemo } from 'react'
import type { ColumnResult, ColumnGroundTruth, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
interface ColumnControlsProps {
columnResult: ColumnResult | null
onRerun: () => void
onManualMode: () => void
onGtMode: () => void
onGroundTruth: (gt: ColumnGroundTruth) => void
onNext: () => void
isDetecting: boolean
savedGtColumns: PageRegion[] | null
}
const TYPE_COLORS: Record<string, string> = {
column_en: 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400',
column_de: 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400',
column_example: 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400',
column_text: 'bg-cyan-100 text-cyan-700 dark:bg-cyan-900/30 dark:text-cyan-400',
page_ref: 'bg-purple-100 text-purple-700 dark:bg-purple-900/30 dark:text-purple-400',
column_marker: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400',
column_ignore: 'bg-gray-100 text-gray-500 dark:bg-gray-700/30 dark:text-gray-500',
header: 'bg-gray-100 text-gray-600 dark:bg-gray-700/50 dark:text-gray-400',
footer: 'bg-gray-100 text-gray-600 dark:bg-gray-700/50 dark:text-gray-400',
}
const TYPE_LABELS: Record<string, string> = {
column_en: 'EN',
column_de: 'DE',
column_example: 'Beispiel',
column_text: 'Text',
page_ref: 'Seite',
column_marker: 'Marker',
column_ignore: 'Ignorieren',
header: 'Header',
footer: 'Footer',
}
const METHOD_LABELS: Record<string, string> = {
content: 'Inhalt',
position_enhanced: 'Position',
position_fallback: 'Fallback',
}
interface DiffRow {
index: number
autoCol: PageRegion | null
gtCol: PageRegion | null
diffX: number | null
diffW: number | null
typeMismatch: boolean
}
/** Match auto columns to GT columns by overlap on X-axis (IoU > 50%) */
function computeDiff(autoCols: PageRegion[], gtCols: PageRegion[]): DiffRow[] {
const rows: DiffRow[] = []
const usedGt = new Set<number>()
const usedAuto = new Set<number>()
// Match auto → GT by best X-axis overlap
for (let ai = 0; ai < autoCols.length; ai++) {
const a = autoCols[ai]
let bestIdx = -1
let bestIoU = 0
for (let gi = 0; gi < gtCols.length; gi++) {
if (usedGt.has(gi)) continue
const g = gtCols[gi]
const overlapStart = Math.max(a.x, g.x)
const overlapEnd = Math.min(a.x + a.width, g.x + g.width)
const overlap = Math.max(0, overlapEnd - overlapStart)
const union = (a.width + g.width) - overlap
const iou = union > 0 ? overlap / union : 0
if (iou > bestIoU) {
bestIoU = iou
bestIdx = gi
}
}
if (bestIdx >= 0 && bestIoU > 0.3) {
usedGt.add(bestIdx)
usedAuto.add(ai)
const g = gtCols[bestIdx]
rows.push({
index: rows.length + 1,
autoCol: a,
gtCol: g,
diffX: g.x - a.x,
diffW: g.width - a.width,
typeMismatch: a.type !== g.type,
})
}
}
// Unmatched auto columns
for (let ai = 0; ai < autoCols.length; ai++) {
if (usedAuto.has(ai)) continue
rows.push({
index: rows.length + 1,
autoCol: autoCols[ai],
gtCol: null,
diffX: null,
diffW: null,
typeMismatch: false,
})
}
// Unmatched GT columns
for (let gi = 0; gi < gtCols.length; gi++) {
if (usedGt.has(gi)) continue
rows.push({
index: rows.length + 1,
autoCol: null,
gtCol: gtCols[gi],
diffX: null,
diffW: null,
typeMismatch: false,
})
}
return rows
}
export function ColumnControls({ columnResult, onRerun, onManualMode, onGtMode, onGroundTruth, onNext, isDetecting, savedGtColumns }: ColumnControlsProps) {
const [gtSaved, setGtSaved] = useState(false)
const diffRows = useMemo(() => {
if (!columnResult || !savedGtColumns) return null
const autoCols = columnResult.columns.filter(c => c.type.startsWith('column') || c.type === 'page_ref')
const gtCols = savedGtColumns.filter(c => c.type.startsWith('column') || c.type === 'page_ref')
return computeDiff(autoCols, gtCols)
}, [columnResult, savedGtColumns])
if (!columnResult) return null
const columns = columnResult.columns.filter((c: PageRegion) => c.type.startsWith('column') || c.type === 'page_ref')
const headerFooter = columnResult.columns.filter((c: PageRegion) => !c.type.startsWith('column') && c.type !== 'page_ref')
const handleGt = (isCorrect: boolean) => {
onGroundTruth({ is_correct: isCorrect })
setGtSaved(true)
}
return (
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-4">
{/* Summary */}
<div className="flex items-center gap-3 flex-wrap">
<div className="text-sm text-gray-600 dark:text-gray-400">
<span className="font-medium text-gray-800 dark:text-gray-200">{columns.length} Spalten</span> erkannt
{columnResult.duration_seconds > 0 && (
<span className="ml-2 text-xs">({columnResult.duration_seconds}s)</span>
)}
</div>
<button
onClick={onRerun}
disabled={isDetecting}
className="text-xs px-2 py-1 bg-gray-100 dark:bg-gray-700 rounded hover:bg-gray-200 dark:hover:bg-gray-600 transition-colors disabled:opacity-50"
>
Erneut erkennen
</button>
<button
onClick={onManualMode}
className="text-xs px-2 py-1 bg-teal-100 text-teal-700 dark:bg-teal-900/30 dark:text-teal-400 rounded hover:bg-teal-200 dark:hover:bg-teal-900/50 transition-colors"
>
Manuell markieren
</button>
<button
onClick={onGtMode}
className="text-xs px-2 py-1 bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-400 rounded hover:bg-amber-200 dark:hover:bg-amber-900/50 transition-colors"
>
{savedGtColumns ? 'Ground Truth bearbeiten' : 'Ground Truth eintragen'}
</button>
</div>
{/* Column list */}
<div className="space-y-2">
{columns.map((col: PageRegion, i: number) => (
<div key={i} className="flex items-center gap-3 text-sm">
<span className={`px-2 py-0.5 rounded text-xs font-medium ${TYPE_COLORS[col.type] || ''}`}>
{TYPE_LABELS[col.type] || col.type}
</span>
{col.classification_confidence != null && col.classification_confidence < 1.0 && (
<span className="text-xs font-medium text-gray-600 dark:text-gray-300">
{Math.round(col.classification_confidence * 100)}%
</span>
)}
{col.classification_method && (
<span className="text-xs text-gray-400 dark:text-gray-500">
({METHOD_LABELS[col.classification_method] || col.classification_method})
</span>
)}
<span className="text-gray-500 dark:text-gray-400 text-xs font-mono">
x={col.x} y={col.y} {col.width}x{col.height}px
</span>
</div>
))}
{headerFooter.map((r: PageRegion, i: number) => (
<div key={`hf-${i}`} className="flex items-center gap-3 text-sm">
<span className={`px-2 py-0.5 rounded text-xs font-medium ${TYPE_COLORS[r.type] || ''}`}>
{TYPE_LABELS[r.type] || r.type}
</span>
<span className="text-gray-500 dark:text-gray-400 text-xs font-mono">
x={r.x} y={r.y} {r.width}x{r.height}px
</span>
</div>
))}
</div>
{/* Diff table (Auto vs GT) */}
{diffRows && diffRows.length > 0 && (
<div className="border-t border-gray-100 dark:border-gray-700 pt-3">
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
Vergleich: Auto vs Ground Truth
</div>
<div className="overflow-x-auto">
<table className="w-full text-xs">
<thead>
<tr className="text-gray-500 dark:text-gray-400 border-b border-gray-100 dark:border-gray-700">
<th className="text-left py-1 pr-2">#</th>
<th className="text-left py-1 pr-2">Auto (Typ, x, w)</th>
<th className="text-left py-1 pr-2">GT (Typ, x, w)</th>
<th className="text-right py-1 pr-2">Diff X</th>
<th className="text-right py-1">Diff W</th>
</tr>
</thead>
<tbody>
{diffRows.map((row) => (
<tr
key={row.index}
className={
!row.autoCol || !row.gtCol || row.typeMismatch
? 'bg-red-50 dark:bg-red-900/10'
: (row.diffX !== null && Math.abs(row.diffX) > 20) || (row.diffW !== null && Math.abs(row.diffW) > 20)
? 'bg-amber-50 dark:bg-amber-900/10'
: ''
}
>
<td className="py-1 pr-2 font-mono text-gray-400">{row.index}</td>
<td className="py-1 pr-2 font-mono">
{row.autoCol ? (
<span>
<span className={`inline-block px-1 rounded ${TYPE_COLORS[row.autoCol.type] || ''}`}>
{TYPE_LABELS[row.autoCol.type] || row.autoCol.type}
</span>
{' '}{row.autoCol.x}, {row.autoCol.width}
</span>
) : (
<span className="text-red-400">fehlt</span>
)}
</td>
<td className="py-1 pr-2 font-mono">
{row.gtCol ? (
<span>
<span className={`inline-block px-1 rounded ${TYPE_COLORS[row.gtCol.type] || ''}`}>
{TYPE_LABELS[row.gtCol.type] || row.gtCol.type}
</span>
{' '}{row.gtCol.x}, {row.gtCol.width}
</span>
) : (
<span className="text-red-400">fehlt</span>
)}
</td>
<td className="py-1 pr-2 text-right font-mono">
{row.diffX !== null ? (
<span className={Math.abs(row.diffX) > 20 ? 'text-amber-600 dark:text-amber-400' : 'text-gray-500'}>
{row.diffX > 0 ? '+' : ''}{row.diffX}
</span>
) : '—'}
</td>
<td className="py-1 text-right font-mono">
{row.diffW !== null ? (
<span className={Math.abs(row.diffW) > 20 ? 'text-amber-600 dark:text-amber-400' : 'text-gray-500'}>
{row.diffW > 0 ? '+' : ''}{row.diffW}
</span>
) : '—'}
</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
)}
{/* Ground Truth + Navigation */}
<div className="flex items-center justify-between pt-2 border-t border-gray-100 dark:border-gray-700">
<div className="flex items-center gap-2">
<span className="text-sm text-gray-500 dark:text-gray-400">Spalten korrekt?</span>
{gtSaved ? (
<span className="text-xs text-green-600 dark:text-green-400">Gespeichert</span>
) : (
<>
<button
onClick={() => handleGt(true)}
className="text-xs px-3 py-1 bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400 rounded hover:bg-green-200 dark:hover:bg-green-900/50 transition-colors"
>
Ja
</button>
<button
onClick={() => handleGt(false)}
className="text-xs px-3 py-1 bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400 rounded hover:bg-red-200 dark:hover:bg-red-900/50 transition-colors"
>
Nein
</button>
</>
)}
</div>
<button
onClick={onNext}
className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors text-sm font-medium"
>
Weiter
</button>
</div>
</div>
)
}