feat(ocr-pipeline): ground-truth comparison tool for column detection

Side-by-side view: the auto-detected result (read-only) next to a GT editor
where the teacher draws the correct columns. A diff table compares Auto vs GT
columns, matched by IoU. GT data is persisted per session for algorithm tuning.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-27 22:48:37 +01:00
parent 03fa186fec
commit 587b066a40
5 changed files with 352 additions and 16 deletions

View File

@@ -1,15 +1,17 @@
'use client'
import { useState } from 'react'
import { useState, useMemo } from 'react'
import type { ColumnResult, ColumnGroundTruth, PageRegion } from '@/app/(admin)/ai/ocr-pipeline/types'
/** Props accepted by the ColumnControls component. */
interface ColumnControlsProps {
/** Column detection result to display; the component renders nothing while this is null. */
columnResult: ColumnResult | null
/** Zero-argument callback. NOTE(review): presumably re-runs detection; the call site is outside the visible code, so confirm. */
onRerun: () => void
/** Zero-argument callback. NOTE(review): presumably switches to manual column marking; the call site is outside the visible code, so confirm. */
onManualMode: () => void
/** Click handler of the "Ground Truth eintragen" / "Ground Truth bearbeiten" button. */
onGtMode: () => void
/** Callback receiving a ColumnGroundTruth value. NOTE(review): the call site is outside the visible code, so confirm when it fires. */
onGroundTruth: (gt: ColumnGroundTruth) => void
/** Zero-argument callback. NOTE(review): presumably advances to the next pipeline step; confirm. */
onNext: () => void
/** NOTE(review): presumably true while a detection run is in progress; usage is outside the visible code, so confirm. */
isDetecting: boolean
/**
 * Ground-truth columns, or null when none are available. When non-null, the
 * diff table (Auto vs GT) is computed and the GT button label switches from
 * "eintragen" to "bearbeiten".
 */
savedGtColumns: PageRegion[] | null
}
const TYPE_COLORS: Record<string, string> = {
@@ -42,9 +44,95 @@ const METHOD_LABELS: Record<string, string> = {
position_fallback: 'Fallback',
}
export function ColumnControls({ columnResult, onRerun, onManualMode, onGroundTruth, onNext, isDetecting }: ColumnControlsProps) {
/** One row of the Auto vs Ground-Truth comparison table, as built by computeDiff. */
interface DiffRow {
/** 1-based row number, assigned in the order rows are produced. */
index: number
/** Auto-detected column, or null when a GT column has no matching auto column. */
autoCol: PageRegion | null
/** Ground-truth column, or null when an auto column has no matching GT column. */
gtCol: PageRegion | null
/** GT x minus auto x for matched pairs; null for unmatched rows. */
diffX: number | null
/** GT width minus auto width for matched pairs; null for unmatched rows. */
diffW: number | null
/** True when a matched pair has different `type` values; always false for unmatched rows. */
typeMismatch: boolean
}
/**
 * Pair auto-detected columns with ground-truth (GT) columns by horizontal overlap.
 *
 * For every auto/GT pair the intersection-over-union (IoU) of their X extents
 * (`x` .. `x + width`) is computed. Pairs whose IoU exceeds `minIoU` are
 * assigned best-first, so each column is used at most once and a weak overlap
 * can never take a GT column away from an auto column that fits it better.
 *
 * @param autoCols - Columns produced by automatic detection.
 * @param gtCols - Ground-truth columns to compare against.
 * @param minIoU - IoU a pair must exceed to count as a match (default 0.3).
 *   Values below 0 are treated as 0, so non-overlapping columns never match.
 * @returns Matched rows in auto-column order, followed by unmatched auto
 *   columns, followed by unmatched GT columns. `index` numbers the rows from 1.
 */
function computeDiff(autoCols: PageRegion[], gtCols: PageRegion[], minIoU = 0.3): DiffRow[] {
  const threshold = Math.max(0, minIoU)

  // Score every auto/GT pair and keep only those that clear the threshold.
  const candidates: { ai: number; gi: number; gt: PageRegion; iou: number }[] = []
  autoCols.forEach((a, ai) => {
    gtCols.forEach((g, gi) => {
      const overlapStart = Math.max(a.x, g.x)
      const overlapEnd = Math.min(a.x + a.width, g.x + g.width)
      const overlap = Math.max(0, overlapEnd - overlapStart)
      const union = a.width + g.width - overlap
      const iou = union > 0 ? overlap / union : 0
      if (iou > threshold) candidates.push({ ai, gi, gt: g, iou })
    })
  })

  // Strongest overlaps first. The sort is stable, so ties keep their
  // (auto index, GT index) generation order.
  candidates.sort((p, q) => q.iou - p.iou)

  // Assign best-first; each auto column and each GT column is used at most once.
  const gtByAuto = new Map<number, PageRegion>()
  const usedGt = new Set<number>()
  for (const { ai, gi, gt } of candidates) {
    if (gtByAuto.has(ai) || usedGt.has(gi)) continue
    gtByAuto.set(ai, gt)
    usedGt.add(gi)
  }

  const rows: DiffRow[] = []

  // Matched pairs, listed in auto-column order
  autoCols.forEach((a, ai) => {
    const g = gtByAuto.get(ai)
    if (!g) return
    rows.push({
      index: rows.length + 1,
      autoCol: a,
      gtCol: g,
      diffX: g.x - a.x,
      diffW: g.width - a.width,
      typeMismatch: a.type !== g.type,
    })
  })

  // Unmatched auto columns
  autoCols.forEach((a, ai) => {
    if (gtByAuto.has(ai)) return
    rows.push({
      index: rows.length + 1,
      autoCol: a,
      gtCol: null,
      diffX: null,
      diffW: null,
      typeMismatch: false,
    })
  })

  // Unmatched GT columns
  gtCols.forEach((g, gi) => {
    if (usedGt.has(gi)) return
    rows.push({
      index: rows.length + 1,
      autoCol: null,
      gtCol: g,
      diffX: null,
      diffW: null,
      typeMismatch: false,
    })
  })

  return rows
}
export function ColumnControls({ columnResult, onRerun, onManualMode, onGtMode, onGroundTruth, onNext, isDetecting, savedGtColumns }: ColumnControlsProps) {
const [gtSaved, setGtSaved] = useState(false)
const diffRows = useMemo(() => {
if (!columnResult || !savedGtColumns) return null
const autoCols = columnResult.columns.filter(c => c.type.startsWith('column') || c.type === 'page_ref')
const gtCols = savedGtColumns.filter(c => c.type.startsWith('column') || c.type === 'page_ref')
return computeDiff(autoCols, gtCols)
}, [columnResult, savedGtColumns])
if (!columnResult) return null
const columns = columnResult.columns.filter((c: PageRegion) => c.type.startsWith('column') || c.type === 'page_ref')
@@ -58,7 +146,7 @@ export function ColumnControls({ columnResult, onRerun, onManualMode, onGroundTr
return (
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4 space-y-4">
{/* Summary */}
<div className="flex items-center gap-3">
<div className="flex items-center gap-3 flex-wrap">
<div className="text-sm text-gray-600 dark:text-gray-400">
<span className="font-medium text-gray-800 dark:text-gray-200">{columns.length} Spalten</span> erkannt
{columnResult.duration_seconds > 0 && (
@@ -78,6 +166,12 @@ export function ColumnControls({ columnResult, onRerun, onManualMode, onGroundTr
>
Manuell markieren
</button>
<button
onClick={onGtMode}
className="text-xs px-2 py-1 bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-400 rounded hover:bg-amber-200 dark:hover:bg-amber-900/50 transition-colors"
>
{savedGtColumns ? 'Ground Truth bearbeiten' : 'Ground Truth eintragen'}
</button>
</div>
{/* Column list */}
@@ -114,6 +208,82 @@ export function ColumnControls({ columnResult, onRerun, onManualMode, onGroundTr
))}
</div>
{/* Diff table (Auto vs GT) */}
{diffRows && diffRows.length > 0 && (
<div className="border-t border-gray-100 dark:border-gray-700 pt-3">
<div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-2">
Vergleich: Auto vs Ground Truth
</div>
<div className="overflow-x-auto">
<table className="w-full text-xs">
<thead>
<tr className="text-gray-500 dark:text-gray-400 border-b border-gray-100 dark:border-gray-700">
<th className="text-left py-1 pr-2">#</th>
<th className="text-left py-1 pr-2">Auto (Typ, x, w)</th>
<th className="text-left py-1 pr-2">GT (Typ, x, w)</th>
<th className="text-right py-1 pr-2">Diff X</th>
<th className="text-right py-1">Diff W</th>
</tr>
</thead>
<tbody>
{diffRows.map((row) => (
<tr
key={row.index}
className={
!row.autoCol || !row.gtCol || row.typeMismatch
? 'bg-red-50 dark:bg-red-900/10'
: (row.diffX !== null && Math.abs(row.diffX) > 20) || (row.diffW !== null && Math.abs(row.diffW) > 20)
? 'bg-amber-50 dark:bg-amber-900/10'
: ''
}
>
<td className="py-1 pr-2 font-mono text-gray-400">{row.index}</td>
<td className="py-1 pr-2 font-mono">
{row.autoCol ? (
<span>
<span className={`inline-block px-1 rounded ${TYPE_COLORS[row.autoCol.type] || ''}`}>
{TYPE_LABELS[row.autoCol.type] || row.autoCol.type}
</span>
{' '}{row.autoCol.x}, {row.autoCol.width}
</span>
) : (
<span className="text-red-400">fehlt</span>
)}
</td>
<td className="py-1 pr-2 font-mono">
{row.gtCol ? (
<span>
<span className={`inline-block px-1 rounded ${TYPE_COLORS[row.gtCol.type] || ''}`}>
{TYPE_LABELS[row.gtCol.type] || row.gtCol.type}
</span>
{' '}{row.gtCol.x}, {row.gtCol.width}
</span>
) : (
<span className="text-red-400">fehlt</span>
)}
</td>
<td className="py-1 pr-2 text-right font-mono">
{row.diffX !== null ? (
<span className={Math.abs(row.diffX) > 20 ? 'text-amber-600 dark:text-amber-400' : 'text-gray-500'}>
{row.diffX > 0 ? '+' : ''}{row.diffX}
</span>
) : '—'}
</td>
<td className="py-1 text-right font-mono">
{row.diffW !== null ? (
<span className={Math.abs(row.diffW) > 20 ? 'text-amber-600 dark:text-amber-400' : 'text-gray-500'}>
{row.diffW > 0 ? '+' : ''}{row.diffW}
</span>
) : '—'}
</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
)}
{/* Ground Truth + Navigation */}
<div className="flex items-center justify-between pt-2 border-t border-gray-100 dark:border-gray-700">
<div className="flex items-center gap-2">