feat(ocr): Add Ground Truth labeling UI for OCR comparison
Adds a step-through tool for creating 100% correct reference data (ground truth) with position information. Users scan a page, review each vocabulary entry with image crops, confirm or correct the OCR text, and save the result as JSON. Backend: extract_entries_with_boxes() helper + 3 endpoints (extract-with-boxes, ground-truth save/load). Frontend: GroundTruthPanel component with SVG overlay, ImageCrop, keyboard shortcuts (Enter/Tab/arrows), and tab navigation in page.tsx. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -12,7 +12,7 @@ import { useState, useEffect, useCallback, useMemo } from 'react'
|
||||
import { PagePurpose } from '@/components/common/PagePurpose'
|
||||
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
|
||||
import { QRCodeUpload, UploadedFile } from '@/components/QRCodeUpload'
|
||||
import { GridOverlay, GridStats, GridLegend, CellCorrectionDialog, BlockReviewPanel, BlockReviewSummary, getCellBlockNumber } from '@/components/ocr'
|
||||
import { GridOverlay, GridStats, GridLegend, CellCorrectionDialog, BlockReviewPanel, BlockReviewSummary, getCellBlockNumber, GroundTruthPanel } from '@/components/ocr'
|
||||
import type { GridData, GridCell, BlockReviewData, BlockStatus } from '@/components/ocr'
|
||||
|
||||
interface VocabEntry {
|
||||
@@ -155,6 +155,9 @@ export default function OCRComparePage() {
|
||||
const [isExporting, setIsExporting] = useState(false)
|
||||
const [exportSuccess, setExportSuccess] = useState(false)
|
||||
|
||||
// Tab State (compare vs ground truth)
|
||||
const [activeTab, setActiveTab] = useState<'compare' | 'groundtruth'>('compare')
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
// Load session history
|
||||
@@ -1065,8 +1068,43 @@ export default function OCRComparePage() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Tab Bar */}
|
||||
{sessionId && pageCount > 0 && (
|
||||
<div className="flex gap-1 bg-slate-100 rounded-lg p-1">
|
||||
<button
|
||||
onClick={() => setActiveTab('compare')}
|
||||
className={`flex-1 px-4 py-2 rounded-md text-sm font-medium transition-colors ${
|
||||
activeTab === 'compare'
|
||||
? 'bg-white text-slate-900 shadow-sm'
|
||||
: 'text-slate-600 hover:text-slate-900'
|
||||
}`}
|
||||
>
|
||||
OCR Vergleich
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setActiveTab('groundtruth')}
|
||||
className={`flex-1 px-4 py-2 rounded-md text-sm font-medium transition-colors ${
|
||||
activeTab === 'groundtruth'
|
||||
? 'bg-white text-slate-900 shadow-sm'
|
||||
: 'text-slate-600 hover:text-slate-900'
|
||||
}`}
|
||||
>
|
||||
Ground Truth
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Ground Truth Panel */}
|
||||
{activeTab === 'groundtruth' && sessionId && (
|
||||
<GroundTruthPanel
|
||||
sessionId={sessionId}
|
||||
selectedPage={selectedPage}
|
||||
pageImageUrl={`${KLAUSUR_API}/api/v1/vocab/sessions/${sessionId}/pdf-thumbnail/${selectedPage}?hires=true`}
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Full-Width Comparison View */}
|
||||
{(thumbnails[selectedPage] || result) && sessionId && (
|
||||
{activeTab === 'compare' && (thumbnails[selectedPage] || result) && sessionId && (
|
||||
<div className={`bg-white rounded-xl border border-slate-200 p-4 ${
|
||||
isFullscreen ? 'fixed inset-0 z-50 overflow-auto m-0 rounded-none bg-slate-50' : ''
|
||||
}`}>
|
||||
@@ -1477,7 +1515,7 @@ export default function OCRComparePage() {
|
||||
)}
|
||||
|
||||
{/* Comparison Summary */}
|
||||
{result?.comparison && (
|
||||
{activeTab === 'compare' && result?.comparison && (
|
||||
<div className="bg-white rounded-xl border border-slate-200 p-6">
|
||||
<h3 className="font-semibold text-slate-900 mb-4">Vergleichszusammenfassung</h3>
|
||||
|
||||
|
||||
605
admin-v2/components/ocr/GroundTruthPanel.tsx
Normal file
605
admin-v2/components/ocr/GroundTruthPanel.tsx
Normal file
@@ -0,0 +1,605 @@
|
||||
'use client'
|
||||
|
||||
/**
|
||||
* GroundTruthPanel — Step-through UI for labeling OCR ground truth.
|
||||
*
|
||||
* Shows page image with SVG overlay (color-coded bounding boxes),
|
||||
* alongside crops of the current entry and editable text fields.
|
||||
* Keyboard-driven: Enter=confirm, Tab=skip, Arrow keys=navigate.
|
||||
*/
|
||||
|
||||
import { useState, useEffect, useCallback, useRef } from 'react'
|
||||
|
||||
// ---------- Types ----------
|
||||
|
||||
/**
 * Axis-aligned rectangle on the page image.
 * Values are used as percentages (0-100) of the image width/height:
 * the crop math divides by 100 and the SVG overlay uses a 100x100 viewBox.
 */
interface BBox {
  /** Left edge. */
  x: number
  /** Top edge. */
  y: number
  /** Width. */
  w: number
  /** Height. */
  h: number
}
|
||||
|
||||
/** One vocabulary row returned by the extract-with-boxes endpoint, plus its review state. */
interface GTEntry {
  /** Index of the row in the detected grid. */
  row_index: number
  /** OCR text of the English column (editable by the reviewer). */
  english: string
  /** OCR text of the German column (editable by the reviewer). */
  german: string
  /** OCR text of the example column (editable by the reviewer). */
  example: string
  /** Row confidence; rendered as a percentage with 70 / 40 colour thresholds. */
  confidence: number
  /** Bounding box of the whole row. */
  bbox: BBox
  /** Bounding box of the English cell; a zero-width box means "not present". */
  bbox_en: BBox
  /** Bounding box of the German cell; a zero-width box means "not present". */
  bbox_de: BBox
  /** Bounding box of the example cell; a zero-width box means "not present". */
  bbox_ex: BBox
  /** Review state; a missing value is treated like 'pending'. */
  status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
}
|
||||
|
||||
/** Props accepted by GroundTruthPanel. */
interface GroundTruthPanelProps {
  /** Session whose page is being labeled; part of the API URLs. */
  sessionId: string
  /** Page index sent to the API; displayed to the user as `selectedPage + 1`. */
  selectedPage: number
  /** URL of the page image shown behind the overlay and used for the crops. */
  pageImageUrl: string
}
|
||||
|
||||
// ---------- Helpers ----------
|
||||
|
||||
/**
 * Fill / stroke colours for the overlay rectangles and the legend,
 * keyed by review status plus the synthetic 'current' key for the active row.
 */
const STATUS_COLORS: Record<string, { fill: string; stroke: string }> = {
  // yellow — the row currently being reviewed
  current: { fill: 'rgba(250,204,21,0.25)', stroke: '#eab308' },
  // green
  confirmed: { fill: 'rgba(34,197,94,0.18)', stroke: '#16a34a' },
  // blue
  edited: { fill: 'rgba(59,130,246,0.18)', stroke: '#2563eb' },
  // gray
  skipped: { fill: 'rgba(148,163,184,0.15)', stroke: '#94a3b8' },
  // transparent fill, outline only
  pending: { fill: 'rgba(0,0,0,0)', stroke: '#cbd5e1' },
}
|
||||
|
||||
/**
 * Picks the overlay colours for one entry.
 * The entry at `currentIndex` always gets the 'current' colours; every other
 * entry is coloured by its status, with a missing status treated as 'pending'.
 */
function getEntryColor(entry: GTEntry, index: number, currentIndex: number) {
  const isActive = index === currentIndex
  const colorKey = isActive ? 'current' : entry.status || 'pending'
  return STATUS_COLORS[colorKey]
}
|
||||
|
||||
// ---------- ImageCrop ----------
|
||||
|
||||
/**
 * Shows the part of the page image covered by `bbox` (percent coordinates).
 *
 * The full image is rendered absolutely positioned inside a clipping box and
 * shifted/scaled so that the bbox fills `maxWidth` pixels horizontally. The
 * visible height is capped at 120px. Renders nothing for empty boxes or boxes
 * smaller than one source pixel.
 */
function ImageCrop({ imageUrl, bbox, naturalWidth, naturalHeight, maxWidth = 380, label }: {
  imageUrl: string
  bbox: BBox
  naturalWidth: number
  naturalHeight: number
  maxWidth?: number
  label?: string
}) {
  const hasArea = Boolean(bbox) && bbox.w !== 0 && bbox.h !== 0
  if (!hasArea) return null

  // Size of the crop in source-image pixels.
  const cropWidthPx = (bbox.w / 100) * naturalWidth
  const cropHeightPx = (bbox.h / 100) * naturalHeight
  if (cropWidthPx < 1 || cropHeightPx < 1) return null

  // Zoom factor that makes the crop exactly `maxWidth` wide.
  const zoom = maxWidth / cropWidthPx
  const frameHeight = Math.min(cropHeightPx * zoom, 120)

  return (
    <div>
      {label && <div className="text-xs font-medium text-slate-500 mb-1">{label}</div>}
      <div
        className="rounded-lg border border-slate-200 overflow-hidden bg-white"
        style={{ width: maxWidth, height: frameHeight, overflow: 'hidden', position: 'relative' }}
      >
        <img
          src={imageUrl}
          alt=""
          draggable={false}
          style={{
            position: 'absolute',
            width: naturalWidth * zoom,
            height: naturalHeight * zoom,
            // Negative offsets move the bbox's top-left corner to the frame origin.
            left: -(bbox.x / 100) * naturalWidth * zoom,
            top: -(bbox.y / 100) * naturalHeight * zoom,
            maxWidth: 'none',
          }}
        />
      </div>
    </div>
  )
}
|
||||
|
||||
// ---------- Main Component ----------
|
||||
|
||||
/**
 * Step-through panel for labeling OCR ground truth of one page.
 *
 * Flow: "Erkennung starten" posts to the extract-with-boxes endpoint, then the
 * reviewer walks through the entries (Enter = confirm, Tab = skip, arrow keys =
 * navigate), and finally saves the result via the ground-truth endpoint.
 *
 * All labeling state is discarded whenever `sessionId` or `selectedPage`
 * changes, so entries extracted for one page can never be saved under another.
 *
 * @param sessionId    Session used in the API URLs.
 * @param selectedPage Page index used in the API URLs (shown as `selectedPage + 1`).
 * @param pageImageUrl Image displayed behind the overlay and used for the crops.
 */
export function GroundTruthPanel({ sessionId, selectedPage, pageImageUrl }: GroundTruthPanelProps) {
  const KLAUSUR_API = '/klausur-api'

  // State
  const [entries, setEntries] = useState<GTEntry[]>([])
  const [currentIndex, setCurrentIndex] = useState(0)
  const [loading, setLoading] = useState(false)
  const [saving, setSaving] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [imageNatural, setImageNatural] = useState({ w: 0, h: 0 })
  const [showSummary, setShowSummary] = useState(false)
  const [savedMessage, setSavedMessage] = useState<string | null>(null)

  // Editable fields for current entry
  const [editEn, setEditEn] = useState('')
  const [editDe, setEditDe] = useState('')
  const [editEx, setEditEx] = useState('')

  const panelRef = useRef<HTMLDivElement>(null)
  const enInputRef = useRef<HTMLInputElement>(null)
  // Monotonic token identifying the latest extraction; stale responses are ignored.
  const extractRequestRef = useRef(0)

  // Drop all labeling state when the session or page changes. Without this the
  // entries of the previous page would stay on screen and handleSave would
  // store them under the new page number.
  useEffect(() => {
    extractRequestRef.current += 1 // invalidate any extraction still in flight
    setEntries([])
    setCurrentIndex(0)
    setShowSummary(false)
    setSavedMessage(null)
    setError(null)
    setLoading(false)
  }, [sessionId, selectedPage])

  // Load natural image dimensions
  useEffect(() => {
    if (!pageImageUrl) return
    let cancelled = false
    // Hide crops until the dimensions of the new image are known.
    setImageNatural({ w: 0, h: 0 })
    const img = new Image()
    img.onload = () => {
      if (!cancelled) setImageNatural({ w: img.naturalWidth, h: img.naturalHeight })
    }
    img.src = pageImageUrl
    return () => {
      // A late onload of a previous URL must not overwrite the current dimensions.
      cancelled = true
      img.onload = null
    }
  }, [pageImageUrl])

  // Sync edit fields when current entry changes
  useEffect(() => {
    const entry = entries[currentIndex]
    if (entry) {
      setEditEn(entry.english)
      setEditDe(entry.german)
      setEditEx(entry.example)
    }
  }, [currentIndex, entries])

  // ---------- Actions ----------

  const handleExtract = useCallback(async () => {
    const requestId = ++extractRequestRef.current
    const isCurrent = () => requestId === extractRequestRef.current

    setLoading(true)
    setError(null)
    setShowSummary(false)
    setSavedMessage(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/vocab/sessions/${sessionId}/extract-with-boxes/${selectedPage}`, {
        method: 'POST',
      })
      if (!res.ok) {
        const err = await res.json().catch(() => ({ detail: res.statusText }))
        // `detail` may be a structured object; only show it when it is text.
        throw new Error(typeof err?.detail === 'string' && err.detail ? err.detail : 'Extract failed')
      }
      const data = await res.json()
      if (!isCurrent()) return // page/session changed while waiting
      const rawEntries: GTEntry[] = Array.isArray(data?.entries) ? data.entries : []
      const loaded: GTEntry[] = rawEntries.map((e: GTEntry) => ({ ...e, status: 'pending' as const }))
      setEntries(loaded)
      setCurrentIndex(0)
    } catch (err) {
      if (!isCurrent()) return
      setError(err instanceof Error ? err.message : 'Extraction failed')
    } finally {
      if (isCurrent()) setLoading(false)
    }
  }, [sessionId, selectedPage])

  const confirmEntry = useCallback(() => {
    if (entries.length === 0) return
    const entry = entries[currentIndex]
    const isEdited = editEn !== entry.english || editDe !== entry.german || editEx !== entry.example
    const updated = [...entries]
    updated[currentIndex] = {
      ...entry,
      english: editEn,
      german: editDe,
      example: editEx,
      status: isEdited ? 'edited' : 'confirmed',
    }
    setEntries(updated)
    if (currentIndex < entries.length - 1) {
      setCurrentIndex(currentIndex + 1)
    } else {
      setShowSummary(true)
    }
  }, [entries, currentIndex, editEn, editDe, editEx])

  const skipEntry = useCallback(() => {
    if (entries.length === 0) return
    const updated = [...entries]
    updated[currentIndex] = { ...updated[currentIndex], status: 'skipped' }
    setEntries(updated)
    if (currentIndex < entries.length - 1) {
      setCurrentIndex(currentIndex + 1)
    } else {
      setShowSummary(true)
    }
  }, [entries, currentIndex])

  const goTo = useCallback((idx: number) => {
    if (idx >= 0 && idx < entries.length) {
      setCurrentIndex(idx)
      setShowSummary(false)
    }
  }, [entries.length])

  const handleSave = useCallback(async () => {
    setSaving(true)
    setError(null)
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/vocab/sessions/${sessionId}/ground-truth/${selectedPage}`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ entries }),
      })
      if (!res.ok) {
        // Surface the server's reason when it provides a textual one.
        const err = await res.json().catch(() => null)
        throw new Error(typeof err?.detail === 'string' && err.detail ? err.detail : 'Save failed')
      }
      const data = await res.json()
      setSavedMessage(`Gespeichert: ${data.confirmed} bestaetigt, ${data.edited} editiert, ${data.skipped} uebersprungen`)
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Save failed')
    } finally {
      setSaving(false)
    }
  }, [sessionId, selectedPage, entries])

  // ---------- Keyboard shortcuts ----------

  useEffect(() => {
    if (entries.length === 0 || showSummary) return

    const handler = (e: KeyboardEvent) => {
      // Tab and arrow keys keep their native behaviour inside text fields;
      // Enter confirms from anywhere, including the edit inputs.
      const tag = (e.target as HTMLElement)?.tagName
      const isInput = tag === 'INPUT' || tag === 'TEXTAREA'

      if (e.key === 'Enter' && !e.shiftKey) {
        e.preventDefault()
        confirmEntry()
      } else if (e.key === 'Tab' && !e.shiftKey) {
        if (!isInput) {
          e.preventDefault()
          skipEntry()
        }
      } else if (e.key === 'ArrowLeft' && !isInput) {
        e.preventDefault()
        goTo(currentIndex - 1)
      } else if (e.key === 'ArrowRight' && !isInput) {
        e.preventDefault()
        goTo(currentIndex + 1)
      }
    }

    window.addEventListener('keydown', handler)
    return () => window.removeEventListener('keydown', handler)
  }, [entries.length, showSummary, confirmEntry, skipEntry, goTo, currentIndex])

  // ---------- Computed ----------

  const currentEntry = entries[currentIndex]
  const confirmedCount = entries.filter(e => e.status === 'confirmed').length
  const editedCount = entries.filter(e => e.status === 'edited').length
  const skippedCount = entries.filter(e => e.status === 'skipped').length
  const processedCount = confirmedCount + editedCount + skippedCount
  const progress = entries.length > 0 ? Math.round((processedCount / entries.length) * 100) : 0

  // ---------- Render: No entries yet ----------

  if (entries.length === 0) {
    return (
      <div className="bg-white rounded-xl border border-slate-200 p-8 text-center" ref={panelRef}>
        <h3 className="text-lg font-semibold text-slate-900 mb-2">Ground Truth Labeling</h3>
        <p className="text-sm text-slate-500 mb-6">
          Erkennung starten um Vokabeln mit Positionen zu extrahieren.
          Danach jede Zeile durchgehen und bestaetigen oder korrigieren.
        </p>
        <button
          onClick={handleExtract}
          disabled={loading}
          className="px-6 py-3 bg-teal-600 text-white rounded-lg font-medium hover:bg-teal-700 disabled:opacity-50 transition-colors"
        >
          {loading ? (
            <span className="flex items-center gap-2">
              <svg className="animate-spin w-5 h-5" fill="none" viewBox="0 0 24 24">
                <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
                <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
              </svg>
              Erkennung laeuft...
            </span>
          ) : 'Erkennung starten'}
        </button>
        {error && (
          <div className="mt-4 p-3 bg-red-50 border border-red-200 rounded-lg text-red-700 text-sm">{error}</div>
        )}
      </div>
    )
  }

  // ---------- Render: Summary ----------

  if (showSummary) {
    return (
      <div className="bg-white rounded-xl border border-slate-200 p-6" ref={panelRef}>
        <h3 className="text-lg font-semibold text-slate-900 mb-4">Zusammenfassung</h3>
        <div className="grid grid-cols-3 gap-4 mb-6">
          <div className="bg-green-50 border border-green-200 rounded-lg p-4 text-center">
            <div className="text-2xl font-bold text-green-700">{confirmedCount}</div>
            <div className="text-sm text-green-600">Bestaetigt</div>
          </div>
          <div className="bg-blue-50 border border-blue-200 rounded-lg p-4 text-center">
            <div className="text-2xl font-bold text-blue-700">{editedCount}</div>
            <div className="text-sm text-blue-600">Editiert</div>
          </div>
          <div className="bg-slate-50 border border-slate-200 rounded-lg p-4 text-center">
            <div className="text-2xl font-bold text-slate-700">{skippedCount}</div>
            <div className="text-sm text-slate-500">Uebersprungen</div>
          </div>
        </div>

        <div className="flex gap-3">
          <button
            onClick={handleSave}
            disabled={saving}
            className="flex-1 px-4 py-2.5 bg-teal-600 text-white rounded-lg font-medium hover:bg-teal-700 disabled:opacity-50"
          >
            {saving ? 'Speichern...' : 'Ground Truth speichern'}
          </button>
          <button
            onClick={() => { setShowSummary(false); setCurrentIndex(0) }}
            className="px-4 py-2.5 bg-slate-100 text-slate-700 rounded-lg font-medium hover:bg-slate-200"
          >
            Nochmal durchgehen
          </button>
        </div>

        {savedMessage && (
          <div className="mt-4 p-3 bg-green-50 border border-green-200 rounded-lg text-green-700 text-sm">
            {savedMessage}
          </div>
        )}
        {error && (
          <div className="mt-4 p-3 bg-red-50 border border-red-200 rounded-lg text-red-700 text-sm">{error}</div>
        )}

        {/* Entry list for quick review */}
        <div className="mt-6 max-h-96 overflow-y-auto">
          <table className="w-full text-sm">
            <thead className="sticky top-0 bg-white">
              <tr className="border-b border-slate-200">
                <th className="text-left py-2 px-2 text-slate-500">#</th>
                <th className="text-left py-2 px-2 text-slate-500">English</th>
                <th className="text-left py-2 px-2 text-slate-500">Deutsch</th>
                <th className="text-left py-2 px-2 text-slate-500">Status</th>
              </tr>
            </thead>
            <tbody>
              {entries.map((e, i) => (
                <tr
                  key={i}
                  onClick={() => goTo(i)}
                  className="border-b border-slate-100 hover:bg-slate-50 cursor-pointer"
                >
                  <td className="py-1.5 px-2 text-slate-400">{i + 1}</td>
                  <td className="py-1.5 px-2">{e.english}</td>
                  <td className="py-1.5 px-2">{e.german}</td>
                  <td className="py-1.5 px-2">
                    <span className={`inline-block px-2 py-0.5 rounded-full text-xs font-medium ${
                      e.status === 'confirmed' ? 'bg-green-100 text-green-700' :
                      e.status === 'edited' ? 'bg-blue-100 text-blue-700' :
                      e.status === 'skipped' ? 'bg-slate-100 text-slate-500' :
                      'bg-yellow-100 text-yellow-700'
                    }`}>
                      {e.status === 'confirmed' ? 'OK' :
                       e.status === 'edited' ? 'Editiert' :
                       e.status === 'skipped' ? 'Skip' : 'Offen'}
                    </span>
                  </td>
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      </div>
    )
  }

  // ---------- Render: Main Review UI ----------

  return (
    <div className="bg-white rounded-xl border border-slate-200 overflow-hidden" ref={panelRef}>
      {/* Progress bar */}
      <div className="h-1.5 bg-slate-100">
        <div
          className="h-full bg-teal-500 transition-all duration-300"
          style={{ width: `${progress}%` }}
        />
      </div>

      <div className="flex flex-col lg:flex-row">
        {/* Left: Page image with SVG overlay (2/3) */}
        <div className="lg:w-2/3 p-4">
          <div className="relative bg-slate-50 rounded-lg overflow-hidden">
            {pageImageUrl && (
              <img
                src={pageImageUrl}
                alt={`Seite ${selectedPage + 1}`}
                className="w-full"
                draggable={false}
              />
            )}
            {/* SVG Overlay: 100x100 viewBox so percent bboxes map directly onto the image */}
            <svg
              viewBox="0 0 100 100"
              preserveAspectRatio="none"
              className="absolute inset-0 w-full h-full"
              style={{ pointerEvents: 'none' }}
            >
              {entries.map((entry, i) => {
                const colors = getEntryColor(entry, i, currentIndex)
                return (
                  <rect
                    key={i}
                    x={entry.bbox.x}
                    y={entry.bbox.y}
                    width={entry.bbox.w}
                    height={entry.bbox.h}
                    fill={colors.fill}
                    stroke={colors.stroke}
                    strokeWidth={i === currentIndex ? 0.3 : 0.15}
                    style={{ cursor: 'pointer', pointerEvents: 'all' }}
                    onClick={() => goTo(i)}
                  />
                )
              })}
            </svg>
          </div>

          {/* Legend */}
          <div className="flex items-center gap-4 mt-3 text-xs text-slate-500">
            <span className="flex items-center gap-1">
              <span className="w-3 h-3 rounded-sm" style={{ background: STATUS_COLORS.current.fill, border: `1px solid ${STATUS_COLORS.current.stroke}` }} /> Aktuell
            </span>
            <span className="flex items-center gap-1">
              <span className="w-3 h-3 rounded-sm" style={{ background: STATUS_COLORS.confirmed.fill, border: `1px solid ${STATUS_COLORS.confirmed.stroke}` }} /> Bestaetigt
            </span>
            <span className="flex items-center gap-1">
              <span className="w-3 h-3 rounded-sm" style={{ background: STATUS_COLORS.edited.fill, border: `1px solid ${STATUS_COLORS.edited.stroke}` }} /> Editiert
            </span>
            <span className="flex items-center gap-1">
              <span className="w-3 h-3 rounded-sm" style={{ background: STATUS_COLORS.skipped.fill, border: `1px solid ${STATUS_COLORS.skipped.stroke}` }} /> Uebersprungen
            </span>
          </div>
        </div>

        {/* Right: Crops + Edit fields (1/3) */}
        <div className="lg:w-1/3 border-l border-slate-200 p-4 space-y-4">
          {currentEntry && (
            <>
              {/* Row crop */}
              {imageNatural.w > 0 && (
                <ImageCrop
                  imageUrl={pageImageUrl}
                  bbox={currentEntry.bbox}
                  naturalWidth={imageNatural.w}
                  naturalHeight={imageNatural.h}
                  label="Gesamte Zeile"
                />
              )}

              {/* Column crops */}
              {imageNatural.w > 0 && (
                <div className="grid grid-cols-3 gap-2">
                  {currentEntry.bbox_en.w > 0 && (
                    <ImageCrop
                      imageUrl={pageImageUrl}
                      bbox={currentEntry.bbox_en}
                      naturalWidth={imageNatural.w}
                      naturalHeight={imageNatural.h}
                      maxWidth={120}
                      label="EN"
                    />
                  )}
                  {currentEntry.bbox_de.w > 0 && (
                    <ImageCrop
                      imageUrl={pageImageUrl}
                      bbox={currentEntry.bbox_de}
                      naturalWidth={imageNatural.w}
                      naturalHeight={imageNatural.h}
                      maxWidth={120}
                      label="DE"
                    />
                  )}
                  {currentEntry.bbox_ex.w > 0 && (
                    <ImageCrop
                      imageUrl={pageImageUrl}
                      bbox={currentEntry.bbox_ex}
                      naturalWidth={imageNatural.w}
                      naturalHeight={imageNatural.h}
                      maxWidth={120}
                      label="EX"
                    />
                  )}
                </div>
              )}

              {/* Confidence badge */}
              <div className="flex items-center gap-2">
                <span className={`text-xs px-2 py-0.5 rounded-full font-medium ${
                  currentEntry.confidence >= 70 ? 'bg-green-100 text-green-700' :
                  currentEntry.confidence >= 40 ? 'bg-yellow-100 text-yellow-700' :
                  'bg-red-100 text-red-700'
                }`}>
                  Konfidenz: {currentEntry.confidence}%
                </span>
              </div>

              {/* Edit fields */}
              <div className="space-y-3">
                <div>
                  <label className="block text-xs font-medium text-slate-500 mb-1">English</label>
                  <input
                    ref={enInputRef}
                    type="text"
                    value={editEn}
                    onChange={e => setEditEn(e.target.value)}
                    className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm focus:ring-2 focus:ring-teal-500 focus:border-teal-500"
                  />
                </div>
                <div>
                  <label className="block text-xs font-medium text-slate-500 mb-1">Deutsch</label>
                  <input
                    type="text"
                    value={editDe}
                    onChange={e => setEditDe(e.target.value)}
                    className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm focus:ring-2 focus:ring-teal-500 focus:border-teal-500"
                  />
                </div>
                <div>
                  <label className="block text-xs font-medium text-slate-500 mb-1">Beispiel</label>
                  <input
                    type="text"
                    value={editEx}
                    onChange={e => setEditEx(e.target.value)}
                    className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm focus:ring-2 focus:ring-teal-500 focus:border-teal-500"
                  />
                </div>
              </div>

              {/* Action buttons */}
              <div className="flex gap-2">
                <button
                  onClick={confirmEntry}
                  className="flex-1 px-4 py-2.5 bg-green-600 text-white rounded-lg font-medium hover:bg-green-700 text-sm"
                  title="Enter"
                >
                  OK (Enter)
                </button>
                <button
                  onClick={skipEntry}
                  className="flex-1 px-4 py-2.5 bg-slate-200 text-slate-700 rounded-lg font-medium hover:bg-slate-300 text-sm"
                  title="Tab"
                >
                  Skip (Tab)
                </button>
              </div>

              {/* Navigation */}
              <div className="flex items-center justify-between">
                <button
                  onClick={() => goTo(currentIndex - 1)}
                  disabled={currentIndex === 0}
                  className="px-3 py-1.5 bg-slate-100 rounded-lg text-sm text-slate-600 hover:bg-slate-200 disabled:opacity-30"
                >
                  ← Zurueck
                </button>
                <span className="text-sm text-slate-500 font-medium">
                  {currentIndex + 1} / {entries.length}
                </span>
                <button
                  onClick={() => goTo(currentIndex + 1)}
                  disabled={currentIndex === entries.length - 1}
                  className="px-3 py-1.5 bg-slate-100 rounded-lg text-sm text-slate-600 hover:bg-slate-200 disabled:opacity-30"
                >
                  Weiter →
                </button>
              </div>

              {/* Progress stats */}
              <div className="text-xs text-slate-400 text-center">
                {confirmedCount} bestaetigt · {editedCount} editiert · {skippedCount} uebersprungen · {progress}%
              </div>

              {/* Keyboard hints */}
              <div className="text-xs text-slate-400 text-center border-t border-slate-100 pt-2">
                Enter = Bestaetigen · Tab = Ueberspringen · ←→ = Navigieren
              </div>
            </>
          )}
        </div>
      </div>

      {error && (
        <div className="mx-4 mb-4 p-3 bg-red-50 border border-red-200 rounded-lg text-red-700 text-sm">{error}</div>
      )}
    </div>
  )
}
|
||||
@@ -11,3 +11,5 @@ export { CellCorrectionDialog } from './CellCorrectionDialog'
|
||||
|
||||
export { BlockReviewPanel, BlockReviewSummary } from './BlockReviewPanel'
|
||||
export type { BlockStatus, MethodResult, BlockReviewData } from './BlockReviewPanel'
|
||||
|
||||
export { GroundTruthPanel } from './GroundTruthPanel'
|
||||
|
||||
@@ -2001,3 +2001,227 @@ async def load_latest_ocr_export():
|
||||
data = json.load(f)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Ground Truth Labeling
|
||||
# =============================================================================
|
||||
|
||||
# Directory (below LOCAL_STORAGE_PATH) where save_ground_truth writes its JSON files.
GROUND_TRUTH_DIR = os.path.join(LOCAL_STORAGE_PATH, "ground-truth")
|
||||
|
||||
|
||||
async def extract_entries_with_boxes(image_bytes: bytes, lang: str = "eng+deu") -> dict:
    """Extract vocabulary rows together with their bounding boxes.

    Pipeline: Tesseract word boxes -> GridDetectionService regions -> detected
    grid -> one entry per non-empty grid row.

    Args:
        image_bytes: Encoded page image handed to ``extract_bounding_boxes``.
        lang: Tesseract language string.

    Returns:
        Dict with ``entries``, ``image_width`` and ``image_height``. Each entry
        has row_index, english, german, example, confidence, bbox, bbox_en,
        bbox_de and bbox_ex. All bbox coordinates are in percent (0-100);
        a missing column is reported as an all-zero box.

    Raises:
        HTTPException: 500 if Tesseract or the GridDetectionService is unavailable.
    """
    if not TESSERACT_AVAILABLE:
        raise HTTPException(status_code=500, detail="Tesseract not available")
    if not GRID_SERVICE_AVAILABLE:
        raise HTTPException(status_code=500, detail="GridDetectionService not available")

    # Step 1: word-level boxes from Tesseract
    ocr_output = await extract_bounding_boxes(image_bytes, lang=lang)
    words = ocr_output.get("words", [])
    img_w = ocr_output.get("image_width", 0)
    img_h = ocr_output.get("image_height", 0)

    def _payload(found: list) -> dict:
        # Common response shape for both the early exits and the final result.
        return {"entries": found, "image_width": img_w, "image_height": img_h}

    def _zero_box() -> dict:
        # Placeholder for a column that was not detected in a row.
        return {"x": 0, "y": 0, "w": 0, "h": 0}

    if not words or img_w == 0 or img_h == 0:
        return _payload([])

    # Step 2: percentage-based OCR regions
    service = GridDetectionService()
    regions = service.convert_tesseract_regions(words, img_w, img_h)
    if not regions:
        return _payload([])

    # Step 3: grid detection
    grid_result = service.detect_grid(regions)
    if not grid_result.cells:
        return _payload([])

    # Step 4: collect the cells of each row by column type
    from services.grid_detection_service import ColumnType

    entries = []
    for row_idx, row_cells in enumerate(grid_result.cells):
        en_text, de_text, ex_text = "", "", ""
        en_bbox = de_bbox = ex_bbox = None
        conf_total = 0.0
        conf_cells = 0

        for cell in row_cells:
            box = {
                "x": round(cell.x, 2),
                "y": round(cell.y, 2),
                "w": round(cell.width, 2),
                "h": round(cell.height, 2),
            }
            stripped = cell.text.strip()

            if cell.column_type == ColumnType.ENGLISH:
                en_text, en_bbox = stripped, box
            elif cell.column_type == ColumnType.GERMAN:
                de_text, de_bbox = stripped, box
            elif cell.column_type == ColumnType.EXAMPLE:
                ex_text, ex_bbox = stripped, box

            # Only cells that actually contain text contribute to the row confidence.
            if stripped:
                conf_total += cell.confidence
                conf_cells += 1

        # Skip completely empty rows
        if not (en_text or de_text or ex_text):
            continue

        # Whole-row box = union of the column boxes that were found
        found_boxes = [b for b in (en_bbox, de_bbox, ex_bbox) if b is not None]
        if found_boxes:
            left = min(b["x"] for b in found_boxes)
            top = min(b["y"] for b in found_boxes)
            right = max(b["x"] + b["w"] for b in found_boxes)
            bottom = max(b["y"] + b["h"] for b in found_boxes)
            row_bbox = {
                "x": round(left, 2),
                "y": round(top, 2),
                "w": round(right - left, 2),
                "h": round(bottom - top, 2),
            }
        else:
            row_bbox = {"x": 0, "y": 0, "w": 100, "h": 3}

        if conf_cells > 0:
            avg_conf = round(conf_total / conf_cells * 100, 1)
        else:
            avg_conf = round(0, 1)

        entries.append({
            "row_index": row_idx,
            "english": en_text,
            "german": de_text,
            "example": ex_text,
            "confidence": avg_conf,
            "bbox": row_bbox,
            "bbox_en": en_bbox or _zero_box(),
            "bbox_de": de_bbox or _zero_box(),
            "bbox_ex": ex_bbox or _zero_box(),
        })

    return _payload(entries)
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/extract-with-boxes/{page_number}")
async def extract_with_boxes(session_id: str, page_number: int):
    """Run box-aware vocabulary extraction on one PDF page of a session.

    The page is rendered as a non-thumbnail image, passed to
    ``extract_entries_with_boxes`` and the resulting entries are cached in the
    session under ``gt_entries[str(page_number)]``.

    page_number is 0-indexed.

    Raises:
        HTTPException: 404 for an unknown session, 400 if the session has no
            PDF or the page number is out of range.
    """
    logger.info(f"Extract with boxes for session {session_id}, page {page_number}")

    if session_id not in _sessions:
        raise HTTPException(status_code=404, detail="Session not found")
    session = _sessions[session_id]

    pdf_data = session.get("pdf_data")
    if not pdf_data:
        raise HTTPException(status_code=400, detail="No PDF uploaded for this session")

    page_count = session.get("pdf_page_count", 1)
    if not (0 <= page_number < page_count):
        raise HTTPException(status_code=400, detail=f"Invalid page number. PDF has {page_count} pages (0-indexed).")

    # Render the page in high resolution and run the extraction on it.
    image_data = await convert_pdf_page_to_image(pdf_data, page_number, thumbnail=False)
    result = await extract_entries_with_boxes(image_data)
    extracted = result["entries"]

    # Cache per page inside the session
    session.setdefault("gt_entries", {})[str(page_number)] = extracted

    return {
        "success": True,
        "entries": extracted,
        "entry_count": len(extracted),
        "image_width": result["image_width"],
        "image_height": result["image_height"],
    }
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/ground-truth/{page_number}")
async def save_ground_truth(session_id: str, page_number: int, data: dict = Body(...)):
    """Save ground truth labels for a page.

    Expects body with 'entries' list - each entry is an object with english,
    german, example, status ('confirmed' | 'edited' | 'skipped'), and bbox fields.

    The payload is validated before anything is stored: a malformed body is
    rejected with 400 instead of being written to the session and to disk and
    then failing while the statuses are counted.

    Returns:
        Dict with success flag, saved_count, per-status counts and the path of
        the JSON file that was written.

    Raises:
        HTTPException: 404 for an unknown session; 400 if 'entries' is missing,
            empty, not a list, or contains non-object items.
    """
    logger.info(f"Save ground truth for session {session_id}, page {page_number}")

    if session_id not in _sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    entries = data.get("entries")
    if not entries:
        raise HTTPException(status_code=400, detail="No entries provided")
    if not isinstance(entries, list) or not all(isinstance(entry, dict) for entry in entries):
        raise HTTPException(status_code=400, detail="'entries' must be a list of objects")

    # Save in session
    session = _sessions[session_id]
    session.setdefault("ground_truth", {})[str(page_number)] = entries

    # Also save to disk
    os.makedirs(GROUND_TRUTH_DIR, exist_ok=True)
    gt_path = os.path.join(GROUND_TRUTH_DIR, f"{session_id}_page{page_number}.json")
    gt_data = {
        "session_id": session_id,
        "page_number": page_number,
        "saved_at": datetime.now().isoformat(),
        "entry_count": len(entries),
        "entries": entries,
    }
    with open(gt_path, 'w', encoding='utf-8') as f:
        json.dump(gt_data, f, ensure_ascii=False, indent=2)

    logger.info(f"Ground truth saved: {len(entries)} entries to {gt_path}")

    # Count the three final statuses in one pass; anything else (e.g. 'pending') is ignored.
    status_counts = {"confirmed": 0, "edited": 0, "skipped": 0}
    for entry in entries:
        status = entry.get("status")
        if isinstance(status, str) and status in status_counts:
            status_counts[status] += 1

    return {
        "success": True,
        "saved_count": len(entries),
        "confirmed": status_counts["confirmed"],
        "edited": status_counts["edited"],
        "skipped": status_counts["skipped"],
        "file_path": gt_path,
    }
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}/ground-truth/{page_number}")
async def load_ground_truth(session_id: str, page_number: int):
    """Return the saved ground truth entries of a page.

    Looks in the session's in-memory ``ground_truth`` cache first and falls
    back to the JSON file in GROUND_TRUTH_DIR. The ``source`` field of the
    response tells which one was used ("cache" or "disk").

    Raises:
        HTTPException: 404 for an unknown session or when nothing was saved
            for this page.
    """
    logger.info(f"Load ground truth for session {session_id}, page {page_number}")

    if session_id not in _sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    # In-memory copy wins when present and non-empty.
    page_key = str(page_number)
    from_session = _sessions[session_id].get("ground_truth", {}).get(page_key)
    if from_session:
        return {"success": True, "entries": from_session, "source": "cache"}

    # Otherwise fall back to the file written by save_ground_truth.
    file_name = f"{session_id}_page{page_number}.json"
    gt_path = os.path.join(GROUND_TRUTH_DIR, file_name)
    if not os.path.exists(gt_path):
        raise HTTPException(status_code=404, detail="No ground truth found for this page")

    with open(gt_path, 'r', encoding='utf-8') as f:
        stored = json.load(f)

    return {"success": True, "entries": stored.get("entries", []), "source": "disk"}
|
||||
|
||||
Reference in New Issue
Block a user