From 8c77df494bbe8b11921d90253cc599504624eb9e Mon Sep 17 00:00:00 2001 From: BreakPilot Dev Date: Tue, 10 Feb 2026 09:04:36 +0100 Subject: [PATCH] feat(ocr): Add Ground Truth labeling UI for OCR comparison Adds a step-through tool for creating 100% correct reference data (ground truth) with position information. Users scan a page, review each vocabulary entry with image crops, confirm or correct the OCR text, and save the result as JSON. Backend: extract_entries_with_boxes() helper + 3 endpoints (extract-with-boxes, ground-truth save/load). Frontend: GroundTruthPanel component with SVG overlay, ImageCrop, keyboard shortcuts (Enter/Tab/arrows), and tab navigation in page.tsx. Co-Authored-By: Claude Opus 4.6 --- admin-v2/app/(admin)/ai/ocr-compare/page.tsx | 44 +- admin-v2/components/ocr/GroundTruthPanel.tsx | 605 ++++++++++++++++++ admin-v2/components/ocr/index.ts | 2 + .../backend/vocab_worksheet_api.py | 224 +++++++ 4 files changed, 872 insertions(+), 3 deletions(-) create mode 100644 admin-v2/components/ocr/GroundTruthPanel.tsx diff --git a/admin-v2/app/(admin)/ai/ocr-compare/page.tsx b/admin-v2/app/(admin)/ai/ocr-compare/page.tsx index 4c2e69d..d6fd961 100644 --- a/admin-v2/app/(admin)/ai/ocr-compare/page.tsx +++ b/admin-v2/app/(admin)/ai/ocr-compare/page.tsx @@ -12,7 +12,7 @@ import { useState, useEffect, useCallback, useMemo } from 'react' import { PagePurpose } from '@/components/common/PagePurpose' import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar' import { QRCodeUpload, UploadedFile } from '@/components/QRCodeUpload' -import { GridOverlay, GridStats, GridLegend, CellCorrectionDialog, BlockReviewPanel, BlockReviewSummary, getCellBlockNumber } from '@/components/ocr' +import { GridOverlay, GridStats, GridLegend, CellCorrectionDialog, BlockReviewPanel, BlockReviewSummary, getCellBlockNumber, GroundTruthPanel } from '@/components/ocr' import type { GridData, GridCell, BlockReviewData, BlockStatus } from '@/components/ocr' interface VocabEntry { @@ -155,6 +155,9 @@ export default function OCRComparePage() { const [isExporting, setIsExporting] = useState(false) const [exportSuccess, setExportSuccess] = useState(false) + // Tab State (compare vs ground truth) + const [activeTab, setActiveTab] = useState<'compare' | 'groundtruth'>('compare') + const KLAUSUR_API = '/klausur-api' // Load session history @@ -1065,8 +1068,43 @@ export default function OCRComparePage() { )} + {/* Tab Bar */} + {sessionId && pageCount > 0 && ( +
+ + +
+ )} + + {/* Ground Truth Panel */} + {activeTab === 'groundtruth' && sessionId && ( + + )} + {/* Full-Width Comparison View */} - {(thumbnails[selectedPage] || result) && sessionId && ( + {activeTab === 'compare' && (thumbnails[selectedPage] || result) && sessionId && (
@@ -1477,7 +1515,7 @@ export default function OCRComparePage() { )} {/* Comparison Summary */} - {result?.comparison && ( + {activeTab === 'compare' && result?.comparison && (

Vergleichszusammenfassung

diff --git a/admin-v2/components/ocr/GroundTruthPanel.tsx b/admin-v2/components/ocr/GroundTruthPanel.tsx new file mode 100644 index 0000000..e531c3d --- /dev/null +++ b/admin-v2/components/ocr/GroundTruthPanel.tsx @@ -0,0 +1,605 @@ +'use client' + +/** + * GroundTruthPanel — Step-through UI for labeling OCR ground truth. + * + * Shows page image with SVG overlay (color-coded bounding boxes), + * alongside crops of the current entry and editable text fields. + * Keyboard-driven: Enter=confirm, Tab=skip, Arrow keys=navigate. + */ + +import { useState, useEffect, useCallback, useRef } from 'react' + +// ---------- Types ---------- + +interface BBox { + x: number + y: number + w: number + h: number +} + +interface GTEntry { + row_index: number + english: string + german: string + example: string + confidence: number + bbox: BBox + bbox_en: BBox + bbox_de: BBox + bbox_ex: BBox + status?: 'pending' | 'confirmed' | 'edited' | 'skipped' +} + +interface GroundTruthPanelProps { + sessionId: string + selectedPage: number + pageImageUrl: string +} + +// ---------- Helpers ---------- + +const STATUS_COLORS: Record = { + current: { fill: 'rgba(250,204,21,0.25)', stroke: '#eab308' }, // yellow + confirmed: { fill: 'rgba(34,197,94,0.18)', stroke: '#16a34a' }, // green + edited: { fill: 'rgba(59,130,246,0.18)', stroke: '#2563eb' }, // blue + skipped: { fill: 'rgba(148,163,184,0.15)', stroke: '#94a3b8' }, // gray + pending: { fill: 'rgba(0,0,0,0)', stroke: '#cbd5e1' }, // outline only +} + +function getEntryColor(entry: GTEntry, index: number, currentIndex: number) { + if (index === currentIndex) return STATUS_COLORS.current + return STATUS_COLORS[entry.status || 'pending'] +} + +// ---------- ImageCrop ---------- + +function ImageCrop({ imageUrl, bbox, naturalWidth, naturalHeight, maxWidth = 380, label }: { + imageUrl: string + bbox: BBox + naturalWidth: number + naturalHeight: number + maxWidth?: number + label?: string +}) { + if (!bbox || bbox.w === 0 || bbox.h === 0) return null + + const cropWPx = (bbox.w / 100) * naturalWidth + const cropHPx = (bbox.h / 100) * naturalHeight + if (cropWPx < 1 || cropHPx < 1) return null + + const scale = maxWidth / cropWPx + const displayH = cropHPx * scale + + return ( +
+ {label &&
{label}
} +
+ +
+
+ ) +} + +// ---------- Main Component ---------- + +export function GroundTruthPanel({ sessionId, selectedPage, pageImageUrl }: GroundTruthPanelProps) { + const KLAUSUR_API = '/klausur-api' + + // State + const [entries, setEntries] = useState([]) + const [currentIndex, setCurrentIndex] = useState(0) + const [loading, setLoading] = useState(false) + const [saving, setSaving] = useState(false) + const [error, setError] = useState(null) + const [imageNatural, setImageNatural] = useState({ w: 0, h: 0 }) + const [showSummary, setShowSummary] = useState(false) + const [savedMessage, setSavedMessage] = useState(null) + + // Editable fields for current entry + const [editEn, setEditEn] = useState('') + const [editDe, setEditDe] = useState('') + const [editEx, setEditEx] = useState('') + + const panelRef = useRef(null) + const enInputRef = useRef(null) + + // Load natural image dimensions + useEffect(() => { + if (!pageImageUrl) return + const img = new Image() + img.onload = () => setImageNatural({ w: img.naturalWidth, h: img.naturalHeight }) + img.src = pageImageUrl + }, [pageImageUrl]) + + // Sync edit fields when current entry changes + useEffect(() => { + const entry = entries[currentIndex] + if (entry) { + setEditEn(entry.english) + setEditDe(entry.german) + setEditEx(entry.example) + } + }, [currentIndex, entries]) + + // ---------- Actions ---------- + + const handleExtract = useCallback(async () => { + setLoading(true) + setError(null) + setShowSummary(false) + setSavedMessage(null) + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/vocab/sessions/${sessionId}/extract-with-boxes/${selectedPage}`, { + method: 'POST', + }) + if (!res.ok) { + const err = await res.json().catch(() => ({ detail: res.statusText })) + throw new Error(err.detail || 'Extract failed') + } + const data = await res.json() + const loaded: GTEntry[] = (data.entries || []).map((e: GTEntry) => ({ ...e, status: 'pending' as const })) + setEntries(loaded) + setCurrentIndex(0) + } catch (err) { + setError(err instanceof Error ? err.message : 'Extraction failed') + } finally { + setLoading(false) + } + }, [sessionId, selectedPage]) + + const confirmEntry = useCallback(() => { + if (entries.length === 0) return + const entry = entries[currentIndex] + const isEdited = editEn !== entry.english || editDe !== entry.german || editEx !== entry.example + const updated = [...entries] + updated[currentIndex] = { + ...entry, + english: editEn, + german: editDe, + example: editEx, + status: isEdited ? 'edited' : 'confirmed', + } + setEntries(updated) + if (currentIndex < entries.length - 1) { + setCurrentIndex(currentIndex + 1) + } else { + setShowSummary(true) + } + }, [entries, currentIndex, editEn, editDe, editEx]) + + const skipEntry = useCallback(() => { + if (entries.length === 0) return + const updated = [...entries] + updated[currentIndex] = { ...updated[currentIndex], status: 'skipped' } + setEntries(updated) + if (currentIndex < entries.length - 1) { + setCurrentIndex(currentIndex + 1) + } else { + setShowSummary(true) + } + }, [entries, currentIndex]) + + const goTo = useCallback((idx: number) => { + if (idx >= 0 && idx < entries.length) { + setCurrentIndex(idx) + setShowSummary(false) + } + }, [entries.length]) + + const handleSave = useCallback(async () => { + setSaving(true) + setError(null) + try { + const res = await fetch(`${KLAUSUR_API}/api/v1/vocab/sessions/${sessionId}/ground-truth/${selectedPage}`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ entries }), + }) + if (!res.ok) throw new Error('Save failed') + const data = await res.json() + setSavedMessage(`Gespeichert: ${data.confirmed} bestaetigt, ${data.edited} editiert, ${data.skipped} uebersprungen`) + } catch (err) { + setError(err instanceof Error ? err.message : 'Save failed') + } finally { + setSaving(false) + } + }, [sessionId, selectedPage, entries]) + + // ---------- Keyboard shortcuts ---------- + + useEffect(() => { + if (entries.length === 0 || showSummary) return + + const handler = (e: KeyboardEvent) => { + // Don't capture when typing in inputs + const tag = (e.target as HTMLElement)?.tagName + const isInput = tag === 'INPUT' || tag === 'TEXTAREA' + + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault() + confirmEntry() + } else if (e.key === 'Tab' && !e.shiftKey) { + if (!isInput) { + e.preventDefault() + skipEntry() + } + } else if (e.key === 'ArrowLeft' && !isInput) { + e.preventDefault() + goTo(currentIndex - 1) + } else if (e.key === 'ArrowRight' && !isInput) { + e.preventDefault() + goTo(currentIndex + 1) + } + } + + window.addEventListener('keydown', handler) + return () => window.removeEventListener('keydown', handler) + }, [entries.length, showSummary, confirmEntry, skipEntry, goTo, currentIndex]) + + // ---------- Computed ---------- + + const currentEntry = entries[currentIndex] + const confirmedCount = entries.filter(e => e.status === 'confirmed').length + const editedCount = entries.filter(e => e.status === 'edited').length + const skippedCount = entries.filter(e => e.status === 'skipped').length + const processedCount = confirmedCount + editedCount + skippedCount + const progress = entries.length > 0 ? Math.round((processedCount / entries.length) * 100) : 0 + + // ---------- Render: No entries yet ---------- + + if (entries.length === 0) { + return ( +
+

Ground Truth Labeling

+

+ Erkennung starten um Vokabeln mit Positionen zu extrahieren. + Danach jede Zeile durchgehen und bestaetigen oder korrigieren. +

+ + {error && ( +
{error}
+ )} +
+ ) + } + + // ---------- Render: Summary ---------- + + if (showSummary) { + return ( +
+

Zusammenfassung

+
+
+
{confirmedCount}
+
Bestaetigt
+
+
+
{editedCount}
+
Editiert
+
+
+
{skippedCount}
+
Uebersprungen
+
+
+ +
+ + +
+ + {savedMessage && ( +
+ {savedMessage} +
+ )} + {error && ( +
{error}
+ )} + + {/* Entry list for quick review */} +
+ + + + + + + + + + + {entries.map((e, i) => ( + goTo(i)} + className="border-b border-slate-100 hover:bg-slate-50 cursor-pointer" + > + + + + + + ))} + +
#EnglishDeutschStatus
{i + 1}{e.english}{e.german} + + {e.status === 'confirmed' ? 'OK' : + e.status === 'edited' ? 'Editiert' : + e.status === 'skipped' ? 'Skip' : 'Offen'} + +
+
+
+ ) + } + + // ---------- Render: Main Review UI ---------- + + return ( +
+ {/* Progress bar */} +
+
+
+ +
+ {/* Left: Page image with SVG overlay (2/3) */} +
+
+ {pageImageUrl && ( + {`Seite + )} + {/* SVG Overlay */} + + {entries.map((entry, i) => { + const colors = getEntryColor(entry, i, currentIndex) + return ( + goTo(i)} + /> + ) + })} + +
+ + {/* Legend */} +
+ + Aktuell + + + Bestaetigt + + + Editiert + + + Uebersprungen + +
+
+ + {/* Right: Crops + Edit fields (1/3) */} +
+ {currentEntry && ( + <> + {/* Row crop */} + {imageNatural.w > 0 && ( + + )} + + {/* Column crops */} + {imageNatural.w > 0 && ( +
+ {currentEntry.bbox_en.w > 0 && ( + + )} + {currentEntry.bbox_de.w > 0 && ( + + )} + {currentEntry.bbox_ex.w > 0 && ( + + )} +
+ )} + + {/* Confidence badge */} +
+ = 70 ? 'bg-green-100 text-green-700' : + currentEntry.confidence >= 40 ? 'bg-yellow-100 text-yellow-700' : + 'bg-red-100 text-red-700' + }`}> + Konfidenz: {currentEntry.confidence}% + +
+ + {/* Edit fields */} +
+
+ + setEditEn(e.target.value)} + className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm focus:ring-2 focus:ring-teal-500 focus:border-teal-500" + /> +
+
+ + setEditDe(e.target.value)} + className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm focus:ring-2 focus:ring-teal-500 focus:border-teal-500" + /> +
+
+ + setEditEx(e.target.value)} + className="w-full px-3 py-2 border border-slate-300 rounded-lg text-sm focus:ring-2 focus:ring-teal-500 focus:border-teal-500" + /> +
+
+ + {/* Action buttons */} +
+ + +
+ + {/* Navigation */} +
+ + + {currentIndex + 1} / {entries.length} + + +
+ + {/* Progress stats */} +
+ {confirmedCount} bestaetigt · {editedCount} editiert · {skippedCount} uebersprungen · {progress}% +
+ + {/* Keyboard hints */} +
+ Enter = Bestaetigen · Tab = Ueberspringen · ←→ = Navigieren +
+ + )} +
+
+ + {error && ( +
{error}
+ )} +
+ ) +} diff --git a/admin-v2/components/ocr/index.ts b/admin-v2/components/ocr/index.ts index 740fb82..ef158b6 100644 --- a/admin-v2/components/ocr/index.ts +++ b/admin-v2/components/ocr/index.ts @@ -11,3 +11,5 @@ export { CellCorrectionDialog } from './CellCorrectionDialog' export { BlockReviewPanel, BlockReviewSummary } from './BlockReviewPanel' export type { BlockStatus, MethodResult, BlockReviewData } from './BlockReviewPanel' + +export { GroundTruthPanel } from './GroundTruthPanel' diff --git a/klausur-service/backend/vocab_worksheet_api.py b/klausur-service/backend/vocab_worksheet_api.py index f0ce3fe..495c6b9 100644 --- a/klausur-service/backend/vocab_worksheet_api.py +++ b/klausur-service/backend/vocab_worksheet_api.py @@ -2001,3 +2001,227 @@ async def load_latest_ocr_export(): data = json.load(f) return data + + +# ============================================================================= +# Ground Truth Labeling +# ============================================================================= + +GROUND_TRUTH_DIR = os.path.join(LOCAL_STORAGE_PATH, "ground-truth") + + +async def extract_entries_with_boxes(image_bytes: bytes, lang: str = "eng+deu") -> dict: + """Extract vocabulary entries with bounding boxes using Tesseract + GridDetectionService. + + Returns dict with 'entries' list and 'image_width'/'image_height'. + Each entry has row_index, english, german, example, confidence, bbox, bbox_en, bbox_de, bbox_ex. + All bbox coordinates are in percent (0-100). + """ + if not TESSERACT_AVAILABLE: + raise HTTPException(status_code=500, detail="Tesseract not available") + if not GRID_SERVICE_AVAILABLE: + raise HTTPException(status_code=500, detail="GridDetectionService not available") + + # Step 1: Tesseract word-level bounding boxes + tess_result = await extract_bounding_boxes(image_bytes, lang=lang) + words = tess_result.get("words", []) + img_w = tess_result.get("image_width", 0) + img_h = tess_result.get("image_height", 0) + + if not words or img_w == 0 or img_h == 0: + return {"entries": [], "image_width": img_w, "image_height": img_h} + + # Step 2: Convert to OCR regions (percentage-based) + service = GridDetectionService() + regions = service.convert_tesseract_regions(words, img_w, img_h) + + if not regions: + return {"entries": [], "image_width": img_w, "image_height": img_h} + + # Step 3: Detect grid + grid_result = service.detect_grid(regions) + + if not grid_result.cells: + return {"entries": [], "image_width": img_w, "image_height": img_h} + + # Step 4: Group cells by logical_row and column_type + from services.grid_detection_service import ColumnType + + entries = [] + for row_idx, row_cells in enumerate(grid_result.cells): + en_text = "" + de_text = "" + ex_text = "" + en_bbox = None + de_bbox = None + ex_bbox = None + row_conf_sum = 0.0 + row_conf_count = 0 + + for cell in row_cells: + cell_bbox = {"x": round(cell.x, 2), "y": round(cell.y, 2), + "w": round(cell.width, 2), "h": round(cell.height, 2)} + + if cell.column_type == ColumnType.ENGLISH: + en_text = cell.text.strip() + en_bbox = cell_bbox + elif cell.column_type == ColumnType.GERMAN: + de_text = cell.text.strip() + de_bbox = cell_bbox + elif cell.column_type == ColumnType.EXAMPLE: + ex_text = cell.text.strip() + ex_bbox = cell_bbox + + if cell.text.strip(): + row_conf_sum += cell.confidence + row_conf_count += 1 + + # Skip completely empty rows + if not en_text and not de_text and not ex_text: + continue + + # Calculate whole-row bounding box + all_bboxes = [b for b in [en_bbox, de_bbox, ex_bbox] if b is not None] + if all_bboxes: + row_x = min(b["x"] for b in all_bboxes) + row_y = min(b["y"] for b in all_bboxes) + row_right = max(b["x"] + b["w"] for b in all_bboxes) + row_bottom = max(b["y"] + b["h"] for b in all_bboxes) + row_bbox = {"x": round(row_x, 2), "y": round(row_y, 2), + "w": round(row_right - row_x, 2), "h": round(row_bottom - row_y, 2)} + else: + row_bbox = {"x": 0, "y": 0, "w": 100, "h": 3} + + avg_conf = round((row_conf_sum / row_conf_count * 100) if row_conf_count > 0 else 0, 1) + + entries.append({ + "row_index": row_idx, + "english": en_text, + "german": de_text, + "example": ex_text, + "confidence": avg_conf, + "bbox": row_bbox, + "bbox_en": en_bbox or {"x": 0, "y": 0, "w": 0, "h": 0}, + "bbox_de": de_bbox or {"x": 0, "y": 0, "w": 0, "h": 0}, + "bbox_ex": ex_bbox or {"x": 0, "y": 0, "w": 0, "h": 0}, + }) + + return {"entries": entries, "image_width": img_w, "image_height": img_h} + + +@router.post("/sessions/{session_id}/extract-with-boxes/{page_number}") +async def extract_with_boxes(session_id: str, page_number: int): + """Extract vocabulary entries with bounding boxes for ground truth labeling. + + Uses Tesseract + GridDetectionService for spatial positioning. + page_number is 0-indexed. + """ + logger.info(f"Extract with boxes for session {session_id}, page {page_number}") + + if session_id not in _sessions: + raise HTTPException(status_code=404, detail="Session not found") + + session = _sessions[session_id] + pdf_data = session.get("pdf_data") + + if not pdf_data: + raise HTTPException(status_code=400, detail="No PDF uploaded for this session") + + page_count = session.get("pdf_page_count", 1) + if page_number < 0 or page_number >= page_count: + raise HTTPException(status_code=400, detail=f"Invalid page number. PDF has {page_count} pages (0-indexed).") + + # Convert page to hires image + image_data = await convert_pdf_page_to_image(pdf_data, page_number, thumbnail=False) + + # Extract entries with boxes + result = await extract_entries_with_boxes(image_data) + + # Cache in session + if "gt_entries" not in session: + session["gt_entries"] = {} + session["gt_entries"][str(page_number)] = result["entries"] + + return { + "success": True, + "entries": result["entries"], + "entry_count": len(result["entries"]), + "image_width": result["image_width"], + "image_height": result["image_height"], + } + + +@router.post("/sessions/{session_id}/ground-truth/{page_number}") +async def save_ground_truth(session_id: str, page_number: int, data: dict = Body(...)): + """Save ground truth labels for a page. + + Expects body with 'entries' list - each entry has english, german, example, + status ('confirmed' | 'edited' | 'skipped'), and bbox fields. + """ + logger.info(f"Save ground truth for session {session_id}, page {page_number}") + + if session_id not in _sessions: + raise HTTPException(status_code=404, detail="Session not found") + + entries = data.get("entries", []) + if not entries: + raise HTTPException(status_code=400, detail="No entries provided") + + # Save in session + session = _sessions[session_id] + if "ground_truth" not in session: + session["ground_truth"] = {} + session["ground_truth"][str(page_number)] = entries + + # Also save to disk + os.makedirs(GROUND_TRUTH_DIR, exist_ok=True) + gt_path = os.path.join(GROUND_TRUTH_DIR, f"{session_id}_page{page_number}.json") + gt_data = { + "session_id": session_id, + "page_number": page_number, + "saved_at": datetime.now().isoformat(), + "entry_count": len(entries), + "entries": entries, + } + with open(gt_path, 'w', encoding='utf-8') as f: + json.dump(gt_data, f, ensure_ascii=False, indent=2) + + logger.info(f"Ground truth saved: {len(entries)} entries to {gt_path}") + + confirmed = sum(1 for e in entries if e.get("status") == "confirmed") + edited = sum(1 for e in entries if e.get("status") == "edited") + skipped = sum(1 for e in entries if e.get("status") == "skipped") + + return { + "success": True, + "saved_count": len(entries), + "confirmed": confirmed, + "edited": edited, + "skipped": skipped, + "file_path": gt_path, + } + + +@router.get("/sessions/{session_id}/ground-truth/{page_number}") +async def load_ground_truth(session_id: str, page_number: int): + """Load saved ground truth for a page.""" + logger.info(f"Load ground truth for session {session_id}, page {page_number}") + + if session_id not in _sessions: + raise HTTPException(status_code=404, detail="Session not found") + + # Try session cache first + session = _sessions[session_id] + cached = session.get("ground_truth", {}).get(str(page_number)) + if cached: + return {"success": True, "entries": cached, "source": "cache"} + + # Try disk + gt_path = os.path.join(GROUND_TRUTH_DIR, f"{session_id}_page{page_number}.json") + if not os.path.exists(gt_path): + raise HTTPException(status_code=404, detail="No ground truth found for this page") + + with open(gt_path, 'r', encoding='utf-8') as f: + gt_data = json.load(f) + + return {"success": True, "entries": gt_data.get("entries", []), "source": "disk"}