diff --git a/admin-lehrer/app/(admin)/ai/ocr-ground-truth/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-ground-truth/page.tsx index 0b9a531..4c747f1 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-ground-truth/page.tsx +++ b/admin-lehrer/app/(admin)/ai/ocr-ground-truth/page.tsx @@ -1,19 +1,15 @@ 'use client' /** - * Ground-Truth Review Workflow + * Ground-Truth Queue & Progress * - * Efficient mass-review of OCR sessions: - * - Session queue with auto-advance - * - Split-view: original image left, grid right - * - Confidence highlighting on cells - * - Quick-accept per row - * - Inline cell editing - * - Batch mark as ground truth - * - Progress tracking + * Overview page showing all sessions with their GT status. + * Clicking a session opens it in the Kombi Pipeline (/ai/ocr-overlay) + * where the actual review (split-view, inline edit, GT marking) happens. */ -import { useState, useEffect, useCallback, useRef } from 'react' +import { useState, useEffect, useCallback } from 'react' +import { useRouter } from 'next/navigation' import { PagePurpose } from '@/components/common/PagePurpose' const KLAUSUR_API = '/klausur-api' @@ -32,27 +28,14 @@ interface Session { has_ground_truth: boolean } -interface GridZone { - zone_id: string - zone_type: string - columns: Array<{ col_index: number; col_type: string; header: string }> - rows: Array<{ row_index: number; is_header: boolean }> - cells: GridCell[] -} - -interface GridCell { - cell_id: string - row_index: number - col_index: number - col_type: string - text: string - confidence?: number - is_bold?: boolean -} - -interface GridResult { - zones: GridZone[] - summary?: { +interface GTSession { + session_id: string + name: string + filename: string + document_category: string | null + pipeline: string | null + saved_at: string | null + summary: { total_zones: number total_columns: number total_rows: number @@ -60,221 +43,125 @@ interface GridResult { } } -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -function confidenceColor(conf: number | undefined): string { - if (conf === undefined) return '' - if (conf >= 80) return 'bg-emerald-50' - if (conf >= 50) return 'bg-amber-50' - return 'bg-red-50' -} - -function confidenceBorder(conf: number | undefined): string { - if (conf === undefined) return 'border-slate-200' - if (conf >= 80) return 'border-emerald-200' - if (conf >= 50) return 'border-amber-300' - return 'border-red-300' -} - // --------------------------------------------------------------------------- // Component // --------------------------------------------------------------------------- -export default function GroundTruthReviewPage() { - // Session list & queue +export default function GroundTruthQueuePage() { + const router = useRouter() const [allSessions, setAllSessions] = useState([]) - const [filter, setFilter] = useState<'all' | 'unreviewed' | 'reviewed'>('unreviewed') - const [currentIdx, setCurrentIdx] = useState(0) + const [gtSessions, setGtSessions] = useState([]) + const [filter, setFilter] = useState<'all' | 'unreviewed' | 'reviewed'>('all') const [loading, setLoading] = useState(true) - - // Current session data - const [grid, setGrid] = useState(null) - const [loadingGrid, setLoadingGrid] = useState(false) - const [editingCell, setEditingCell] = useState(null) - const [editText, setEditText] = useState('') - const [acceptedRows, setAcceptedRows] = useState>(new Set()) - const [zoom, setZoom] = useState(100) - - // Batch operations const [selectedSessions, setSelectedSessions] = useState>(new Set()) const [marking, setMarking] = useState(false) const [markResult, setMarkResult] = useState(null) - // Stats - const [reviewedCount, setReviewedCount] = useState(0) - const [totalCount, setTotalCount] = useState(0) - - const imageRef = useRef(null) - - // Load all sessions - const loadSessions = useCallback(async () => { + // Load sessions + GT sessions + const loadData = useCallback(async () => { setLoading(true) try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions?limit=200`) - if (!res.ok) return - const data = await res.json() - const sessions: Session[] = (data.sessions || []).map((s: any) => ({ - id: s.id, - name: s.name || '', - filename: s.filename || '', - status: s.status || 'active', - created_at: s.created_at || '', - document_category: s.document_category || null, - has_ground_truth: !!(s.ground_truth && s.ground_truth.build_grid_reference), - })) - setAllSessions(sessions) - setTotalCount(sessions.length) - setReviewedCount(sessions.filter(s => s.has_ground_truth).length) + const [sessRes, gtRes] = await Promise.all([ + fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions?limit=200`), + fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/ground-truth-sessions`), + ]) + + if (sessRes.ok) { + const data = await sessRes.json() + const gtSet = new Set() + + if (gtRes.ok) { + const gtData = await gtRes.json() + const gts: GTSession[] = gtData.sessions || [] + setGtSessions(gts) + for (const g of gts) gtSet.add(g.session_id) + } + + const sessions: Session[] = (data.sessions || []) + .filter((s: any) => !s.parent_session_id) + .map((s: any) => ({ + id: s.id, + name: s.name || '', + filename: s.filename || '', + status: s.status || 'active', + created_at: s.created_at || '', + document_category: s.document_category || null, + has_ground_truth: gtSet.has(s.id), + })) + setAllSessions(sessions) + } } catch (e) { - console.error('Failed to load sessions:', e) + console.error('Failed to load data:', e) } finally { setLoading(false) } }, []) - useEffect(() => { loadSessions() }, [loadSessions]) + useEffect(() => { + loadData() + }, [loadData]) // Filtered sessions - const filteredSessions = allSessions.filter(s => { - if (filter === 'unreviewed') return !s.has_ground_truth && s.status === 'active' + const filteredSessions = allSessions.filter((s) => { + if (filter === 'unreviewed') return !s.has_ground_truth if (filter === 'reviewed') return s.has_ground_truth return true }) - const currentSession = filteredSessions[currentIdx] || null + const reviewedCount = allSessions.filter((s) => s.has_ground_truth).length + const totalCount = allSessions.length + const pct = totalCount > 0 ? Math.round((reviewedCount / totalCount) * 100) : 0 - // Load grid for current session - const loadGrid = useCallback(async (sessionId: string) => { - setLoadingGrid(true) - setGrid(null) - setAcceptedRows(new Set()) - setEditingCell(null) - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`) - if (res.ok) { - const data = await res.json() - setGrid(data.grid || data) - } - } catch (e) { - console.error('Failed to load grid:', e) - } finally { - setLoadingGrid(false) - } - }, []) - - useEffect(() => { - if (currentSession) loadGrid(currentSession.id) - }, [currentSession, loadGrid]) - - // Navigation - const goNext = () => { - if (currentIdx < filteredSessions.length - 1) setCurrentIdx(currentIdx + 1) - } - const goPrev = () => { - if (currentIdx > 0) setCurrentIdx(currentIdx - 1) + // Open session in Kombi pipeline + const openInPipeline = (sessionId: string) => { + router.push(`/ai/ocr-overlay?session=${sessionId}&mode=kombi`) } - // Accept row - const acceptRow = (zoneId: string, rowIdx: number) => { - const key = `${zoneId}-${rowIdx}` - setAcceptedRows(prev => new Set([...prev, key])) - } - - // Edit cell - const startEdit = (cell: GridCell) => { - setEditingCell(cell.cell_id) - setEditText(cell.text) - } - - const saveEdit = async () => { - if (!editingCell || !currentSession) return - try { - await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${currentSession.id}/update-cell`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ cell_id: editingCell, text: editText }), - }) - // Update local state - if (grid) { - const newGrid = { ...grid } - for (const zone of newGrid.zones) { - for (const cell of zone.cells) { - if (cell.cell_id === editingCell) { - cell.text = editText - } - } - } - setGrid(newGrid) - } - } catch (e) { - console.error('Failed to save cell:', e) - } - setEditingCell(null) - } - - // Mark as ground truth - const markGroundTruth = async (sessionId: string) => { - setMarking(true) - setMarkResult(null) - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth`, { - method: 'POST', - }) - if (res.ok) { - setMarkResult('success') - // Update local session state - setAllSessions(prev => prev.map(s => - s.id === sessionId ? { ...s, has_ground_truth: true } : s - )) - setReviewedCount(prev => prev + 1) - } else { - setMarkResult('error') - } - } catch { - setMarkResult('error') - } finally { - setMarking(false) - } - } - - // Batch mark + // Batch mark as GT const batchMark = async () => { setMarking(true) let success = 0 for (const sid of selectedSessions) { try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}/mark-ground-truth`, { - method: 'POST', - }) + const res = await fetch( + `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}/mark-ground-truth?pipeline=kombi`, + { method: 'POST' }, + ) if (res.ok) success++ - } catch { /* skip */ } + } catch { + /* skip */ + } } - setAllSessions(prev => prev.map(s => - selectedSessions.has(s.id) ? { ...s, has_ground_truth: true } : s - )) - setReviewedCount(prev => prev + success) setSelectedSessions(new Set()) setMarking(false) setMarkResult(`${success} Sessions als Ground Truth markiert`) setTimeout(() => setMarkResult(null), 3000) + loadData() } - // All cells for current grid - const allCells = grid?.zones?.flatMap(z => z.cells) || [] - const lowConfCells = allCells.filter(c => (c.confidence ?? 100) < 50) + const toggleSelect = (id: string) => { + setSelectedSessions((prev) => { + const next = new Set(prev) + if (next.has(id)) next.delete(id) + else next.add(id) + return next + }) + } - const imageUrl = currentSession - ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${currentSession.id}/image/original` - : null + const selectAll = () => { + if (selectedSessions.size === filteredSessions.length) { + setSelectedSessions(new Set()) + } else { + setSelectedSessions(new Set(filteredSessions.map((s) => s.id))) + } + } return (
-
+
{/* Progress Bar */}
-

Ground Truth Review

+

+ Ground Truth Fortschritt +

- {reviewedCount} von {totalCount} geprueft ({totalCount > 0 ? Math.round(reviewedCount / totalCount * 100) : 0}%) + {reviewedCount} von {totalCount} markiert ({pct}%)
0 ? (reviewedCount / totalCount) * 100 : 0}%` }} + style={{ width: `${pct}%` }} />
+
+ + + {reviewedCount} Ground Truth + + + + {totalCount - reviewedCount} offen + + + {gtSessions.reduce((sum, g) => sum + g.summary.total_cells, 0)}{' '} + Referenz-Zellen gesamt + +
- {/* Filter + Queue */} -
+ {/* Filter + Actions */} +
- {(['unreviewed', 'reviewed', 'all'] as const).map(f => ( + {(['all', 'unreviewed', 'reviewed'] as const).map((f) => ( ))}
- {/* Navigation */} -
- - - {filteredSessions.length > 0 ? `${currentIdx + 1} / ${filteredSessions.length}` : '—'} - - + )} +
- - {/* Batch mark button */} - {selectedSessions.size > 0 && ( - - )}
{/* Toast */} {markResult && ( -
- {markResult === 'success' ? 'Als Ground Truth markiert!' : markResult === 'error' ? 'Fehler beim Markieren' : markResult} +
+ {markResult}
)} - {/* Main Content: Split View */} + {/* Session List */} {loading ? ( -
Lade Sessions...
- ) : !currentSession ? ( +
+ Lade Sessions... +
+ ) : filteredSessions.length === 0 ? (

Keine Sessions in dieser Ansicht

) : ( -
- {/* Left: Original Image */} -
-
- - {currentSession.name || currentSession.filename} - -
- - {zoom}% - -
-
-
- {imageUrl && ( - Original scan - )} -
-
- - {/* Right: Grid Review */} -
-
-
- - {allCells.length} Zellen - - {lowConfCells.length > 0 && ( - - {lowConfCells.length} niedrige Konfidenz - - )} -
-
- {!currentSession.has_ground_truth && ( -
) diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx index d156af8..6b9bd34 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx +++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx @@ -1,6 +1,7 @@ 'use client' -import { useCallback, useEffect, useState } from 'react' +import { useCallback, useEffect, useState, useRef } from 'react' +import { useSearchParams } from 'next/navigation' import { PagePurpose } from '@/components/common/PagePurpose' import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper' import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation' @@ -13,6 +14,7 @@ import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecogniti import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction' import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep' import { GridEditor } from '@/components/grid-editor/GridEditor' +import { StepGridReview } from '@/components/ocr-pipeline/StepGridReview' import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types' const KLAUSUR_API = '/klausur-api' @@ -39,6 +41,9 @@ export default function OcrOverlayPage() { })), ) + const searchParams = useSearchParams() + const deepLinkHandled = useRef(false) + useEffect(() => { loadSessions() }, []) @@ -114,6 +119,22 @@ export default function OcrOverlayPage() { } }, []) + // Handle deep-link: ?session=xxx&mode=kombi (from GT Queue page) + useEffect(() => { + if (deepLinkHandled.current) return + const urlSession = searchParams.get('session') + const urlMode = searchParams.get('mode') + if (urlSession) { + deepLinkHandled.current = true + if (urlMode === 'kombi' || urlMode === 'paddle-direct') { + setMode(urlMode) + const baseSteps = urlMode === 'kombi' ? KOMBI_STEPS : PADDLE_DIRECT_STEPS + setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) + } + openSession(urlSession) + } + }, [searchParams, openSession]) + const deleteSession = useCallback(async (sid: string) => { try { await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' }) @@ -306,7 +327,7 @@ export default function OcrOverlayPage() { ) : null case 6: return mode === 'kombi' ? ( - + ) : null default: return null diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts b/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts index fb45719..45492ce 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts +++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts @@ -71,7 +71,7 @@ export const KOMBI_STEPS: PipelineStep[] = [ { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' }, { id: 'kombi', name: 'PP-OCRv5 + Tesseract', icon: '🔀', status: 'pending' }, { id: 'structure', name: 'Struktur', icon: '🔍', status: 'pending' }, - { id: 'grid-editor', name: 'Tabelle', icon: '📊', status: 'pending' }, + { id: 'grid-editor', name: 'Review & GT', icon: '📊', status: 'pending' }, ] /** Map from DB step to overlay UI step index */ diff --git a/admin-lehrer/components/ocr-pipeline/StepGridReview.tsx b/admin-lehrer/components/ocr-pipeline/StepGridReview.tsx new file mode 100644 index 0000000..ad1a488 --- /dev/null +++ b/admin-lehrer/components/ocr-pipeline/StepGridReview.tsx @@ -0,0 +1,409 @@ +'use client' + +/** + * StepGridReview — Last step of the Kombi Pipeline + * + * Split view: original scan on the left, GridEditor on the right. + * Adds confidence stats, row-accept buttons, and integrates with + * the GT marking flow in the parent page. + */ + +import { useCallback, useEffect, useState } from 'react' +import { useGridEditor } from '@/components/grid-editor/useGridEditor' +import type { GridZone } from '@/components/grid-editor/types' +import { GridToolbar } from '@/components/grid-editor/GridToolbar' +import { GridTable } from '@/components/grid-editor/GridTable' + +const KLAUSUR_API = '/klausur-api' + +interface StepGridReviewProps { + sessionId: string | null + onNext?: () => void +} + +export function StepGridReview({ sessionId, onNext }: StepGridReviewProps) { + const { + grid, + loading, + saving, + error, + dirty, + selectedCell, + setSelectedCell, + buildGrid, + loadGrid, + saveGrid, + updateCellText, + toggleColumnBold, + toggleRowHeader, + undo, + redo, + canUndo, + canRedo, + getAdjacentCell, + } = useGridEditor(sessionId) + + const [showImage, setShowImage] = useState(true) + const [zoom, setZoom] = useState(100) + const [acceptedRows, setAcceptedRows] = useState>(new Set()) + + // Load grid on mount + useEffect(() => { + if (sessionId) loadGrid() + }, [sessionId, loadGrid]) + + // Reset accepted rows when session changes + useEffect(() => { + setAcceptedRows(new Set()) + }, [sessionId]) + + // Keyboard shortcuts + useEffect(() => { + const handler = (e: KeyboardEvent) => { + if ((e.metaKey || e.ctrlKey) && e.key === 'z' && !e.shiftKey) { + e.preventDefault() + undo() + } else if ((e.metaKey || e.ctrlKey) && e.key === 'z' && e.shiftKey) { + e.preventDefault() + redo() + } else if ((e.metaKey || e.ctrlKey) && e.key === 's') { + e.preventDefault() + saveGrid() + } + } + window.addEventListener('keydown', handler) + return () => window.removeEventListener('keydown', handler) + }, [undo, redo, saveGrid]) + + const handleNavigate = useCallback( + (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => { + const target = getAdjacentCell(cellId, direction) + if (target) { + setSelectedCell(target) + setTimeout(() => { + const el = document.getElementById(`cell-${target}`) + if (el) { + el.focus() + if (el instanceof HTMLInputElement) el.select() + } + }, 0) + } + }, + [getAdjacentCell, setSelectedCell], + ) + + const acceptRow = (zoneIdx: number, rowIdx: number) => { + setAcceptedRows((prev) => { + const next = new Set(prev) + const key = `${zoneIdx}-${rowIdx}` + if (next.has(key)) next.delete(key) + else next.add(key) + return next + }) + } + + const acceptAllRows = () => { + if (!grid) return + const all = new Set() + for (const zone of grid.zones) { + for (const row of zone.rows) { + all.add(`${zone.zone_index}-${row.index}`) + } + } + setAcceptedRows(all) + } + + // Confidence stats + const allCells = grid?.zones?.flatMap((z) => z.cells) || [] + const lowConfCells = allCells.filter( + (c) => c.confidence > 0 && c.confidence < 60, + ) + const totalRows = grid?.zones?.reduce((sum, z) => sum + z.rows.length, 0) ?? 0 + + if (!sessionId) { + return ( +
+ Keine Session ausgewaehlt. +
+ ) + } + + if (loading) { + return ( +
+
+ + + + + Grid wird geladen... +
+
+ ) + } + + if (error) { + return ( +
+

+ Fehler: {error} +

+ +
+ ) + } + + if (!grid || !grid.zones.length) { + return ( +
+

Kein Grid vorhanden.

+ +
+ ) + } + + const imageUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped` + + return ( +
+ {/* Review Stats Bar */} +
+ + {grid.summary.total_zones} Zone(n), {grid.summary.total_columns} Spalten,{' '} + {grid.summary.total_rows} Zeilen, {grid.summary.total_cells} Zellen + + {lowConfCells.length > 0 && ( + + {lowConfCells.length} niedrige Konfidenz + + )} + + {acceptedRows.size}/{totalRows} Zeilen akzeptiert + + {acceptedRows.size < totalRows && ( + + )} +
+ + + {grid.duration_seconds.toFixed(1)}s + +
+
+ + {/* Toolbar */} +
+ setShowImage(!showImage)} + /> +
+ + {/* Split View: Image left + Grid right */} +
+ {/* Left: Original Image */} + {showImage && ( +
+
+ + Original Scan (zugeschnitten) + +
+ + + {zoom}% + + + +
+
+
+ {/* eslint-disable-next-line @next/next/no-img-element */} + Original scan +
+
+ )} + + {/* Right: Grid with row-accept buttons */} +
+ {/* Zone tables with row-accept buttons */} + {(() => { + // Group consecutive zones with same vsplit_group + const groups: GridZone[][] = [] + for (const zone of grid.zones) { + const prev = groups[groups.length - 1] + if ( + prev && + zone.vsplit_group != null && + prev[0].vsplit_group === zone.vsplit_group + ) { + prev.push(zone) + } else { + groups.push([zone]) + } + } + return groups.map((group) => ( +
+ {/* Row-accept sidebar wraps each zone group */} +
+ {/* Accept buttons column */} +
+ {group[0].rows.map((row) => { + const key = `${group[0].zone_index}-${row.index}` + const isAccepted = acceptedRows.has(key) + return ( + + ) + })} +
+ + {/* Grid table(s) */} +
1 ? 'flex gap-2' : ''}`} + > + {group.map((zone) => ( +
1 ? 'flex-1 min-w-0' : ''} bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden`} + > + +
+ ))} +
+
+
+ )) + })()} +
+
+ + {/* Tips + Next */} +
+
+ Tab: naechste Zelle + Enter: Zeile runter + Ctrl+Z/Y: Undo/Redo + Ctrl+S: Speichern +
+ {onNext && ( + + )} +
+
+ ) +}