From c3f1547e32b8a59135f1a5e362224e59387ecfa9 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sat, 14 Mar 2026 23:41:03 +0100 Subject: [PATCH] feat: add Excel-like grid editor for OCR overlay (Kombi mode step 6) Backend: new grid_editor_api.py with build-grid endpoint that detects bordered boxes, splits page into zones, clusters columns/rows per zone from Kombi word positions. New DB column grid_editor_result JSONB. Frontend: GridEditor component with editable HTML tables per zone, column bold toggle, header row toggle, undo/redo, keyboard navigation (Tab/Enter/Arrow), image overlay verification, and save/load. Co-Authored-By: Claude Opus 4.6 --- .../app/(admin)/ai/ocr-overlay/page.tsx | 15 +- .../app/(admin)/ai/ocr-overlay/types.ts | 1 + .../components/grid-editor/GridEditor.tsx | 218 +++++++++ .../grid-editor/GridImageOverlay.tsx | 98 ++++ .../components/grid-editor/GridTable.tsx | 153 +++++++ .../components/grid-editor/GridToolbar.tsx | 110 +++++ admin-lehrer/components/grid-editor/index.ts | 6 + admin-lehrer/components/grid-editor/types.ts | 97 ++++ .../components/grid-editor/useGridEditor.ts | 288 ++++++++++++ klausur-service/backend/grid_editor_api.py | 426 ++++++++++++++++++ klausur-service/backend/main.py | 2 + .../migrations/007_add_grid_editor_result.sql | 4 + .../backend/ocr_pipeline_session_store.py | 11 +- 13 files changed, 1423 insertions(+), 6 deletions(-) create mode 100644 admin-lehrer/components/grid-editor/GridEditor.tsx create mode 100644 admin-lehrer/components/grid-editor/GridImageOverlay.tsx create mode 100644 admin-lehrer/components/grid-editor/GridTable.tsx create mode 100644 admin-lehrer/components/grid-editor/GridToolbar.tsx create mode 100644 admin-lehrer/components/grid-editor/index.ts create mode 100644 admin-lehrer/components/grid-editor/types.ts create mode 100644 admin-lehrer/components/grid-editor/useGridEditor.ts create mode 100644 klausur-service/backend/grid_editor_api.py create mode 100644 
klausur-service/backend/migrations/007_add_grid_editor_result.sql diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx index 1c3ded3..f89c0ba 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx +++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx @@ -11,6 +11,7 @@ import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection' import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition' import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction' import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep' +import { GridEditor } from '@/components/grid-editor/GridEditor' import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types' const KLAUSUR_API = '/klausur-api' @@ -72,13 +73,17 @@ export default function OcrOverlayPage() { const m = isKombi ? 'kombi' : 'paddle-direct' const baseSteps = isKombi ? KOMBI_STEPS : PADDLE_DIRECT_STEPS setMode(m) + + // For Kombi: if grid_editor_result exists, jump to grid editor step (5) + const hasGrid = isKombi && data.grid_editor_result + const activeStep = hasGrid ? 5 : 4 setSteps( baseSteps.map((s, i) => ({ ...s, - status: i < 4 ? 'completed' : i === 4 ? 'active' : 'pending', + status: i < activeStep ? 'completed' : i === activeStep ? 'active' : 'pending', })), ) - setCurrentStep(4) + setCurrentStep(activeStep) } else { setMode('pipeline') // Map DB step to overlay UI step @@ -256,6 +261,10 @@ export default function OcrOverlayPage() { ) : ( ) + case 5: + return mode === 'kombi' ? 
( + + ) : null default: return null } @@ -512,7 +521,7 @@ export default function OcrOverlayPage() { : 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300' }`} > - Kombi (5 Schritte) + Kombi (6 Schritte) diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts b/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts index 993092a..ee01e10 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts +++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts @@ -70,6 +70,7 @@ export const KOMBI_STEPS: PipelineStep[] = [ { id: 'dewarp', name: 'Entzerrung', icon: 'πŸ”§', status: 'pending' }, { id: 'crop', name: 'Zuschneiden', icon: 'βœ‚οΈ', status: 'pending' }, { id: 'kombi', name: 'PP-OCRv5 + Tesseract', icon: 'πŸ”€', status: 'pending' }, + { id: 'grid-editor', name: 'Tabelle', icon: 'πŸ“Š', status: 'pending' }, ] /** Map from DB step to overlay UI step index */ diff --git a/admin-lehrer/components/grid-editor/GridEditor.tsx b/admin-lehrer/components/grid-editor/GridEditor.tsx new file mode 100644 index 0000000..18af07e --- /dev/null +++ b/admin-lehrer/components/grid-editor/GridEditor.tsx @@ -0,0 +1,218 @@ +'use client' + +import { useCallback, useEffect, useState } from 'react' +import { useGridEditor } from './useGridEditor' +import { GridToolbar } from './GridToolbar' +import { GridTable } from './GridTable' +import { GridImageOverlay } from './GridImageOverlay' + +interface GridEditorProps { + sessionId: string | null + onNext?: () => void +} + +export function GridEditor({ sessionId, onNext }: GridEditorProps) { + const { + grid, + loading, + saving, + error, + dirty, + selectedCell, + setSelectedCell, + buildGrid, + loadGrid, + saveGrid, + updateCellText, + toggleColumnBold, + toggleRowHeader, + undo, + redo, + canUndo, + canRedo, + getAdjacentCell, + } = useGridEditor(sessionId) + + const [showOverlay, setShowOverlay] = useState(false) + + // Load grid on mount + useEffect(() => { + if (sessionId) { + loadGrid() + } + }, [sessionId, 
loadGrid]) + + // Keyboard shortcuts: Ctrl/Cmd+Z = undo, Ctrl/Cmd+Shift+Z or Ctrl/Cmd+Y = redo, Ctrl/Cmd+S = save + useEffect(() => { + const handler = (e: KeyboardEvent) => { + if (!(e.metaKey || e.ctrlKey)) return + // e.key reflects Shift/CapsLock (it is 'Z', not 'z', while Shift is held), + // so normalise the case before comparing; otherwise the redo branch never matches. + const key = e.key.toLowerCase() + if (key === 'z' && !e.shiftKey) { + e.preventDefault() + undo() + } else if ((key === 'z' && e.shiftKey) || key === 'y') { + e.preventDefault() + redo() + } else if (key === 's') { + e.preventDefault() + saveGrid() + } + } + window.addEventListener('keydown', handler) + return () => window.removeEventListener('keydown', handler) + }, [undo, redo, saveGrid]) + + const handleNavigate = useCallback( + (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => { + const target = getAdjacentCell(cellId, direction) + if (target) { + setSelectedCell(target) + // Focus the input + setTimeout(() => { + const el = document.getElementById(`cell-${target}`) + if (el) { + el.focus() + if (el instanceof HTMLInputElement) el.select() + } + }, 0) + } + }, + [getAdjacentCell, setSelectedCell], + ) + + if (!sessionId) { + return ( +
+ Keine Session ausgewaehlt. +
+ ) + } + + if (loading) { + return ( +
+
+ + + + + Grid wird aufgebaut... +
+
+ ) + } + + if (error) { + return ( +
+

+ Fehler: {error} +

+ +
+ ) + } + + if (!grid || !grid.zones.length) { + return ( +
+

Kein Grid vorhanden.

+ +
+ ) + } + + return ( +
+ {/* Summary bar */} +
+ {grid.summary.total_zones} Zone(n) + {grid.summary.total_columns} Spalten + {grid.summary.total_rows} Zeilen + {grid.summary.total_cells} Zellen + {grid.boxes_detected > 0 && ( + + {grid.boxes_detected} Box(en) erkannt + + )} + + {grid.duration_seconds.toFixed(1)}s + +
+ + {/* Toolbar */} +
+ setShowOverlay(!showOverlay)} + /> +
+ + {/* Image overlay */} + {showOverlay && ( + + )} + + {/* Zone tables */} +
+ {grid.zones.map((zone) => ( +
+ +
+ ))} +
+ + {/* Tip */} +
+ Tab: naechste Zelle + Enter: Zeile runter + Spalte fett: Klick auf Spaltenkopf + Header: Klick auf Zeilennummer + Ctrl+Z/Y: Undo/Redo + Ctrl+S: Speichern +
+ + {/* Next step button */} + {onNext && ( +
+ +
+ )} +
+ ) +} diff --git a/admin-lehrer/components/grid-editor/GridImageOverlay.tsx b/admin-lehrer/components/grid-editor/GridImageOverlay.tsx new file mode 100644 index 0000000..9688dce --- /dev/null +++ b/admin-lehrer/components/grid-editor/GridImageOverlay.tsx @@ -0,0 +1,98 @@ +'use client' + +import type { StructuredGrid } from './types' + +const KLAUSUR_API = '/klausur-api' + +interface GridImageOverlayProps { + sessionId: string + grid: StructuredGrid +} + +const ZONE_COLORS = [ + { border: 'rgba(20,184,166,0.7)', fill: 'rgba(20,184,166,0.05)' }, // teal + { border: 'rgba(245,158,11,0.7)', fill: 'rgba(245,158,11,0.05)' }, // amber + { border: 'rgba(99,102,241,0.7)', fill: 'rgba(99,102,241,0.05)' }, // indigo + { border: 'rgba(236,72,153,0.7)', fill: 'rgba(236,72,153,0.05)' }, // pink +] + +export function GridImageOverlay({ sessionId, grid }: GridImageOverlayProps) { + const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped` + + return ( +
+
+ {/* Source image */} + {/* eslint-disable-next-line @next/next/no-img-element */} + OCR Scan + + {/* SVG overlay */} + + {grid.zones.map((zone) => { + const colors = ZONE_COLORS[zone.zone_index % ZONE_COLORS.length] + const b = zone.bbox_px + + return ( + + {/* Zone border */} + + + {/* Column separators */} + {zone.columns.slice(1).map((col) => ( + + ))} + + {/* Row separators */} + {zone.rows.slice(1).map((row) => ( + + ))} + + {/* Zone label */} + + {zone.zone_type === 'box' ? 'BOX' : 'CONTENT'} Z{zone.zone_index} + {' '}({zone.columns.length}x{zone.rows.length}) + + + ) + })} + +
+
+ ) +} diff --git a/admin-lehrer/components/grid-editor/GridTable.tsx b/admin-lehrer/components/grid-editor/GridTable.tsx new file mode 100644 index 0000000..106ed39 --- /dev/null +++ b/admin-lehrer/components/grid-editor/GridTable.tsx @@ -0,0 +1,153 @@ +'use client' + +import { useCallback, useRef } from 'react' +import type { GridZone } from './types' + +interface GridTableProps { + zone: GridZone + selectedCell: string | null + onSelectCell: (cellId: string) => void + onCellTextChange: (cellId: string, text: string) => void + onToggleColumnBold: (zoneIndex: number, colIndex: number) => void + onToggleRowHeader: (zoneIndex: number, rowIndex: number) => void + onNavigate: (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => void +} + +export function GridTable({ + zone, + selectedCell, + onSelectCell, + onCellTextChange, + onToggleColumnBold, + onToggleRowHeader, + onNavigate, +}: GridTableProps) { + const tableRef = useRef(null) + + const handleKeyDown = useCallback( + (e: React.KeyboardEvent, cellId: string) => { + if (e.key === 'Tab') { + e.preventDefault() + onNavigate(cellId, e.shiftKey ? 'left' : 'right') + } else if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault() + onNavigate(cellId, 'down') + } else if (e.key === 'ArrowUp' && e.altKey) { + e.preventDefault() + onNavigate(cellId, 'up') + } else if (e.key === 'ArrowDown' && e.altKey) { + e.preventDefault() + onNavigate(cellId, 'down') + } else if (e.key === 'Escape') { + ;(e.target as HTMLElement).blur() + } + }, + [onNavigate], + ) + + // Build row→col cell lookup + const cellMap = new Map() + for (const cell of zone.cells) { + cellMap.set(`${cell.row_index}_${cell.col_index}`, cell) + } + + const isBoxZone = zone.zone_type === 'box' + + return ( +
+ {/* Zone label */} +
+ + {isBoxZone ? 'Box' : 'Inhalt'} Zone {zone.zone_index} + + {zone.columns.length} Spalten, {zone.rows.length} Zeilen, {zone.cells.length} Zellen +
+ + + {/* Column headers */} + + + {/* Row number header */} + + ))} + + + + + {zone.rows.map((row) => ( + + {/* Row number */} + + + {/* Cells */} + {zone.columns.map((col) => { + const cell = cellMap.get(`${row.index}_${col.index}`) + const cellId = cell?.cell_id ?? `Z${zone.zone_index}_R${String(row.index).padStart(2, '0')}_C${col.index}` + const isSelected = selectedCell === cellId + const isBold = col.bold || cell?.is_bold + const isLowConf = cell && cell.confidence > 0 && cell.confidence < 60 + + return ( + + ) + })} + + ))} + +
+ {zone.columns.map((col) => ( + onToggleColumnBold(zone.zone_index, col.index)} + title={`Spalte ${col.index + 1} β€” Klick fuer Fett-Toggle`} + > +
+ {col.label} + {col.bold && ( + + B + + )} +
+
onToggleRowHeader(zone.zone_index, row.index)} + title={`Zeile ${row.index + 1} β€” Klick fuer Header-Toggle`} + > + {row.index + 1} + {row.is_header && H} + + { + if (cell) onCellTextChange(cellId, e.target.value) + }} + onFocus={() => onSelectCell(cellId)} + onKeyDown={(e) => handleKeyDown(e, cellId)} + className={`w-full px-2 py-1.5 bg-transparent border-0 outline-none text-gray-800 dark:text-gray-200 ${ + isBold ? 'font-bold' : 'font-normal' + } ${row.is_header ? 'text-base' : 'text-sm'}`} + spellCheck={false} + /> +
+
+ ) +} diff --git a/admin-lehrer/components/grid-editor/GridToolbar.tsx b/admin-lehrer/components/grid-editor/GridToolbar.tsx new file mode 100644 index 0000000..dc7100a --- /dev/null +++ b/admin-lehrer/components/grid-editor/GridToolbar.tsx @@ -0,0 +1,110 @@ +'use client' + +interface GridToolbarProps { + dirty: boolean + saving: boolean + canUndo: boolean + canRedo: boolean + showOverlay: boolean + onSave: () => void + onUndo: () => void + onRedo: () => void + onRebuild: () => void + onToggleOverlay: () => void +} + +export function GridToolbar({ + dirty, + saving, + canUndo, + canRedo, + showOverlay, + onSave, + onUndo, + onRedo, + onRebuild, + onToggleOverlay, +}: GridToolbarProps) { + return ( +
+ {/* Undo / Redo */} +
+ + +
+ + {/* Overlay toggle */} + + + {/* Rebuild */} + + + {/* Spacer */} +
+ + {/* Save */} + +
+ ) +} diff --git a/admin-lehrer/components/grid-editor/index.ts b/admin-lehrer/components/grid-editor/index.ts new file mode 100644 index 0000000..07605eb --- /dev/null +++ b/admin-lehrer/components/grid-editor/index.ts @@ -0,0 +1,6 @@ +export { GridEditor } from './GridEditor' +export { GridTable } from './GridTable' +export { GridToolbar } from './GridToolbar' +export { GridImageOverlay } from './GridImageOverlay' +export { useGridEditor } from './useGridEditor' +export type * from './types' diff --git a/admin-lehrer/components/grid-editor/types.ts b/admin-lehrer/components/grid-editor/types.ts new file mode 100644 index 0000000..fa596c5 --- /dev/null +++ b/admin-lehrer/components/grid-editor/types.ts @@ -0,0 +1,97 @@ +import type { OcrWordBox } from '@/app/(admin)/ai/ocr-pipeline/types' + +// Re-export for convenience +export type { OcrWordBox } + +/** A complete structured grid with zones, ready for the Excel-like editor. */ +export interface StructuredGrid { + session_id: string + image_width: number + image_height: number + zones: GridZone[] + boxes_detected: number + summary: GridSummary + formatting: GridFormatting + duration_seconds: number + edited?: boolean +} + +export interface GridSummary { + total_zones: number + total_columns: number + total_rows: number + total_cells: number + total_words: number +} + +export interface GridFormatting { + bold_columns: number[] + header_rows: number[] +} + +/** A horizontal zone of the page β€” either content or a bordered box. 
*/ +export interface GridZone { + zone_index: number + zone_type: 'content' | 'box' + bbox_px: BBox + bbox_pct: BBox + border: ZoneBorder | null + word_count: number + columns: GridColumn[] + rows: GridRow[] + cells: GridEditorCell[] + header_rows: number[] +} + +export interface BBox { + x: number + y: number + w: number + h: number +} + +export interface ZoneBorder { + thickness: number + confidence: number +} + +export interface GridColumn { + index: number + label: string + x_min_px: number + x_max_px: number + x_min_pct: number + x_max_pct: number + bold: boolean +} + +export interface GridRow { + index: number + y_min_px: number + y_max_px: number + y_min_pct: number + y_max_pct: number + is_header: boolean +} + +export interface GridEditorCell { + cell_id: string + zone_index: number + row_index: number + col_index: number + col_type: string + text: string + confidence: number + bbox_px: BBox + bbox_pct: BBox + word_boxes: OcrWordBox[] + ocr_engine: string + is_bold: boolean +} + +/** Cell formatting applied by the user in the editor. 
*/ +export interface CellFormatting { + bold: boolean + fontSize: 'small' | 'normal' | 'large' + align: 'left' | 'center' | 'right' +} diff --git a/admin-lehrer/components/grid-editor/useGridEditor.ts b/admin-lehrer/components/grid-editor/useGridEditor.ts new file mode 100644 index 0000000..0bd2491 --- /dev/null +++ b/admin-lehrer/components/grid-editor/useGridEditor.ts @@ -0,0 +1,288 @@ +import { useCallback, useRef, useState } from 'react' +import type { StructuredGrid, GridZone } from './types' + +const KLAUSUR_API = '/klausur-api' +const MAX_UNDO = 50 + +export interface GridEditorState { + grid: StructuredGrid | null + loading: boolean + saving: boolean + error: string | null + dirty: boolean + selectedCell: string | null + selectedZone: number | null +} + +export function useGridEditor(sessionId: string | null) { + const [grid, setGrid] = useState(null) + const [loading, setLoading] = useState(false) + const [saving, setSaving] = useState(false) + const [error, setError] = useState(null) + const [dirty, setDirty] = useState(false) + const [selectedCell, setSelectedCell] = useState(null) + const [selectedZone, setSelectedZone] = useState(null) + + // Undo/redo stacks store serialized zone arrays + const undoStack = useRef([]) + const redoStack = useRef([]) + + const pushUndo = useCallback((zones: GridZone[]) => { + undoStack.current.push(JSON.stringify(zones)) + if (undoStack.current.length > MAX_UNDO) { + undoStack.current.shift() + } + redoStack.current = [] + }, []) + + // ------------------------------------------------------------------ + // Load / Build + // ------------------------------------------------------------------ + + const buildGrid = useCallback(async () => { + if (!sessionId) return + setLoading(true) + setError(null) + try { + const res = await fetch( + `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid`, + { method: 'POST' }, + ) + if (!res.ok) { + const data = await res.json().catch(() => ({})) + throw new 
Error(data.detail || `HTTP ${res.status}`) + } + const data: StructuredGrid = await res.json() + setGrid(data) + setDirty(false) + undoStack.current = [] + redoStack.current = [] + } catch (e) { + setError(e instanceof Error ? e.message : String(e)) + } finally { + setLoading(false) + } + }, [sessionId]) + + const loadGrid = useCallback(async () => { + if (!sessionId) return + setLoading(true) + setError(null) + try { + const res = await fetch( + `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`, + ) + if (res.status === 404) { + // No grid yet β€” build it + await buildGrid() + return + } + if (!res.ok) { + const data = await res.json().catch(() => ({})) + throw new Error(data.detail || `HTTP ${res.status}`) + } + const data: StructuredGrid = await res.json() + setGrid(data) + setDirty(false) + undoStack.current = [] + redoStack.current = [] + } catch (e) { + setError(e instanceof Error ? e.message : String(e)) + } finally { + setLoading(false) + } + }, [sessionId, buildGrid]) + + // ------------------------------------------------------------------ + // Save + // ------------------------------------------------------------------ + + const saveGrid = useCallback(async () => { + if (!sessionId || !grid) return + setSaving(true) + try { + const res = await fetch( + `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/save-grid`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(grid), + }, + ) + if (!res.ok) { + const data = await res.json().catch(() => ({})) + throw new Error(data.detail || `HTTP ${res.status}`) + } + setDirty(false) + } catch (e) { + setError(e instanceof Error ? 
e.message : String(e)) + } finally { + setSaving(false) + } + }, [sessionId, grid]) + + // ------------------------------------------------------------------ + // Cell editing + // ------------------------------------------------------------------ + + const updateCellText = useCallback( + (cellId: string, newText: string) => { + if (!grid) return + pushUndo(grid.zones) + + setGrid((prev) => { + if (!prev) return prev + return { + ...prev, + zones: prev.zones.map((zone) => ({ + ...zone, + cells: zone.cells.map((cell) => + cell.cell_id === cellId ? { ...cell, text: newText } : cell, + ), + })), + } + }) + setDirty(true) + }, + [grid, pushUndo], + ) + + // ------------------------------------------------------------------ + // Column formatting + // ------------------------------------------------------------------ + + const toggleColumnBold = useCallback( + (zoneIndex: number, colIndex: number) => { + if (!grid) return + pushUndo(grid.zones) + + setGrid((prev) => { + if (!prev) return prev + return { + ...prev, + zones: prev.zones.map((zone) => { + if (zone.zone_index !== zoneIndex) return zone + const col = zone.columns.find((c) => c.index === colIndex) + const newBold = col ? !col.bold : true + return { + ...zone, + columns: zone.columns.map((c) => + c.index === colIndex ? { ...c, bold: newBold } : c, + ), + cells: zone.cells.map((cell) => + cell.col_index === colIndex + ? 
{ ...cell, is_bold: newBold } + : cell, + ), + } + }), + } + }) + setDirty(true) + }, + [grid, pushUndo], + ) + + // ------------------------------------------------------------------ + // Row formatting + // ------------------------------------------------------------------ + + const toggleRowHeader = useCallback( + (zoneIndex: number, rowIndex: number) => { + if (!grid) return + pushUndo(grid.zones) + + setGrid((prev) => { + if (!prev) return prev + return { + ...prev, + zones: prev.zones.map((zone) => { + if (zone.zone_index !== zoneIndex) return zone + return { + ...zone, + rows: zone.rows.map((r) => + r.index === rowIndex ? { ...r, is_header: !r.is_header } : r, + ), + } + }), + } + }) + setDirty(true) + }, + [grid, pushUndo], + ) + + // ------------------------------------------------------------------ + // Undo / Redo + // ------------------------------------------------------------------ + + const undo = useCallback(() => { + if (!grid || undoStack.current.length === 0) return + redoStack.current.push(JSON.stringify(grid.zones)) + const prev = undoStack.current.pop()! + setGrid((g) => (g ? { ...g, zones: JSON.parse(prev) } : g)) + setDirty(true) + }, [grid]) + + const redo = useCallback(() => { + if (!grid || redoStack.current.length === 0) return + undoStack.current.push(JSON.stringify(grid.zones)) + const next = redoStack.current.pop()! + setGrid((g) => (g ? 
{ ...g, zones: JSON.parse(next) } : g)) + setDirty(true) + }, [grid]) + + const canUndo = undoStack.current.length > 0 + const canRedo = redoStack.current.length > 0 + + // ------------------------------------------------------------------ + // Navigation helpers + // ------------------------------------------------------------------ + + const getAdjacentCell = useCallback( + (cellId: string, direction: 'up' | 'down' | 'left' | 'right'): string | null => { + if (!grid) return null + for (const zone of grid.zones) { + const cell = zone.cells.find((c) => c.cell_id === cellId) + if (!cell) continue + + let targetRow = cell.row_index + let targetCol = cell.col_index + if (direction === 'up') targetRow-- + if (direction === 'down') targetRow++ + if (direction === 'left') targetCol-- + if (direction === 'right') targetCol++ + + const target = zone.cells.find( + (c) => c.row_index === targetRow && c.col_index === targetCol, + ) + return target?.cell_id ?? null + } + return null + }, + [grid], + ) + + return { + grid, + loading, + saving, + error, + dirty, + selectedCell, + selectedZone, + setSelectedCell, + setSelectedZone, + buildGrid, + loadGrid, + saveGrid, + updateCellText, + toggleColumnBold, + toggleRowHeader, + undo, + redo, + canUndo, + canRedo, + getAdjacentCell, + } +} diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py new file mode 100644 index 0000000..1b0edf9 --- /dev/null +++ b/klausur-service/backend/grid_editor_api.py @@ -0,0 +1,426 @@ +""" +Grid Editor API β€” builds a structured, zone-aware grid from Kombi OCR results. + +Takes the merged word positions from paddle-kombi / rapid-kombi and: + 1. Detects bordered boxes on the image (cv_box_detect) + 2. Splits the page into zones (content + box regions) + 3. Clusters words into columns and rows per zone + 4. 
Returns a hierarchical StructuredGrid for the frontend Excel-like editor + +Lizenz: Apache 2.0 (kommerziell nutzbar) +DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. +""" + +import logging +import time +from typing import Any, Dict, List, Optional + +import cv2 +import numpy as np +from fastapi import APIRouter, HTTPException, Request + +from cv_box_detect import detect_boxes, split_page_into_zones +from cv_words_first import _cluster_columns, _cluster_rows, _build_cells +from ocr_pipeline_session_store import ( + get_session_db, + get_session_image, + update_session_db, +) + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["grid-editor"]) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _flatten_word_boxes(cells: List[Dict]) -> List[Dict]: + """Extract all word_boxes from cells into a flat list of word dicts.""" + words: List[Dict] = [] + for cell in cells: + for wb in cell.get("word_boxes") or []: + if wb.get("text", "").strip(): + words.append({ + "text": wb["text"], + "left": wb["left"], + "top": wb["top"], + "width": wb["width"], + "height": wb["height"], + "conf": wb.get("conf", 0), + }) + return words + + +def _words_in_zone( + words: List[Dict], + zone_y: int, + zone_h: int, + zone_x: int, + zone_w: int, +) -> List[Dict]: + """Filter words whose center point (both X and Y) falls within a zone's bounds, edges inclusive.""" + zone_y_end = zone_y + zone_h + zone_x_end = zone_x + zone_w + result = [] + for w in words: + cy = w["top"] + w["height"] / 2 + cx = w["left"] + w["width"] / 2 + if zone_y <= cy <= zone_y_end and zone_x <= cx <= zone_x_end: + result.append(w) + return result + + +def _detect_header_rows( + rows: List[Dict], + zone_words: List[Dict], + zone_y: int, +) -> List[int]: + """Heuristic: the first row is a header if its tallest word is clearly larger than the median word height or + there's a significant gap after it. Returns [] or [0]; zone_y is not used.""" + if len(rows) < 2: + 
return [] + + headers = [] + first_row = rows[0] + second_row = rows[1] + + # Gap between first and second row > 0.5x average row height + avg_h = sum(r["y_max"] - r["y_min"] for r in rows) / len(rows) + gap = second_row["y_min"] - first_row["y_max"] + if gap > avg_h * 0.5: + headers.append(0) + + # Also check if the tallest first-row word exceeds 1.3x the median word height (bold/header text) + first_row_words = [ + w for w in zone_words + if first_row["y_min"] <= w["top"] + w["height"] / 2 <= first_row["y_max"] + ] + if first_row_words: + first_h = max(w["height"] for w in first_row_words) + all_heights = [w["height"] for w in zone_words] + median_h = sorted(all_heights)[len(all_heights) // 2] if all_heights else first_h + if first_h > median_h * 1.3: + if 0 not in headers: + headers.append(0) + + return headers + + +def _build_zone_grid( + zone_words: List[Dict], + zone_x: int, + zone_y: int, + zone_w: int, + zone_h: int, + zone_index: int, + img_w: int, + img_h: int, +) -> Dict[str, Any]: + """Build columns, rows, cells for a single zone from its words.""" + if not zone_words: + return { + "columns": [], + "rows": [], + "cells": [], + "header_rows": [], + } + + # Cluster columns and rows + columns = _cluster_columns(zone_words, zone_w) + rows = _cluster_rows(zone_words) + + if not columns or not rows: + return { + "columns": [], + "rows": [], + "cells": [], + "header_rows": [], + } + + # Build cells + cells = _build_cells(zone_words, columns, rows, img_w, img_h) + + # Prefix cell IDs with zone index + for cell in cells: + cell["cell_id"] = f"Z{zone_index}_{cell['cell_id']}" + cell["zone_index"] = zone_index + + # Detect header rows + header_rows = _detect_header_rows(rows, zone_words, zone_y) + + # Convert columns to output format with percentages + out_columns = [] + for col in columns: + x_min = col["x_min"] + x_max = col["x_max"] + out_columns.append({ + "index": col["index"], + "label": col["type"], + "x_min_px": round(x_min), + "x_max_px": round(x_max), + "x_min_pct": 
round(x_min / img_w * 100, 2) if img_w else 0, + "x_max_pct": round(x_max / img_w * 100, 2) if img_w else 0, + "bold": False, + }) + + # Convert rows to output format with percentages + out_rows = [] + for row in rows: + out_rows.append({ + "index": row["index"], + "y_min_px": round(row["y_min"]), + "y_max_px": round(row["y_max"]), + "y_min_pct": round(row["y_min"] / img_h * 100, 2) if img_h else 0, + "y_max_pct": round(row["y_max"] / img_h * 100, 2) if img_h else 0, + "is_header": row["index"] in header_rows, + }) + + return { + "columns": out_columns, + "rows": out_rows, + "cells": cells, + "header_rows": header_rows, + } + + +def _get_content_bounds(words: List[Dict]) -> tuple: + """Get content bounds from word positions.""" + if not words: + return 0, 0, 0, 0 + x_min = min(w["left"] for w in words) + y_min = min(w["top"] for w in words) + x_max = max(w["left"] + w["width"] for w in words) + y_max = max(w["top"] + w["height"] for w in words) + return x_min, y_min, x_max - x_min, y_max - y_min + + +# --------------------------------------------------------------------------- +# Endpoints +# --------------------------------------------------------------------------- + +@router.post("/sessions/{session_id}/build-grid") +async def build_grid(session_id: str): + """Build a structured, zone-aware grid from existing Kombi word results. + + Requires that paddle-kombi or rapid-kombi has already been run on the session. + Uses the image for box detection and the word positions for grid structuring. + + Returns a StructuredGrid with zones, each containing their own + columns, rows, and cells β€” ready for the frontend Excel-like editor. + """ + t0 = time.time() + + # 1. 
Load session and word results + session = await get_session_db(session_id) + if not session: + raise HTTPException(status_code=404, detail=f"Session {session_id} not found") + + word_result = session.get("word_result") + if not word_result or not word_result.get("cells"): + raise HTTPException( + status_code=400, + detail="No word results found. Run paddle-kombi or rapid-kombi first.", + ) + + img_w = word_result.get("image_width", 0) + img_h = word_result.get("image_height", 0) + if not img_w or not img_h: + raise HTTPException(status_code=400, detail="Missing image dimensions in word_result") + + # 2. Flatten all word boxes from cells + all_words = _flatten_word_boxes(word_result["cells"]) + if not all_words: + raise HTTPException(status_code=400, detail="No word boxes found in cells") + + logger.info("build-grid session %s: %d words from %d cells", + session_id, len(all_words), len(word_result["cells"])) + + # 3. Load image for box detection + img_png = await get_session_image(session_id, "cropped") + if not img_png: + img_png = await get_session_image(session_id, "dewarped") + if not img_png: + img_png = await get_session_image(session_id, "original") + + zones_data: List[Dict[str, Any]] = [] + boxes_detected = 0 + + content_x, content_y, content_w, content_h = _get_content_bounds(all_words) + + if img_png: + # Decode image for box detection + arr = np.frombuffer(img_png, dtype=np.uint8) + img_bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR) + + if img_bgr is not None: + # Detect bordered boxes + boxes = detect_boxes( + img_bgr, + content_x=content_x, + content_w=content_w, + content_y=content_y, + content_h=content_h, + ) + boxes_detected = len(boxes) + + if boxes: + # Split page into zones + page_zones = split_page_into_zones( + content_x, content_y, content_w, content_h, boxes + ) + + for pz in page_zones: + zone_words = _words_in_zone( + all_words, pz.y, pz.height, pz.x, pz.width + ) + grid = _build_zone_grid( + zone_words, pz.x, pz.y, pz.width, pz.height, + 
pz.index, img_w, img_h, + ) + + zone_entry: Dict[str, Any] = { + "zone_index": pz.index, + "zone_type": pz.zone_type, + "bbox_px": { + "x": pz.x, "y": pz.y, + "w": pz.width, "h": pz.height, + }, + "bbox_pct": { + "x": round(pz.x / img_w * 100, 2) if img_w else 0, + "y": round(pz.y / img_h * 100, 2) if img_h else 0, + "w": round(pz.width / img_w * 100, 2) if img_w else 0, + "h": round(pz.height / img_h * 100, 2) if img_h else 0, + }, + "border": None, + "word_count": len(zone_words), + **grid, + } + + if pz.box: + zone_entry["border"] = { + "thickness": pz.box.border_thickness, + "confidence": pz.box.confidence, + } + + zones_data.append(zone_entry) + + # 4. Fallback: no boxes detected β†’ single zone with all words + if not zones_data: + grid = _build_zone_grid( + all_words, content_x, content_y, content_w, content_h, + 0, img_w, img_h, + ) + zones_data.append({ + "zone_index": 0, + "zone_type": "content", + "bbox_px": { + "x": content_x, "y": content_y, + "w": content_w, "h": content_h, + }, + "bbox_pct": { + "x": round(content_x / img_w * 100, 2) if img_w else 0, + "y": round(content_y / img_h * 100, 2) if img_h else 0, + "w": round(content_w / img_w * 100, 2) if img_w else 0, + "h": round(content_h / img_h * 100, 2) if img_h else 0, + }, + "border": None, + "word_count": len(all_words), + **grid, + }) + + duration = time.time() - t0 + + # 5. 
Build result + total_cells = sum(len(z.get("cells", [])) for z in zones_data) + total_columns = sum(len(z.get("columns", [])) for z in zones_data) + total_rows = sum(len(z.get("rows", [])) for z in zones_data) + + result = { + "session_id": session_id, + "image_width": img_w, + "image_height": img_h, + "zones": zones_data, + "boxes_detected": boxes_detected, + "summary": { + "total_zones": len(zones_data), + "total_columns": total_columns, + "total_rows": total_rows, + "total_cells": total_cells, + "total_words": len(all_words), + }, + "formatting": { + "bold_columns": [], + "header_rows": [], + }, + "duration_seconds": round(duration, 2), + } + + # 6. Persist to DB + await update_session_db(session_id, grid_editor_result=result) + + logger.info( + "build-grid session %s: %d zones, %d cols, %d rows, %d cells, " + "%d boxes in %.2fs", + session_id, len(zones_data), total_columns, total_rows, + total_cells, boxes_detected, duration, + ) + + return result + + +@router.post("/sessions/{session_id}/save-grid") +async def save_grid(session_id: str, request: Request): + """Save edited grid data from the frontend Excel-like editor. + + Receives the full StructuredGrid with user edits (text changes, + formatting changes like bold columns, header rows, etc.) and + persists it to the session's grid_editor_result. 
+ """ + session = await get_session_db(session_id) + if not session: + raise HTTPException(status_code=404, detail=f"Session {session_id} not found") + + body = await request.json() + + # Validate basic structure + if "zones" not in body: + raise HTTPException(status_code=400, detail="Missing 'zones' in request body") + + # Preserve metadata from the original build + existing = session.get("grid_editor_result") or {} + result = { + "session_id": session_id, + "image_width": body.get("image_width", existing.get("image_width", 0)), + "image_height": body.get("image_height", existing.get("image_height", 0)), + "zones": body["zones"], + "boxes_detected": body.get("boxes_detected", existing.get("boxes_detected", 0)), + "summary": body.get("summary", existing.get("summary", {})), + "formatting": body.get("formatting", existing.get("formatting", {})), + "duration_seconds": existing.get("duration_seconds", 0), + "edited": True, + } + + await update_session_db(session_id, grid_editor_result=result) + + logger.info("save-grid session %s: %d zones saved", session_id, len(body["zones"])) + + return {"session_id": session_id, "saved": True} + + +@router.get("/sessions/{session_id}/grid-editor") +async def get_grid(session_id: str): + """Retrieve the current grid editor state for a session.""" + session = await get_session_db(session_id) + if not session: + raise HTTPException(status_code=404, detail=f"Session {session_id} not found") + + result = session.get("grid_editor_result") + if not result: + raise HTTPException( + status_code=404, + detail="No grid editor data. 
Run build-grid first.", + ) + + return result diff --git a/klausur-service/backend/main.py b/klausur-service/backend/main.py index 24c931c..1eb272a 100644 --- a/klausur-service/backend/main.py +++ b/klausur-service/backend/main.py @@ -43,6 +43,7 @@ except ImportError: trocr_router = None from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL from ocr_pipeline_api import router as ocr_pipeline_router, _cache as ocr_pipeline_cache +from grid_editor_api import router as grid_editor_router from orientation_crop_api import router as orientation_crop_router, set_cache_ref as set_orientation_crop_cache from ocr_pipeline_session_store import init_ocr_pipeline_tables try: @@ -178,6 +179,7 @@ if trocr_router: app.include_router(trocr_router) # TrOCR Handwriting OCR app.include_router(vocab_router) # Vocabulary Worksheet Generator app.include_router(ocr_pipeline_router) # OCR Pipeline (step-by-step) +app.include_router(grid_editor_router) # Grid Editor (Excel-like) set_orientation_crop_cache(ocr_pipeline_cache) app.include_router(orientation_crop_router) # OCR Pipeline: Orientation + Crop if htr_router: diff --git a/klausur-service/backend/migrations/007_add_grid_editor_result.sql b/klausur-service/backend/migrations/007_add_grid_editor_result.sql new file mode 100644 index 0000000..3acccd0 --- /dev/null +++ b/klausur-service/backend/migrations/007_add_grid_editor_result.sql @@ -0,0 +1,4 @@ +-- Migration 007: Add grid_editor_result column for Excel-like grid editor +-- Stores structured grid with zones, columns, rows, cells, and formatting + +ALTER TABLE ocr_pipeline_sessions ADD COLUMN IF NOT EXISTS grid_editor_result JSONB; diff --git a/klausur-service/backend/ocr_pipeline_session_store.py b/klausur-service/backend/ocr_pipeline_session_store.py index e4adfe2..bf1d3ed 100644 --- a/klausur-service/backend/ocr_pipeline_session_store.py +++ 
b/klausur-service/backend/ocr_pipeline_session_store.py @@ -74,7 +74,8 @@ async def init_ocr_pipeline_tables(): ADD COLUMN IF NOT EXISTS orientation_result JSONB, ADD COLUMN IF NOT EXISTS crop_result JSONB, ADD COLUMN IF NOT EXISTS parent_session_id UUID REFERENCES ocr_pipeline_sessions(id) ON DELETE CASCADE, - ADD COLUMN IF NOT EXISTS box_index INT + ADD COLUMN IF NOT EXISTS box_index INT, + ADD COLUMN IF NOT EXISTS grid_editor_result JSONB """) @@ -110,6 +111,7 @@ async def create_session_db( word_result, ground_truth, auto_shear_degrees, doc_type, doc_type_result, document_category, pipeline_log, + grid_editor_result, parent_session_id, box_index, created_at, updated_at """, uuid.UUID(session_id), name, filename, original_png, @@ -129,6 +131,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]: word_result, ground_truth, auto_shear_degrees, doc_type, doc_type_result, document_category, pipeline_log, + grid_editor_result, parent_session_id, box_index, created_at, updated_at FROM ocr_pipeline_sessions WHERE id = $1 @@ -180,10 +183,11 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any 'word_result', 'ground_truth', 'auto_shear_degrees', 'doc_type', 'doc_type_result', 'document_category', 'pipeline_log', + 'grid_editor_result', 'parent_session_id', 'box_index', } - jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log'} + jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log', 'grid_editor_result'} for key, value in kwargs.items(): if key in allowed_fields: @@ -212,6 +216,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any word_result, ground_truth, auto_shear_degrees, 
doc_type, doc_type_result, document_category, pipeline_log, + grid_editor_result, parent_session_id, box_index, created_at, updated_at """, *values) @@ -308,7 +313,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]: result[key] = result[key].isoformat() # JSONB → parsed (asyncpg returns str for JSONB) - for key in ['orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log']: + for key in ['orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log', 'grid_editor_result']: if key in result and result[key] is not None: if isinstance(result[key], str): result[key] = json.loads(result[key])