diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx
index 1c3ded3..f89c0ba 100644
--- a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx
+++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx
@@ -11,6 +11,7 @@ import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
+import { GridEditor } from '@/components/grid-editor/GridEditor'
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
const KLAUSUR_API = '/klausur-api'
@@ -72,13 +73,17 @@ export default function OcrOverlayPage() {
const m = isKombi ? 'kombi' : 'paddle-direct'
const baseSteps = isKombi ? KOMBI_STEPS : PADDLE_DIRECT_STEPS
setMode(m)
+
+ // For Kombi: if grid_editor_result exists, jump to grid editor step (5)
+ const hasGrid = isKombi && data.grid_editor_result
+ const activeStep = hasGrid ? 5 : 4
setSteps(
baseSteps.map((s, i) => ({
...s,
- status: i < 4 ? 'completed' : i === 4 ? 'active' : 'pending',
+ status: i < activeStep ? 'completed' : i === activeStep ? 'active' : 'pending',
})),
)
- setCurrentStep(4)
+ setCurrentStep(activeStep)
} else {
setMode('pipeline')
// Map DB step to overlay UI step
@@ -256,6 +261,10 @@ export default function OcrOverlayPage() {
) : (
)
+ case 5:
+ return mode === 'kombi' ? (
+
+ ) : null
default:
return null
}
@@ -512,7 +521,7 @@ export default function OcrOverlayPage() {
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
}`}
>
- Kombi (5 Schritte)
+ Kombi (6 Schritte)
diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts b/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts
index 993092a..ee01e10 100644
--- a/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts
+++ b/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts
@@ -70,6 +70,7 @@ export const KOMBI_STEPS: PipelineStep[] = [
{ id: 'dewarp', name: 'Entzerrung', icon: 'π§', status: 'pending' },
{ id: 'crop', name: 'Zuschneiden', icon: 'βοΈ', status: 'pending' },
{ id: 'kombi', name: 'PP-OCRv5 + Tesseract', icon: 'π', status: 'pending' },
+ { id: 'grid-editor', name: 'Tabelle', icon: 'π', status: 'pending' },
]
/** Map from DB step to overlay UI step index */
diff --git a/admin-lehrer/components/grid-editor/GridEditor.tsx b/admin-lehrer/components/grid-editor/GridEditor.tsx
new file mode 100644
index 0000000..18af07e
--- /dev/null
+++ b/admin-lehrer/components/grid-editor/GridEditor.tsx
@@ -0,0 +1,218 @@
+'use client'
+
+import { useCallback, useEffect, useState } from 'react'
+import { useGridEditor } from './useGridEditor'
+import { GridToolbar } from './GridToolbar'
+import { GridTable } from './GridTable'
+import { GridImageOverlay } from './GridImageOverlay'
+
+/** Props accepted by the GridEditor component. */
+interface GridEditorProps {
+ /** Session whose grid is loaded; when null the editor only renders a "no session selected" message. */
+ sessionId: string | null
+ /** Optional callback; the "next step" section is rendered only when it is provided. */
+ onNext?: () => void
+}
+
+export function GridEditor({ sessionId, onNext }: GridEditorProps) {
+ const {
+ grid,
+ loading,
+ saving,
+ error,
+ dirty,
+ selectedCell,
+ setSelectedCell,
+ buildGrid,
+ loadGrid,
+ saveGrid,
+ updateCellText,
+ toggleColumnBold,
+ toggleRowHeader,
+ undo,
+ redo,
+ canUndo,
+ canRedo,
+ getAdjacentCell,
+ } = useGridEditor(sessionId)
+
+ const [showOverlay, setShowOverlay] = useState(false)
+
+ // Load grid on mount
+ useEffect(() => {
+ if (sessionId) {
+ loadGrid()
+ }
+ }, [sessionId, loadGrid])
+
+ // Keyboard shortcuts
+ useEffect(() => {
+ // Global shortcut handler: Ctrl/Cmd+Z = undo, Ctrl/Cmd+Shift+Z or
+ // Ctrl/Cmd+Y = redo, Ctrl/Cmd+S = save.
+ const handler = (e: KeyboardEvent) => {
+   // Every shortcut is a Ctrl/Cmd chord; ignore plain key presses.
+   if (!(e.metaKey || e.ctrlKey)) return
+
+   // `e.key` is reported as an uppercase letter while Shift (or CapsLock)
+   // is active, so compare case-insensitively. Comparing against 'z' only
+   // meant Ctrl+Shift+Z never triggered redo.
+   const key = e.key.toLowerCase()
+
+   if (key === 'z') {
+     e.preventDefault()
+     if (e.shiftKey) {
+       redo()
+     } else {
+       undo()
+     }
+   } else if (key === 'y' && !e.shiftKey) {
+     // Ctrl+Y is advertised as the redo shortcut in the on-screen tip.
+     e.preventDefault()
+     redo()
+   } else if (key === 's' && !e.shiftKey) {
+     e.preventDefault()
+     saveGrid()
+   }
+ }
+ window.addEventListener('keydown', handler)
+ return () => window.removeEventListener('keydown', handler)
+ }, [undo, redo, saveGrid])
+
+ // Moves the selection to the neighbouring cell in `direction` and focuses
+ // its DOM element (looked up by the id `cell-<cellId>`). Does nothing when
+ // there is no neighbour in that direction.
+ const handleNavigate = useCallback(
+   (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => {
+     const nextId = getAdjacentCell(cellId, direction)
+     if (!nextId) return
+
+     setSelectedCell(nextId)
+     // Focus is deferred with a zero-delay timer — presumably so it runs
+     // after the selection change has been rendered.
+     setTimeout(() => {
+       const field = document.getElementById(`cell-${nextId}`)
+       if (!field) return
+       field.focus()
+       // Select the existing text so typing replaces it.
+       if (field instanceof HTMLInputElement) field.select()
+     }, 0)
+   },
+   [getAdjacentCell, setSelectedCell],
+ )
+
+ if (!sessionId) {
+ return (
+
+ Keine Session ausgewaehlt.
+
+ )
+ }
+
+ if (loading) {
+ return (
+
+
+
+ Grid wird aufgebaut...
+
+
+ )
+ }
+
+ if (error) {
+ return (
+
+
+ Fehler: {error}
+
+
+
+ )
+ }
+
+ if (!grid || !grid.zones.length) {
+ return (
+
+
Kein Grid vorhanden.
+
+
+ )
+ }
+
+ return (
+
+ {/* Summary bar */}
+
+ {grid.summary.total_zones} Zone(n)
+ {grid.summary.total_columns} Spalten
+ {grid.summary.total_rows} Zeilen
+ {grid.summary.total_cells} Zellen
+ {grid.boxes_detected > 0 && (
+
+ {grid.boxes_detected} Box(en) erkannt
+
+ )}
+
+ {grid.duration_seconds.toFixed(1)}s
+
+
+
+ {/* Toolbar */}
+
+ setShowOverlay(!showOverlay)}
+ />
+
+
+ {/* Image overlay */}
+ {showOverlay && (
+
+ )}
+
+ {/* Zone tables */}
+
+ {grid.zones.map((zone) => (
+
+
+
+ ))}
+
+
+ {/* Tip */}
+
+ Tab: naechste Zelle
+ Enter: Zeile runter
+ Spalte fett: Klick auf Spaltenkopf
+ Header: Klick auf Zeilennummer
+ Ctrl+Z/Y: Undo/Redo
+ Ctrl+S: Speichern
+
+
+ {/* Next step button */}
+ {onNext && (
+
+
+
+ )}
+
+ )
+}
diff --git a/admin-lehrer/components/grid-editor/GridImageOverlay.tsx b/admin-lehrer/components/grid-editor/GridImageOverlay.tsx
new file mode 100644
index 0000000..9688dce
--- /dev/null
+++ b/admin-lehrer/components/grid-editor/GridImageOverlay.tsx
@@ -0,0 +1,98 @@
+'use client'
+
+import type { StructuredGrid } from './types'
+
+const KLAUSUR_API = '/klausur-api'
+
+/** Props accepted by the GridImageOverlay component. */
+interface GridImageOverlayProps {
+ /** Session id interpolated into the URL of the cropped source image. */
+ sessionId: string
+ /** Structured grid to draw on top of the image. */
+ grid: StructuredGrid
+}
+
+// Palette of border/fill RGBA colour pairs (strong border, very faint fill).
+// NOTE(review): the markup that consumes this palette is not visible here —
+// presumably one entry is picked per zone; confirm against the SVG overlay.
+const ZONE_COLORS = [
+ { border: 'rgba(20,184,166,0.7)', fill: 'rgba(20,184,166,0.05)' }, // teal
+ { border: 'rgba(245,158,11,0.7)', fill: 'rgba(245,158,11,0.05)' }, // amber
+ { border: 'rgba(99,102,241,0.7)', fill: 'rgba(99,102,241,0.05)' }, // indigo
+ { border: 'rgba(236,72,153,0.7)', fill: 'rgba(236,72,153,0.05)' }, // pink
+]
+
+export function GridImageOverlay({ sessionId, grid }: GridImageOverlayProps) {
+ const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
+
+ return (
+
+
+ {/* Source image */}
+ {/* eslint-disable-next-line @next/next/no-img-element */}
+

+
+ {/* SVG overlay */}
+
+
+
+ )
+}
diff --git a/admin-lehrer/components/grid-editor/GridTable.tsx b/admin-lehrer/components/grid-editor/GridTable.tsx
new file mode 100644
index 0000000..106ed39
--- /dev/null
+++ b/admin-lehrer/components/grid-editor/GridTable.tsx
@@ -0,0 +1,153 @@
+'use client'
+
+import { useCallback, useRef } from 'react'
+import type { GridZone } from './types'
+
+/** Props accepted by the GridTable component, which renders a single zone. */
+interface GridTableProps {
+ /** Zone (columns, rows and cells) shown by this table. */
+ zone: GridZone
+ /** cell_id of the currently selected cell, or null when nothing is selected. */
+ selectedCell: string | null
+ onSelectCell: (cellId: string) => void
+ onCellTextChange: (cellId: string, text: string) => void
+ onToggleColumnBold: (zoneIndex: number, colIndex: number) => void
+ onToggleRowHeader: (zoneIndex: number, rowIndex: number) => void
+ /** Called by the key handler for Tab / Shift+Tab / Enter / Alt+ArrowUp / Alt+ArrowDown. */
+ onNavigate: (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => void
+}
+
+export function GridTable({
+ zone,
+ selectedCell,
+ onSelectCell,
+ onCellTextChange,
+ onToggleColumnBold,
+ onToggleRowHeader,
+ onNavigate,
+}: GridTableProps) {
+ const tableRef = useRef(null)
+
+ // Translates key presses inside a cell into navigation requests:
+ //   Tab / Shift+Tab      -> right / left
+ //   Enter (no Shift)     -> down
+ //   Alt+ArrowUp / Down   -> up / down
+ //   Escape               -> blur the focused element
+ const handleKeyDown = useCallback(
+   (e: React.KeyboardEvent, cellId: string) => {
+     let direction: 'up' | 'down' | 'left' | 'right' | null = null
+
+     if (e.key === 'Tab') {
+       direction = e.shiftKey ? 'left' : 'right'
+     } else if (e.key === 'Enter' && !e.shiftKey) {
+       direction = 'down'
+     } else if (e.key === 'ArrowUp' && e.altKey) {
+       direction = 'up'
+     } else if (e.key === 'ArrowDown' && e.altKey) {
+       direction = 'down'
+     }
+
+     if (direction !== null) {
+       // Suppress the browser default for every key that was mapped to a move.
+       e.preventDefault()
+       onNavigate(cellId, direction)
+       return
+     }
+
+     if (e.key === 'Escape') {
+       ;(e.target as HTMLElement).blur()
+     }
+   },
+   [onNavigate],
+ )
+
+ // Build row/column → cell lookup
+ const cellMap = new Map()
+ for (const cell of zone.cells) {
+ cellMap.set(`${cell.row_index}_${cell.col_index}`, cell)
+ }
+
+ const isBoxZone = zone.zone_type === 'box'
+
+ return (
+
+ {/* Zone label */}
+
+
+ {isBoxZone ? 'Box' : 'Inhalt'} Zone {zone.zone_index}
+
+ {zone.columns.length} Spalten, {zone.rows.length} Zeilen, {zone.cells.length} Zellen
+
+
+
+
+ )
+}
diff --git a/admin-lehrer/components/grid-editor/GridToolbar.tsx b/admin-lehrer/components/grid-editor/GridToolbar.tsx
new file mode 100644
index 0000000..dc7100a
--- /dev/null
+++ b/admin-lehrer/components/grid-editor/GridToolbar.tsx
@@ -0,0 +1,110 @@
+'use client'
+
+/**
+ * Props accepted by the GridToolbar component: five boolean state flags and
+ * five parameterless callbacks supplied by the parent.
+ * NOTE(review): how each flag affects the rendered controls is defined in the
+ * toolbar markup, which is not visible here.
+ */
+interface GridToolbarProps {
+ dirty: boolean
+ saving: boolean
+ canUndo: boolean
+ canRedo: boolean
+ showOverlay: boolean
+ onSave: () => void
+ onUndo: () => void
+ onRedo: () => void
+ onRebuild: () => void
+ onToggleOverlay: () => void
+}
+
+export function GridToolbar({
+ dirty,
+ saving,
+ canUndo,
+ canRedo,
+ showOverlay,
+ onSave,
+ onUndo,
+ onRedo,
+ onRebuild,
+ onToggleOverlay,
+}: GridToolbarProps) {
+ return (
+
+ {/* Undo / Redo */}
+
+
+ {/* Overlay toggle */}
+
+
+ {/* Rebuild */}
+
+
+ {/* Spacer */}
+
+
+ {/* Save */}
+
+
+ )
+}
diff --git a/admin-lehrer/components/grid-editor/index.ts b/admin-lehrer/components/grid-editor/index.ts
new file mode 100644
index 0000000..07605eb
--- /dev/null
+++ b/admin-lehrer/components/grid-editor/index.ts
@@ -0,0 +1,6 @@
+export { GridEditor } from './GridEditor'
+export { GridTable } from './GridTable'
+export { GridToolbar } from './GridToolbar'
+export { GridImageOverlay } from './GridImageOverlay'
+export { useGridEditor } from './useGridEditor'
+export type * from './types'
diff --git a/admin-lehrer/components/grid-editor/types.ts b/admin-lehrer/components/grid-editor/types.ts
new file mode 100644
index 0000000..fa596c5
--- /dev/null
+++ b/admin-lehrer/components/grid-editor/types.ts
@@ -0,0 +1,97 @@
+import type { OcrWordBox } from '@/app/(admin)/ai/ocr-pipeline/types'
+
+// Re-export for convenience
+export type { OcrWordBox }
+
+/**
+ * A complete structured grid with zones, ready for the Excel-like editor.
+ *
+ * The field names mirror the JSON object assembled by the backend
+ * `build-grid` endpoint; the `save-grid` endpoint persists the object with
+ * `edited` set to true.
+ */
+export interface StructuredGrid {
+ session_id: string
+ image_width: number
+ image_height: number
+ zones: GridZone[]
+ /** Number of bordered boxes found by the backend box detection. */
+ boxes_detected: number
+ summary: GridSummary
+ formatting: GridFormatting
+ /** Time the backend spent building the grid, in seconds. */
+ duration_seconds: number
+ /** Set to true by the backend on grids stored through save-grid. */
+ edited?: boolean
+}
+
+/**
+ * Aggregate counts for a grid. The backend sums columns, rows and cells over
+ * all zones; `total_words` is the number of word boxes that went into the build.
+ */
+export interface GridSummary {
+ total_zones: number
+ total_columns: number
+ total_rows: number
+ total_cells: number
+ total_words: number
+}
+
+/**
+ * Grid-level formatting lists. The backend `build-grid` endpoint initialises
+ * both lists as empty.
+ */
+export interface GridFormatting {
+ bold_columns: number[]
+ header_rows: number[]
+}
+
+/** A horizontal zone of the page — either content or a bordered box. */
+export interface GridZone {
+ zone_index: number
+ zone_type: 'content' | 'box'
+ /** Zone bounds in image pixels. */
+ bbox_px: BBox
+ /** Zone bounds as percentages of the image width/height. */
+ bbox_pct: BBox
+ /** Border details; the backend fills this only for zones backed by a detected box. */
+ border: ZoneBorder | null
+ /** Number of words assigned to this zone. */
+ word_count: number
+ columns: GridColumn[]
+ rows: GridRow[]
+ cells: GridEditorCell[]
+ /** Indices of the rows the backend detected as headers. */
+ header_rows: number[]
+}
+
+/**
+ * Axis-aligned rectangle: origin (`x`, `y`) plus width `w` and height `h`.
+ * The unit depends on the field that holds it (`*_px` = pixels,
+ * `*_pct` = percent of the image dimension).
+ */
+export interface BBox {
+ x: number
+ y: number
+ w: number
+ h: number
+}
+
+/** Border thickness and detection confidence copied from the detected box of a zone. */
+export interface ZoneBorder {
+ thickness: number
+ confidence: number
+}
+
+/** One column of a zone, with its horizontal extent in pixels and in percent of the image width. */
+export interface GridColumn {
+ index: number
+ /** The backend fills this with the clustered column's type. */
+ label: string
+ x_min_px: number
+ x_max_px: number
+ x_min_pct: number
+ x_max_pct: number
+ /** False on a freshly built grid; toggled per column in the editor. */
+ bold: boolean
+}
+
+/** One row of a zone, with its vertical extent in pixels and in percent of the image height. */
+export interface GridRow {
+ index: number
+ y_min_px: number
+ y_max_px: number
+ y_min_pct: number
+ y_max_pct: number
+ /** Initially true when the backend detected this row as a header; toggled per row in the editor. */
+ is_header: boolean
+}
+
+/** A single editable cell of a zone. */
+export interface GridEditorCell {
+ /** Unique id; the backend prefixes it with `Z<zone_index>_`. */
+ cell_id: string
+ zone_index: number
+ row_index: number
+ col_index: number
+ col_type: string
+ /** Cell text; this is the field changed by text edits in the editor. */
+ text: string
+ confidence: number
+ bbox_px: BBox
+ bbox_pct: BBox
+ word_boxes: OcrWordBox[]
+ ocr_engine: string
+ /** Set for every cell of a column when that column's bold flag is toggled. */
+ is_bold: boolean
+}
+
+/**
+ * Cell formatting applied by the user in the editor.
+ * NOTE(review): nothing in the visible grid-editor code references this type yet.
+ */
+export interface CellFormatting {
+ bold: boolean
+ fontSize: 'small' | 'normal' | 'large'
+ align: 'left' | 'center' | 'right'
+}
diff --git a/admin-lehrer/components/grid-editor/useGridEditor.ts b/admin-lehrer/components/grid-editor/useGridEditor.ts
new file mode 100644
index 0000000..0bd2491
--- /dev/null
+++ b/admin-lehrer/components/grid-editor/useGridEditor.ts
@@ -0,0 +1,288 @@
+import { useCallback, useRef, useState } from 'react'
+import type { StructuredGrid, GridZone } from './types'
+
+const KLAUSUR_API = '/klausur-api'
+const MAX_UNDO = 50
+
+/** Shape of the state values held by the useGridEditor hook. */
+export interface GridEditorState {
+ /** Loaded grid, or null before anything has been loaded. */
+ grid: StructuredGrid | null
+ /** True while a load or build request is in flight. */
+ loading: boolean
+ /** True while a save request is in flight. */
+ saving: boolean
+ /** Message of the last failed request, or null. */
+ error: string | null
+ /** True once the grid has been modified since the last load, build or save. */
+ dirty: boolean
+ selectedCell: string | null
+ selectedZone: number | null
+}
+
+export function useGridEditor(sessionId: string | null) {
+ const [grid, setGrid] = useState(null)
+ const [loading, setLoading] = useState(false)
+ const [saving, setSaving] = useState(false)
+ const [error, setError] = useState(null)
+ const [dirty, setDirty] = useState(false)
+ const [selectedCell, setSelectedCell] = useState(null)
+ const [selectedZone, setSelectedZone] = useState(null)
+
+ // Undo/redo stacks store serialized zone arrays
+ const undoStack = useRef([])
+ const redoStack = useRef([])
+
+ // Records a snapshot of `zones` (serialized as JSON) on the undo stack,
+ // keeps the stack at MAX_UNDO entries by dropping the oldest one, and
+ // discards the redo history.
+ const pushUndo = useCallback((zones: GridZone[]) => {
+   const history = undoStack.current
+   history.push(JSON.stringify(zones))
+   if (history.length > MAX_UNDO) {
+     history.shift()
+   }
+   redoStack.current = []
+ }, [])
+
+ // ------------------------------------------------------------------
+ // Load / Build
+ // ------------------------------------------------------------------
+
+ // Asks the backend to (re)build the grid for the current session via
+ // POST .../build-grid, then replaces the local grid and resets the dirty
+ // flag and both history stacks. Failures end up in `error`.
+ const buildGrid = useCallback(async () => {
+   if (!sessionId) return
+   const endpoint = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid`
+   setLoading(true)
+   setError(null)
+   try {
+     const response = await fetch(endpoint, { method: 'POST' })
+     if (!response.ok) {
+       // Prefer the backend's `detail` message; fall back to the status code
+       // when the error body is not JSON.
+       const payload = await response.json().catch(() => ({}))
+       throw new Error(payload.detail || `HTTP ${response.status}`)
+     }
+     const built: StructuredGrid = await response.json()
+     setGrid(built)
+     setDirty(false)
+     undoStack.current = []
+     redoStack.current = []
+   } catch (err) {
+     setError(err instanceof Error ? err.message : String(err))
+   } finally {
+     setLoading(false)
+   }
+ }, [sessionId])
+
+ // Fetches the stored grid for the current session via GET .../grid-editor.
+ // A 404 means no grid exists yet, in which case a build is triggered
+ // instead. On success the local grid is replaced and the dirty flag and
+ // history stacks are reset; failures end up in `error`.
+ const loadGrid = useCallback(async () => {
+   if (!sessionId) return
+   const endpoint = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`
+   setLoading(true)
+   setError(null)
+   try {
+     const response = await fetch(endpoint)
+     if (response.status === 404) {
+       // No grid yet — build it
+       await buildGrid()
+       return
+     }
+     if (!response.ok) {
+       const payload = await response.json().catch(() => ({}))
+       throw new Error(payload.detail || `HTTP ${response.status}`)
+     }
+     const stored: StructuredGrid = await response.json()
+     setGrid(stored)
+     setDirty(false)
+     undoStack.current = []
+     redoStack.current = []
+   } catch (err) {
+     setError(err instanceof Error ? err.message : String(err))
+   } finally {
+     setLoading(false)
+   }
+ }, [sessionId, buildGrid])
+
+ // ------------------------------------------------------------------
+ // Save
+ // ------------------------------------------------------------------
+
+ // Persists the current grid via POST .../save-grid. Clears the dirty flag
+ // on success; on failure the message is stored in `error`.
+ const saveGrid = useCallback(async () => {
+   if (!sessionId || !grid) return
+   setSaving(true)
+   // Reset any previous error (as buildGrid/loadGrid do). Without this a
+   // successful retry after a failed save would leave the stale error set
+   // and the editor stuck on its error view.
+   setError(null)
+   try {
+     const res = await fetch(
+       `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/save-grid`,
+       {
+         method: 'POST',
+         headers: { 'Content-Type': 'application/json' },
+         body: JSON.stringify(grid),
+       },
+     )
+     if (!res.ok) {
+       // Prefer the backend's `detail` message; fall back to the status code
+       // when the error body is not JSON.
+       const data = await res.json().catch(() => ({}))
+       throw new Error(data.detail || `HTTP ${res.status}`)
+     }
+     setDirty(false)
+   } catch (e) {
+     setError(e instanceof Error ? e.message : String(e))
+   } finally {
+     setSaving(false)
+   }
+ }, [sessionId, grid])
+
+ // ------------------------------------------------------------------
+ // Cell editing
+ // ------------------------------------------------------------------
+
+ // Replaces the text of the cell with id `cellId` in whichever zone holds
+ // it. The zones as they were before the edit are pushed onto the undo
+ // stack first, and the grid is marked dirty.
+ const updateCellText = useCallback(
+   (cellId: string, newText: string) => {
+     if (!grid) return
+     pushUndo(grid.zones)
+
+     const withNewText = (zone: GridZone): GridZone => ({
+       ...zone,
+       cells: zone.cells.map((cell) => {
+         if (cell.cell_id !== cellId) return cell
+         return { ...cell, text: newText }
+       }),
+     })
+
+     setGrid((prev) => {
+       if (!prev) return prev
+       return { ...prev, zones: prev.zones.map((zone) => withNewText(zone)) }
+     })
+     setDirty(true)
+   },
+   [grid, pushUndo],
+ )
+
+ // ------------------------------------------------------------------
+ // Column formatting
+ // ------------------------------------------------------------------
+
+ // Flips the bold flag of column `colIndex` in zone `zoneIndex` and applies
+ // the new value to every cell of that column. A column that cannot be
+ // found is treated as "not bold", so its cells become bold. The previous
+ // zones are pushed onto the undo stack and the grid is marked dirty.
+ const toggleColumnBold = useCallback(
+   (zoneIndex: number, colIndex: number) => {
+     if (!grid) return
+     pushUndo(grid.zones)
+
+     const flipColumn = (zone: GridZone): GridZone => {
+       if (zone.zone_index !== zoneIndex) return zone
+       const current = zone.columns.find((c) => c.index === colIndex)
+       const nextBold = current ? !current.bold : true
+       return {
+         ...zone,
+         columns: zone.columns.map((c) => {
+           if (c.index !== colIndex) return c
+           return { ...c, bold: nextBold }
+         }),
+         cells: zone.cells.map((cell) => {
+           if (cell.col_index !== colIndex) return cell
+           return { ...cell, is_bold: nextBold }
+         }),
+       }
+     }
+
+     setGrid((prev) => {
+       if (!prev) return prev
+       return { ...prev, zones: prev.zones.map((zone) => flipColumn(zone)) }
+     })
+     setDirty(true)
+   },
+   [grid, pushUndo],
+ )
+
+ // ------------------------------------------------------------------
+ // Row formatting
+ // ------------------------------------------------------------------
+
+ // Flips the header flag of row `rowIndex` in zone `zoneIndex`. The previous
+ // zones are pushed onto the undo stack and the grid is marked dirty.
+ const toggleRowHeader = useCallback(
+   (zoneIndex: number, rowIndex: number) => {
+     if (!grid) return
+     pushUndo(grid.zones)
+
+     setGrid((prev) => {
+       if (!prev) return prev
+       return {
+         ...prev,
+         zones: prev.zones.map((zone) => {
+           if (zone.zone_index !== zoneIndex) return zone
+           const rows = zone.rows.map((r) =>
+             r.index === rowIndex ? { ...r, is_header: !r.is_header } : r,
+           )
+           return {
+             ...zone,
+             rows,
+             // Keep the zone-level list in sync with the per-row flags; the
+             // backend builds both from the same data, and a stale list
+             // would otherwise be persisted on save.
+             header_rows: rows.filter((r) => r.is_header).map((r) => r.index),
+           }
+         }),
+       }
+     })
+     setDirty(true)
+   },
+   [grid, pushUndo],
+ )
+
+ // ------------------------------------------------------------------
+ // Undo / Redo
+ // ------------------------------------------------------------------
+
+ // Restores the most recent undo snapshot. The current zones are pushed
+ // onto the redo stack first so the step can be re-applied.
+ const undo = useCallback(() => {
+   const history = undoStack.current
+   if (!grid || history.length === 0) return
+   redoStack.current.push(JSON.stringify(grid.zones))
+   const snapshot = history.pop()!
+   setGrid((current) => {
+     if (!current) return current
+     return { ...current, zones: JSON.parse(snapshot) }
+   })
+   setDirty(true)
+ }, [grid])
+
+ // Re-applies the most recently undone snapshot. The current zones are
+ // pushed back onto the undo stack first.
+ const redo = useCallback(() => {
+   const future = redoStack.current
+   if (!grid || future.length === 0) return
+   undoStack.current.push(JSON.stringify(grid.zones))
+   const snapshot = future.pop()!
+   setGrid((current) => {
+     if (!current) return current
+     return { ...current, zones: JSON.parse(snapshot) }
+   })
+   setDirty(true)
+ }, [grid])
+
+ const canUndo = undoStack.current.length > 0
+ const canRedo = redoStack.current.length > 0
+
+ // ------------------------------------------------------------------
+ // Navigation helpers
+ // ------------------------------------------------------------------
+
+ // Returns the cell_id of the neighbour of `cellId` one step in `direction`,
+ // searching only the zone that contains `cellId`. Returns null when there
+ // is no grid, the cell is unknown, or no cell exists at the target position.
+ const getAdjacentCell = useCallback(
+   (cellId: string, direction: 'up' | 'down' | 'left' | 'right'): string | null => {
+     if (!grid) return null
+
+     // [row delta, column delta] for each direction.
+     const steps = {
+       up: [-1, 0],
+       down: [1, 0],
+       left: [0, -1],
+       right: [0, 1],
+     } as const
+     const [rowStep, colStep] = steps[direction]
+
+     for (const zone of grid.zones) {
+       const origin = zone.cells.find((c) => c.cell_id === cellId)
+       if (origin) {
+         const wantedRow = origin.row_index + rowStep
+         const wantedCol = origin.col_index + colStep
+         const neighbour = zone.cells.find(
+           (c) => c.row_index === wantedRow && c.col_index === wantedCol,
+         )
+         return neighbour?.cell_id ?? null
+       }
+     }
+     return null
+   },
+   [grid],
+ )
+
+ return {
+ grid,
+ loading,
+ saving,
+ error,
+ dirty,
+ selectedCell,
+ selectedZone,
+ setSelectedCell,
+ setSelectedZone,
+ buildGrid,
+ loadGrid,
+ saveGrid,
+ updateCellText,
+ toggleColumnBold,
+ toggleRowHeader,
+ undo,
+ redo,
+ canUndo,
+ canRedo,
+ getAdjacentCell,
+ }
+}
diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py
new file mode 100644
index 0000000..1b0edf9
--- /dev/null
+++ b/klausur-service/backend/grid_editor_api.py
@@ -0,0 +1,426 @@
+"""
+Grid Editor API — builds a structured, zone-aware grid from Kombi OCR results.
+
+Takes the merged word positions from paddle-kombi / rapid-kombi and:
+ 1. Detects bordered boxes on the image (cv_box_detect)
+ 2. Splits the page into zones (content + box regions)
+ 3. Clusters words into columns and rows per zone
+ 4. Returns a hierarchical StructuredGrid for the frontend Excel-like editor
+
+Lizenz: Apache 2.0 (kommerziell nutzbar)
+DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
+"""
+
+import logging
+import time
+from typing import Any, Dict, List, Optional
+
+import cv2
+import numpy as np
+from fastapi import APIRouter, HTTPException, Request
+
+from cv_box_detect import detect_boxes, split_page_into_zones
+from cv_words_first import _cluster_columns, _cluster_rows, _build_cells
+from ocr_pipeline_session_store import (
+ get_session_db,
+ get_session_image,
+ update_session_db,
+)
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["grid-editor"])
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _flatten_word_boxes(cells: List[Dict]) -> List[Dict]:
+ """Extract all word_boxes from cells into a flat list of word dicts."""
+ words: List[Dict] = []
+ for cell in cells:
+ for wb in cell.get("word_boxes") or []:
+ if wb.get("text", "").strip():
+ words.append({
+ "text": wb["text"],
+ "left": wb["left"],
+ "top": wb["top"],
+ "width": wb["width"],
+ "height": wb["height"],
+ "conf": wb.get("conf", 0),
+ })
+ return words
+
+
+def _words_in_zone(
+ words: List[Dict],
+ zone_y: int,
+ zone_h: int,
+ zone_x: int,
+ zone_w: int,
+) -> List[Dict]:
+ """Filter words whose Y-center falls within a zone's bounds."""
+ zone_y_end = zone_y + zone_h
+ zone_x_end = zone_x + zone_w
+ result = []
+ for w in words:
+ cy = w["top"] + w["height"] / 2
+ cx = w["left"] + w["width"] / 2
+ if zone_y <= cy <= zone_y_end and zone_x <= cx <= zone_x_end:
+ result.append(w)
+ return result
+
+
+def _detect_header_rows(
+ rows: List[Dict],
+ zone_words: List[Dict],
+ zone_y: int,
+) -> List[int]:
+ """Heuristic: the first row is a header if it has bold/large text or
+ there's a significant gap after it."""
+ if len(rows) < 2:
+ return []
+
+ headers = []
+ first_row = rows[0]
+ second_row = rows[1]
+
+ # Gap between first and second row > 1.5x average row height
+ avg_h = sum(r["y_max"] - r["y_min"] for r in rows) / len(rows)
+ gap = second_row["y_min"] - first_row["y_max"]
+ if gap > avg_h * 0.5:
+ headers.append(0)
+
+ # Also check if first row words are taller than average (bold/header text)
+ first_row_words = [
+ w for w in zone_words
+ if first_row["y_min"] <= w["top"] + w["height"] / 2 <= first_row["y_max"]
+ ]
+ if first_row_words:
+ first_h = max(w["height"] for w in first_row_words)
+ all_heights = [w["height"] for w in zone_words]
+ median_h = sorted(all_heights)[len(all_heights) // 2] if all_heights else first_h
+ if first_h > median_h * 1.3:
+ if 0 not in headers:
+ headers.append(0)
+
+ return headers
+
+
+def _build_zone_grid(
+ zone_words: List[Dict],
+ zone_x: int,
+ zone_y: int,
+ zone_w: int,
+ zone_h: int,
+ zone_index: int,
+ img_w: int,
+ img_h: int,
+) -> Dict[str, Any]:
+ """Build columns, rows, cells for a single zone from its words."""
+ if not zone_words:
+ return {
+ "columns": [],
+ "rows": [],
+ "cells": [],
+ "header_rows": [],
+ }
+
+ # Cluster columns and rows
+ columns = _cluster_columns(zone_words, zone_w)
+ rows = _cluster_rows(zone_words)
+
+ if not columns or not rows:
+ return {
+ "columns": [],
+ "rows": [],
+ "cells": [],
+ "header_rows": [],
+ }
+
+ # Build cells
+ cells = _build_cells(zone_words, columns, rows, img_w, img_h)
+
+ # Prefix cell IDs with zone index
+ for cell in cells:
+ cell["cell_id"] = f"Z{zone_index}_{cell['cell_id']}"
+ cell["zone_index"] = zone_index
+
+ # Detect header rows
+ header_rows = _detect_header_rows(rows, zone_words, zone_y)
+
+ # Convert columns to output format with percentages
+ out_columns = []
+ for col in columns:
+ x_min = col["x_min"]
+ x_max = col["x_max"]
+ out_columns.append({
+ "index": col["index"],
+ "label": col["type"],
+ "x_min_px": round(x_min),
+ "x_max_px": round(x_max),
+ "x_min_pct": round(x_min / img_w * 100, 2) if img_w else 0,
+ "x_max_pct": round(x_max / img_w * 100, 2) if img_w else 0,
+ "bold": False,
+ })
+
+ # Convert rows to output format with percentages
+ out_rows = []
+ for row in rows:
+ out_rows.append({
+ "index": row["index"],
+ "y_min_px": round(row["y_min"]),
+ "y_max_px": round(row["y_max"]),
+ "y_min_pct": round(row["y_min"] / img_h * 100, 2) if img_h else 0,
+ "y_max_pct": round(row["y_max"] / img_h * 100, 2) if img_h else 0,
+ "is_header": row["index"] in header_rows,
+ })
+
+ return {
+ "columns": out_columns,
+ "rows": out_rows,
+ "cells": cells,
+ "header_rows": header_rows,
+ }
+
+
+def _get_content_bounds(words: List[Dict]) -> tuple:
+ """Get content bounds from word positions."""
+ if not words:
+ return 0, 0, 0, 0
+ x_min = min(w["left"] for w in words)
+ y_min = min(w["top"] for w in words)
+ x_max = max(w["left"] + w["width"] for w in words)
+ y_max = max(w["top"] + w["height"] for w in words)
+ return x_min, y_min, x_max - x_min, y_max - y_min
+
+
+# ---------------------------------------------------------------------------
+# Endpoints
+# ---------------------------------------------------------------------------
+
+@router.post("/sessions/{session_id}/build-grid")
+async def build_grid(session_id: str):
+ """Build a structured, zone-aware grid from existing Kombi word results.
+
+ Requires that paddle-kombi or rapid-kombi has already been run on the session.
+ Uses the image for box detection and the word positions for grid structuring.
+
+ Returns a StructuredGrid with zones, each containing their own
+ columns, rows, and cells — ready for the frontend Excel-like editor.
+ The result is also persisted as the session's grid_editor_result.
+
+ Raises:
+ HTTPException 404: the session does not exist.
+ HTTPException 400: the session has no word results, no image
+ dimensions, or no non-empty word boxes.
+ """
+ t0 = time.time()
+
+ # 1. Load session and word results
+ session = await get_session_db(session_id)
+ if not session:
+ raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
+
+ word_result = session.get("word_result")
+ if not word_result or not word_result.get("cells"):
+ raise HTTPException(
+ status_code=400,
+ detail="No word results found. Run paddle-kombi or rapid-kombi first.",
+ )
+
+ img_w = word_result.get("image_width", 0)
+ img_h = word_result.get("image_height", 0)
+ # From here on both dimensions are non-zero, so the "if img_w else 0"
+ # guards further down never take their else branch.
+ if not img_w or not img_h:
+ raise HTTPException(status_code=400, detail="Missing image dimensions in word_result")
+
+ # 2. Flatten all word boxes from cells
+ all_words = _flatten_word_boxes(word_result["cells"])
+ if not all_words:
+ raise HTTPException(status_code=400, detail="No word boxes found in cells")
+
+ logger.info("build-grid session %s: %d words from %d cells",
+ session_id, len(all_words), len(word_result["cells"]))
+
+ # 3. Load image for box detection
+ # Preference order: cropped, then dewarped, then original.
+ img_png = await get_session_image(session_id, "cropped")
+ if not img_png:
+ img_png = await get_session_image(session_id, "dewarped")
+ if not img_png:
+ img_png = await get_session_image(session_id, "original")
+
+ zones_data: List[Dict[str, Any]] = []
+ boxes_detected = 0
+
+ content_x, content_y, content_w, content_h = _get_content_bounds(all_words)
+
+ # zones_data stays empty when there is no image, decoding fails, or no
+ # boxes are detected; step 4 then builds a single zone instead.
+ if img_png:
+ # Decode image for box detection
+ arr = np.frombuffer(img_png, dtype=np.uint8)
+ img_bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)
+
+ if img_bgr is not None:
+ # Detect bordered boxes
+ boxes = detect_boxes(
+ img_bgr,
+ content_x=content_x,
+ content_w=content_w,
+ content_y=content_y,
+ content_h=content_h,
+ )
+ boxes_detected = len(boxes)
+
+ if boxes:
+ # Split page into zones
+ page_zones = split_page_into_zones(
+ content_x, content_y, content_w, content_h, boxes
+ )
+
+ for pz in page_zones:
+ zone_words = _words_in_zone(
+ all_words, pz.y, pz.height, pz.x, pz.width
+ )
+ grid = _build_zone_grid(
+ zone_words, pz.x, pz.y, pz.width, pz.height,
+ pz.index, img_w, img_h,
+ )
+
+ zone_entry: Dict[str, Any] = {
+ "zone_index": pz.index,
+ "zone_type": pz.zone_type,
+ "bbox_px": {
+ "x": pz.x, "y": pz.y,
+ "w": pz.width, "h": pz.height,
+ },
+ "bbox_pct": {
+ "x": round(pz.x / img_w * 100, 2) if img_w else 0,
+ "y": round(pz.y / img_h * 100, 2) if img_h else 0,
+ "w": round(pz.width / img_w * 100, 2) if img_w else 0,
+ "h": round(pz.height / img_h * 100, 2) if img_h else 0,
+ },
+ "border": None,
+ "word_count": len(zone_words),
+ **grid,
+ }
+
+ # Only zones backed by a detected box carry border details.
+ if pz.box:
+ zone_entry["border"] = {
+ "thickness": pz.box.border_thickness,
+ "confidence": pz.box.confidence,
+ }
+
+ zones_data.append(zone_entry)
+
+ # 4. Fallback: no boxes detected → single zone with all words
+ if not zones_data:
+ grid = _build_zone_grid(
+ all_words, content_x, content_y, content_w, content_h,
+ 0, img_w, img_h,
+ )
+ zones_data.append({
+ "zone_index": 0,
+ "zone_type": "content",
+ "bbox_px": {
+ "x": content_x, "y": content_y,
+ "w": content_w, "h": content_h,
+ },
+ "bbox_pct": {
+ "x": round(content_x / img_w * 100, 2) if img_w else 0,
+ "y": round(content_y / img_h * 100, 2) if img_h else 0,
+ "w": round(content_w / img_w * 100, 2) if img_w else 0,
+ "h": round(content_h / img_h * 100, 2) if img_h else 0,
+ },
+ "border": None,
+ "word_count": len(all_words),
+ **grid,
+ })
+
+ duration = time.time() - t0
+
+ # 5. Build result
+ # Totals are summed over all zones.
+ total_cells = sum(len(z.get("cells", [])) for z in zones_data)
+ total_columns = sum(len(z.get("columns", [])) for z in zones_data)
+ total_rows = sum(len(z.get("rows", [])) for z in zones_data)
+
+ result = {
+ "session_id": session_id,
+ "image_width": img_w,
+ "image_height": img_h,
+ "zones": zones_data,
+ "boxes_detected": boxes_detected,
+ "summary": {
+ "total_zones": len(zones_data),
+ "total_columns": total_columns,
+ "total_rows": total_rows,
+ "total_cells": total_cells,
+ "total_words": len(all_words),
+ },
+ "formatting": {
+ "bold_columns": [],
+ "header_rows": [],
+ },
+ "duration_seconds": round(duration, 2),
+ }
+
+ # 6. Persist to DB
+ await update_session_db(session_id, grid_editor_result=result)
+
+ logger.info(
+ "build-grid session %s: %d zones, %d cols, %d rows, %d cells, "
+ "%d boxes in %.2fs",
+ session_id, len(zones_data), total_columns, total_rows,
+ total_cells, boxes_detected, duration,
+ )
+
+ return result
+
+
+@router.post("/sessions/{session_id}/save-grid")
+async def save_grid(session_id: str, request: Request):
+ """Save edited grid data from the frontend Excel-like editor.
+
+ Receives the full StructuredGrid with user edits (text changes,
+ formatting changes like bold columns, header rows, etc.) and
+ persists it to the session's grid_editor_result.
+ """
+ session = await get_session_db(session_id)
+ if not session:
+ raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
+
+ body = await request.json()
+
+ # Validate basic structure
+ if "zones" not in body:
+ raise HTTPException(status_code=400, detail="Missing 'zones' in request body")
+
+ # Preserve metadata from the original build
+ existing = session.get("grid_editor_result") or {}
+ result = {
+ "session_id": session_id,
+ "image_width": body.get("image_width", existing.get("image_width", 0)),
+ "image_height": body.get("image_height", existing.get("image_height", 0)),
+ "zones": body["zones"],
+ "boxes_detected": body.get("boxes_detected", existing.get("boxes_detected", 0)),
+ "summary": body.get("summary", existing.get("summary", {})),
+ "formatting": body.get("formatting", existing.get("formatting", {})),
+ "duration_seconds": existing.get("duration_seconds", 0),
+ "edited": True,
+ }
+
+ await update_session_db(session_id, grid_editor_result=result)
+
+ logger.info("save-grid session %s: %d zones saved", session_id, len(body["zones"]))
+
+ return {"session_id": session_id, "saved": True}
+
+
+@router.get("/sessions/{session_id}/grid-editor")
+async def get_grid(session_id: str):
+ """Retrieve the current grid editor state for a session."""
+ session = await get_session_db(session_id)
+ if not session:
+ raise HTTPException(status_code=404, detail=f"Session {session_id} not found")
+
+ result = session.get("grid_editor_result")
+ if not result:
+ raise HTTPException(
+ status_code=404,
+ detail="No grid editor data. Run build-grid first.",
+ )
+
+ return result
diff --git a/klausur-service/backend/main.py b/klausur-service/backend/main.py
index 24c931c..1eb272a 100644
--- a/klausur-service/backend/main.py
+++ b/klausur-service/backend/main.py
@@ -43,6 +43,7 @@ except ImportError:
trocr_router = None
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
from ocr_pipeline_api import router as ocr_pipeline_router, _cache as ocr_pipeline_cache
+from grid_editor_api import router as grid_editor_router
from orientation_crop_api import router as orientation_crop_router, set_cache_ref as set_orientation_crop_cache
from ocr_pipeline_session_store import init_ocr_pipeline_tables
try:
@@ -178,6 +179,7 @@ if trocr_router:
app.include_router(trocr_router) # TrOCR Handwriting OCR
app.include_router(vocab_router) # Vocabulary Worksheet Generator
app.include_router(ocr_pipeline_router) # OCR Pipeline (step-by-step)
+app.include_router(grid_editor_router) # Grid Editor (Excel-like)
set_orientation_crop_cache(ocr_pipeline_cache)
app.include_router(orientation_crop_router) # OCR Pipeline: Orientation + Crop
if htr_router:
diff --git a/klausur-service/backend/migrations/007_add_grid_editor_result.sql b/klausur-service/backend/migrations/007_add_grid_editor_result.sql
new file mode 100644
index 0000000..3acccd0
--- /dev/null
+++ b/klausur-service/backend/migrations/007_add_grid_editor_result.sql
@@ -0,0 +1,4 @@
+-- Migration 007: Add grid_editor_result column for Excel-like grid editor
+-- Stores structured grid with zones, columns, rows, cells, and formatting
+
+ALTER TABLE ocr_pipeline_sessions ADD COLUMN IF NOT EXISTS grid_editor_result JSONB;
diff --git a/klausur-service/backend/ocr_pipeline_session_store.py b/klausur-service/backend/ocr_pipeline_session_store.py
index e4adfe2..bf1d3ed 100644
--- a/klausur-service/backend/ocr_pipeline_session_store.py
+++ b/klausur-service/backend/ocr_pipeline_session_store.py
@@ -74,7 +74,8 @@ async def init_ocr_pipeline_tables():
ADD COLUMN IF NOT EXISTS orientation_result JSONB,
ADD COLUMN IF NOT EXISTS crop_result JSONB,
ADD COLUMN IF NOT EXISTS parent_session_id UUID REFERENCES ocr_pipeline_sessions(id) ON DELETE CASCADE,
- ADD COLUMN IF NOT EXISTS box_index INT
+ ADD COLUMN IF NOT EXISTS box_index INT,
+ ADD COLUMN IF NOT EXISTS grid_editor_result JSONB
""")
@@ -110,6 +111,7 @@ async def create_session_db(
word_result, ground_truth, auto_shear_degrees,
doc_type, doc_type_result,
document_category, pipeline_log,
+ grid_editor_result,
parent_session_id, box_index,
created_at, updated_at
""", uuid.UUID(session_id), name, filename, original_png,
@@ -129,6 +131,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
word_result, ground_truth, auto_shear_degrees,
doc_type, doc_type_result,
document_category, pipeline_log,
+ grid_editor_result,
parent_session_id, box_index,
created_at, updated_at
FROM ocr_pipeline_sessions WHERE id = $1
@@ -180,10 +183,11 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
'word_result', 'ground_truth', 'auto_shear_degrees',
'doc_type', 'doc_type_result',
'document_category', 'pipeline_log',
+ 'grid_editor_result',
'parent_session_id', 'box_index',
}
- jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log'}
+ jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log', 'grid_editor_result'}
for key, value in kwargs.items():
if key in allowed_fields:
@@ -212,6 +216,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
word_result, ground_truth, auto_shear_degrees,
doc_type, doc_type_result,
document_category, pipeline_log,
+ grid_editor_result,
parent_session_id, box_index,
created_at, updated_at
""", *values)
@@ -308,7 +313,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
result[key] = result[key].isoformat()
# JSONB → parsed (asyncpg returns str for JSONB)
- for key in ['orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log']:
+ for key in ['orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log', 'grid_editor_result']:
if key in result and result[key] is not None:
if isinstance(result[key], str):
result[key] = json.loads(result[key])