feat: add Excel-like grid editor for OCR overlay (Kombi mode step 6)
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m1s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 17s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 28s
CI / test-python-klausur (push) Failing after 2m1s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 17s
Backend: new grid_editor_api.py with a build-grid endpoint that detects bordered boxes, splits the page into zones, and clusters columns/rows per zone from Kombi word positions. New DB column grid_editor_result (JSONB).

Frontend: GridEditor component with editable HTML tables per zone, column bold toggle, header row toggle, undo/redo, keyboard navigation (Tab/Enter/Arrow), image overlay verification, and save/load.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,7 @@ import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
|
||||
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
|
||||
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
|
||||
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
|
||||
import { GridEditor } from '@/components/grid-editor/GridEditor'
|
||||
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
@@ -72,13 +73,17 @@ export default function OcrOverlayPage() {
|
||||
const m = isKombi ? 'kombi' : 'paddle-direct'
|
||||
const baseSteps = isKombi ? KOMBI_STEPS : PADDLE_DIRECT_STEPS
|
||||
setMode(m)
|
||||
|
||||
// For Kombi: if grid_editor_result exists, jump to grid editor step (5)
|
||||
const hasGrid = isKombi && data.grid_editor_result
|
||||
const activeStep = hasGrid ? 5 : 4
|
||||
setSteps(
|
||||
baseSteps.map((s, i) => ({
|
||||
...s,
|
||||
status: i < 4 ? 'completed' : i === 4 ? 'active' : 'pending',
|
||||
status: i < activeStep ? 'completed' : i === activeStep ? 'active' : 'pending',
|
||||
})),
|
||||
)
|
||||
setCurrentStep(4)
|
||||
setCurrentStep(activeStep)
|
||||
} else {
|
||||
setMode('pipeline')
|
||||
// Map DB step to overlay UI step
|
||||
@@ -256,6 +261,10 @@ export default function OcrOverlayPage() {
|
||||
) : (
|
||||
<PaddleDirectStep sessionId={sessionId} onNext={handleNext} />
|
||||
)
|
||||
case 5:
|
||||
return mode === 'kombi' ? (
|
||||
<GridEditor sessionId={sessionId} onNext={handleNext} />
|
||||
) : null
|
||||
default:
|
||||
return null
|
||||
}
|
||||
@@ -512,7 +521,7 @@ export default function OcrOverlayPage() {
|
||||
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
|
||||
}`}
|
||||
>
|
||||
Kombi (5 Schritte)
|
||||
Kombi (6 Schritte)
|
||||
</button>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -70,6 +70,7 @@ export const KOMBI_STEPS: PipelineStep[] = [
|
||||
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
|
||||
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
|
||||
{ id: 'kombi', name: 'PP-OCRv5 + Tesseract', icon: '🔀', status: 'pending' },
|
||||
{ id: 'grid-editor', name: 'Tabelle', icon: '📊', status: 'pending' },
|
||||
]
|
||||
|
||||
/** Map from DB step to overlay UI step index */
|
||||
|
||||
218
admin-lehrer/components/grid-editor/GridEditor.tsx
Normal file
218
admin-lehrer/components/grid-editor/GridEditor.tsx
Normal file
@@ -0,0 +1,218 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useEffect, useState } from 'react'
|
||||
import { useGridEditor } from './useGridEditor'
|
||||
import { GridToolbar } from './GridToolbar'
|
||||
import { GridTable } from './GridTable'
|
||||
import { GridImageOverlay } from './GridImageOverlay'
|
||||
|
||||
/** Props accepted by {@link GridEditor}. */
interface GridEditorProps {
  /** Session whose grid is loaded and edited; `null` renders a placeholder instead of the editor. */
  sessionId: string | null
  /**
   * Invoked by the "Fertig" button after a save attempt for unsaved edits.
   * The button is not rendered when this callback is omitted.
   */
  onNext?: () => void
}

/**
 * Excel-like grid editor for one OCR session.
 *
 * Loads the grid through `useGridEditor` when a session id is available, then
 * renders a summary bar, the toolbar, an optional image overlay and one
 * editable table per zone. Global keyboard shortcuts (registered on `window`):
 *
 * - Ctrl/Cmd+Z: undo
 * - Ctrl/Cmd+Shift+Z or Ctrl/Cmd+Y: redo
 * - Ctrl/Cmd+S: save
 */
export function GridEditor({ sessionId, onNext }: GridEditorProps) {
  const {
    grid,
    loading,
    saving,
    error,
    dirty,
    selectedCell,
    setSelectedCell,
    buildGrid,
    loadGrid,
    saveGrid,
    updateCellText,
    toggleColumnBold,
    toggleRowHeader,
    undo,
    redo,
    canUndo,
    canRedo,
    getAdjacentCell,
  } = useGridEditor(sessionId)

  const [showOverlay, setShowOverlay] = useState(false)

  // Load grid on mount (and whenever the session changes)
  useEffect(() => {
    if (sessionId) {
      loadGrid()
    }
  }, [sessionId, loadGrid])

  // Keyboard shortcuts
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (!(e.metaKey || e.ctrlKey)) return

      // `e.key` reflects Shift/CapsLock ("Z" instead of "z"), so compare
      // case-insensitively; otherwise Ctrl+Shift+Z would never match.
      const key = e.key.toLowerCase()

      if (key === 'z' && !e.shiftKey) {
        e.preventDefault()
        undo()
      } else if ((key === 'z' && e.shiftKey) || (key === 'y' && !e.shiftKey)) {
        // Ctrl+Y is advertised in the tip line below, so honour it as redo too.
        e.preventDefault()
        redo()
      } else if (key === 's' && !e.shiftKey) {
        e.preventDefault()
        saveGrid()
      }
    }
    window.addEventListener('keydown', handler)
    return () => window.removeEventListener('keydown', handler)
  }, [undo, redo, saveGrid])

  // Move the selection to the neighbouring cell and focus its input.
  const handleNavigate = useCallback(
    (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => {
      const target = getAdjacentCell(cellId, direction)
      if (target) {
        setSelectedCell(target)
        // Defer focusing until after the current event has been processed.
        setTimeout(() => {
          const el = document.getElementById(`cell-${target}`)
          if (el) {
            el.focus()
            if (el instanceof HTMLInputElement) el.select()
          }
        }, 0)
      }
    },
    [getAdjacentCell, setSelectedCell],
  )

  if (!sessionId) {
    return (
      <div className="text-center py-12 text-gray-400">
        Keine Session ausgewaehlt.
      </div>
    )
  }

  if (loading) {
    return (
      <div className="flex items-center justify-center py-16">
        <div className="flex items-center gap-3 text-gray-500 dark:text-gray-400">
          <svg className="w-5 h-5 animate-spin" fill="none" viewBox="0 0 24 24">
            <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
            <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
          </svg>
          Grid wird aufgebaut...
        </div>
      </div>
    )
  }

  if (error) {
    return (
      <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-lg p-4">
        <p className="text-sm text-red-700 dark:text-red-300">
          Fehler: {error}
        </p>
        <button
          onClick={buildGrid}
          className="mt-2 text-xs px-3 py-1.5 bg-red-600 text-white rounded hover:bg-red-700"
        >
          Erneut versuchen
        </button>
      </div>
    )
  }

  if (!grid || !grid.zones.length) {
    return (
      <div className="text-center py-12">
        <p className="text-gray-400 mb-4">Kein Grid vorhanden.</p>
        <button
          onClick={buildGrid}
          className="px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 text-sm"
        >
          Grid aus OCR-Ergebnissen erstellen
        </button>
      </div>
    )
  }

  return (
    <div className="space-y-4">
      {/* Summary bar */}
      <div className="flex items-center gap-4 text-xs text-gray-500 dark:text-gray-400">
        <span>{grid.summary.total_zones} Zone(n)</span>
        <span>{grid.summary.total_columns} Spalten</span>
        <span>{grid.summary.total_rows} Zeilen</span>
        <span>{grid.summary.total_cells} Zellen</span>
        {grid.boxes_detected > 0 && (
          <span className="text-amber-600 dark:text-amber-400">
            {grid.boxes_detected} Box(en) erkannt
          </span>
        )}
        <span className="text-gray-400">
          {grid.duration_seconds.toFixed(1)}s
        </span>
      </div>

      {/* Toolbar */}
      <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 px-3 py-2">
        <GridToolbar
          dirty={dirty}
          saving={saving}
          canUndo={canUndo}
          canRedo={canRedo}
          showOverlay={showOverlay}
          onSave={saveGrid}
          onUndo={undo}
          onRedo={redo}
          onRebuild={buildGrid}
          onToggleOverlay={() => setShowOverlay((visible) => !visible)}
        />
      </div>

      {/* Image overlay */}
      {showOverlay && (
        <GridImageOverlay sessionId={sessionId} grid={grid} />
      )}

      {/* Zone tables */}
      <div className="space-y-4">
        {grid.zones.map((zone) => (
          <div
            key={zone.zone_index}
            className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden"
          >
            <GridTable
              zone={zone}
              selectedCell={selectedCell}
              onSelectCell={setSelectedCell}
              onCellTextChange={updateCellText}
              onToggleColumnBold={toggleColumnBold}
              onToggleRowHeader={toggleRowHeader}
              onNavigate={handleNavigate}
            />
          </div>
        ))}
      </div>

      {/* Tip */}
      <div className="text-[11px] text-gray-400 dark:text-gray-500 flex items-center gap-4">
        <span>Tab: naechste Zelle</span>
        <span>Enter: Zeile runter</span>
        <span>Spalte fett: Klick auf Spaltenkopf</span>
        <span>Header: Klick auf Zeilennummer</span>
        <span>Ctrl+Z/Y: Undo/Redo</span>
        <span>Ctrl+S: Speichern</span>
      </div>

      {/* Next step button */}
      {onNext && (
        <div className="flex justify-end">
          <button
            onClick={async () => {
              // Persist unsaved edits before leaving the step.
              if (dirty) await saveGrid()
              onNext()
            }}
            className="px-4 py-2 bg-teal-600 text-white text-sm rounded-lg hover:bg-teal-700 transition-colors"
          >
            Fertig
          </button>
        </div>
      )}
    </div>
  )
}
|
||||
98
admin-lehrer/components/grid-editor/GridImageOverlay.tsx
Normal file
98
admin-lehrer/components/grid-editor/GridImageOverlay.tsx
Normal file
@@ -0,0 +1,98 @@
|
||||
'use client'
|
||||
|
||||
import type { StructuredGrid } from './types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
/** Props accepted by {@link GridImageOverlay}. */
interface GridImageOverlayProps {
  /** Session whose cropped scan image is shown underneath the overlay. */
  sessionId: string
  /** Grid whose zones, column separators and row separators are drawn on top of the image. */
  grid: StructuredGrid
}

/** Stroke/fill colour pairs; zones cycle through them by `zone_index`. */
const ZONE_COLORS = [
  { border: 'rgba(20,184,166,0.7)', fill: 'rgba(20,184,166,0.05)' }, // teal
  { border: 'rgba(245,158,11,0.7)', fill: 'rgba(245,158,11,0.05)' }, // amber
  { border: 'rgba(99,102,241,0.7)', fill: 'rgba(99,102,241,0.05)' }, // indigo
  { border: 'rgba(236,72,153,0.7)', fill: 'rgba(236,72,153,0.05)' }, // pink
]

/**
 * Shows the cropped scan of a session with the detected grid drawn on top.
 *
 * The SVG uses the grid's pixel dimensions as its `viewBox`, so all `*_px`
 * coordinates can be drawn unscaled while the SVG stretches with the image.
 * Box zones get a thicker solid border, content zones a dashed one.
 */
export function GridImageOverlay({ sessionId, grid }: GridImageOverlayProps) {
  const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`

  return (
    <div className="relative w-full overflow-auto border border-gray-200 dark:border-gray-700 rounded-lg bg-gray-100 dark:bg-gray-900">
      <div className="relative inline-block">
        {/* Source image */}
        {/* eslint-disable-next-line @next/next/no-img-element */}
        <img
          src={imgUrl}
          alt="OCR Scan"
          className="block max-w-full"
          style={{ imageRendering: 'auto' }}
        />

        {/* SVG overlay */}
        <svg
          className="absolute inset-0 w-full h-full pointer-events-none"
          viewBox={`0 0 ${grid.image_width} ${grid.image_height}`}
          preserveAspectRatio="xMinYMin meet"
        >
          {grid.zones.map((zone) => {
            const colors = ZONE_COLORS[zone.zone_index % ZONE_COLORS.length]
            const b = zone.bbox_px

            return (
              <g key={zone.zone_index}>
                {/* Zone border */}
                <rect
                  x={b.x} y={b.y} width={b.w} height={b.h}
                  fill={colors.fill}
                  stroke={colors.border}
                  strokeWidth={zone.zone_type === 'box' ? 3 : 1.5}
                  strokeDasharray={zone.zone_type === 'box' ? undefined : '6 3'}
                />

                {/* Column separators: one line at the left edge of every column but the first */}
                {zone.columns.slice(1).map((col) => (
                  <line
                    key={`col-${col.index}`}
                    x1={col.x_min_px} y1={b.y}
                    x2={col.x_min_px} y2={b.y + b.h}
                    stroke={colors.border}
                    strokeWidth={1}
                    strokeDasharray="4 2"
                  />
                ))}

                {/* Row separators: one line at the top edge of every row but the first */}
                {zone.rows.slice(1).map((row) => (
                  <line
                    key={`row-${row.index}`}
                    x1={b.x} y1={row.y_min_px}
                    x2={b.x + b.w} y2={row.y_min_px}
                    stroke={colors.border}
                    strokeWidth={0.5}
                    strokeDasharray="3 3"
                    opacity={0.5}
                  />
                ))}

                {/* Zone label */}
                <text
                  x={b.x + 4} y={b.y + 14}
                  fill={colors.border}
                  fontSize={12}
                  fontWeight="bold"
                  fontFamily="monospace"
                >
                  {zone.zone_type === 'box' ? 'BOX' : 'CONTENT'} Z{zone.zone_index}
                  {' '}({zone.columns.length}x{zone.rows.length})
                </text>
              </g>
            )
          })}
        </svg>
      </div>
    </div>
  )
}
|
||||
153
admin-lehrer/components/grid-editor/GridTable.tsx
Normal file
153
admin-lehrer/components/grid-editor/GridTable.tsx
Normal file
@@ -0,0 +1,153 @@
|
||||
'use client'
|
||||
|
||||
import { useCallback, useRef } from 'react'
|
||||
import type { GridZone } from './types'
|
||||
|
||||
/** Props accepted by {@link GridTable}. */
interface GridTableProps {
  /** Zone rendered as one editable table. */
  zone: GridZone
  /** Id of the currently selected cell, or `null` when nothing is selected. */
  selectedCell: string | null
  /** Called with the cell id when a cell input receives focus. */
  onSelectCell: (cellId: string) => void
  /** Called with the cell id and the new text when an existing cell is edited. */
  onCellTextChange: (cellId: string, text: string) => void
  /** Called when a column header is clicked. */
  onToggleColumnBold: (zoneIndex: number, colIndex: number) => void
  /** Called when a row number is clicked. */
  onToggleRowHeader: (zoneIndex: number, rowIndex: number) => void
  /** Called for keyboard navigation (Tab, Enter, Alt+ArrowUp/Down) out of a cell. */
  onNavigate: (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => void
}

/**
 * Editable HTML table for a single grid zone.
 *
 * Column headers toggle bold for the column, row numbers toggle the header
 * flag for the row, and every grid position renders a text input. Positions
 * without a backing cell object render a read-only, empty input.
 */
export function GridTable({
  zone,
  selectedCell,
  onSelectCell,
  onCellTextChange,
  onToggleColumnBold,
  onToggleRowHeader,
  onNavigate,
}: GridTableProps) {
  const tableRef = useRef<HTMLTableElement>(null)

  // Keyboard handling inside a cell input:
  //   Tab / Shift+Tab -> right / left, Enter -> down,
  //   Alt+ArrowUp / Alt+ArrowDown -> up / down, Escape -> blur.
  const handleKeyDown = useCallback(
    (e: React.KeyboardEvent, cellId: string) => {
      if (e.key === 'Tab') {
        e.preventDefault()
        onNavigate(cellId, e.shiftKey ? 'left' : 'right')
      } else if (e.key === 'Enter' && !e.shiftKey) {
        e.preventDefault()
        onNavigate(cellId, 'down')
      } else if (e.key === 'ArrowUp' && e.altKey) {
        e.preventDefault()
        onNavigate(cellId, 'up')
      } else if (e.key === 'ArrowDown' && e.altKey) {
        e.preventDefault()
        onNavigate(cellId, 'down')
      } else if (e.key === 'Escape') {
        ;(e.target as HTMLElement).blur()
      }
    },
    [onNavigate],
  )

  // Build row→col cell lookup
  const cellMap = new Map<string, (typeof zone.cells)[0]>()
  for (const cell of zone.cells) {
    cellMap.set(`${cell.row_index}_${cell.col_index}`, cell)
  }

  const isBoxZone = zone.zone_type === 'box'

  return (
    <div className={`overflow-x-auto ${isBoxZone ? 'border-2 border-gray-400 dark:border-gray-500 rounded-lg' : ''}`}>
      {/* Zone label */}
      <div className="flex items-center gap-2 px-2 py-1 text-xs text-gray-500 dark:text-gray-400">
        <span className={`inline-flex items-center gap-1 px-1.5 py-0.5 rounded text-[10px] font-medium ${
          isBoxZone
            ? 'bg-amber-50 dark:bg-amber-900/20 text-amber-700 dark:text-amber-300 border border-amber-200 dark:border-amber-800'
            : 'bg-gray-50 dark:bg-gray-800 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-700'
        }`}>
          {isBoxZone ? 'Box' : 'Inhalt'} Zone {zone.zone_index}
        </span>
        <span>{zone.columns.length} Spalten, {zone.rows.length} Zeilen, {zone.cells.length} Zellen</span>
      </div>

      <table ref={tableRef} className="w-full border-collapse text-sm">
        {/* Column headers */}
        <thead>
          <tr>
            {/* Row number header */}
            <th className="w-8 px-1 py-1.5 text-[10px] text-gray-400 dark:text-gray-500 font-normal border-b border-r border-gray-200 dark:border-gray-700 bg-gray-50 dark:bg-gray-800/50" />
            {zone.columns.map((col) => (
              <th
                key={col.index}
                className={`px-2 py-1.5 text-xs font-medium border-b border-r border-gray-200 dark:border-gray-700 bg-gray-50 dark:bg-gray-800/50 cursor-pointer select-none transition-colors hover:bg-gray-100 dark:hover:bg-gray-700 ${
                  col.bold ? 'text-teal-700 dark:text-teal-300' : 'text-gray-600 dark:text-gray-400'
                }`}
                onClick={() => onToggleColumnBold(zone.zone_index, col.index)}
                title={`Spalte ${col.index + 1} — Klick fuer Fett-Toggle`}
              >
                <div className="flex items-center gap-1 justify-center">
                  <span>{col.label}</span>
                  {col.bold && (
                    <span className="text-[9px] px-1 py-0 rounded bg-teal-100 dark:bg-teal-900/40 text-teal-600 dark:text-teal-400">
                      B
                    </span>
                  )}
                </div>
              </th>
            ))}
          </tr>
        </thead>

        <tbody>
          {zone.rows.map((row) => (
            <tr key={row.index} className={row.is_header ? 'bg-blue-50/50 dark:bg-blue-900/10' : ''}>
              {/* Row number */}
              <td
                className={`w-8 px-1 py-1 text-center text-[10px] border-b border-r border-gray-200 dark:border-gray-700 cursor-pointer select-none transition-colors hover:bg-gray-100 dark:hover:bg-gray-700 ${
                  row.is_header
                    ? 'bg-blue-50 dark:bg-blue-900/20 text-blue-600 dark:text-blue-400 font-medium'
                    : 'bg-gray-50 dark:bg-gray-800/50 text-gray-400 dark:text-gray-500'
                }`}
                onClick={() => onToggleRowHeader(zone.zone_index, row.index)}
                title={`Zeile ${row.index + 1} — Klick fuer Header-Toggle`}
              >
                {row.index + 1}
                {row.is_header && <span className="block text-[8px]">H</span>}
              </td>

              {/* Cells */}
              {zone.columns.map((col) => {
                const cell = cellMap.get(`${row.index}_${col.index}`)
                // Synthesize an id for grid positions without a cell object so
                // they can still be focused and navigated.
                // NOTE(review): presumably mirrors the backend's cell_id format — confirm.
                const cellId = cell?.cell_id ?? `Z${zone.zone_index}_R${String(row.index).padStart(2, '0')}_C${col.index}`
                const isSelected = selectedCell === cellId
                const isBold = col.bold || cell?.is_bold
                // A confidence of 0 is not highlighted; only values strictly between 0 and 60 are.
                const isLowConf = cell && cell.confidence > 0 && cell.confidence < 60

                return (
                  <td
                    key={col.index}
                    className={`border-b border-r border-gray-200 dark:border-gray-700 p-0 transition-shadow ${
                      isSelected ? 'ring-2 ring-teal-500 ring-inset z-10 relative' : ''
                    } ${isLowConf ? 'bg-amber-50/50 dark:bg-amber-900/10' : ''}`}
                  >
                    <input
                      id={`cell-${cellId}`}
                      type="text"
                      value={cell?.text ?? ''}
                      // Edits are only forwarded for existing cells; mark the
                      // placeholder inputs read-only instead of silently
                      // discarding what the user types.
                      readOnly={!cell}
                      onChange={(e) => {
                        if (cell) onCellTextChange(cellId, e.target.value)
                      }}
                      onFocus={() => onSelectCell(cellId)}
                      onKeyDown={(e) => handleKeyDown(e, cellId)}
                      className={`w-full px-2 py-1.5 bg-transparent border-0 outline-none text-gray-800 dark:text-gray-200 ${
                        isBold ? 'font-bold' : 'font-normal'
                      } ${row.is_header ? 'text-base' : 'text-sm'}`}
                      spellCheck={false}
                    />
                  </td>
                )
              })}
            </tr>
          ))}
        </tbody>
      </table>
    </div>
  )
}
|
||||
110
admin-lehrer/components/grid-editor/GridToolbar.tsx
Normal file
110
admin-lehrer/components/grid-editor/GridToolbar.tsx
Normal file
@@ -0,0 +1,110 @@
|
||||
'use client'
|
||||
|
||||
/** Props accepted by {@link GridToolbar}. */
interface GridToolbarProps {
  /** True when there are unsaved edits; enables the save button. */
  dirty: boolean
  /** True while a save is in flight; disables the save button and shows a spinner. */
  saving: boolean
  /** Enables the undo button. */
  canUndo: boolean
  /** Enables the redo button. */
  canRedo: boolean
  /** Whether the image overlay is currently shown (highlights the toggle). */
  showOverlay: boolean
  onSave: () => void
  onUndo: () => void
  onRedo: () => void
  /** Triggered by the "Neu berechnen" button. */
  onRebuild: () => void
  onToggleOverlay: () => void
}

/**
 * Toolbar of the grid editor: undo/redo, image-overlay toggle, rebuild and save.
 *
 * All buttons are explicit `type="button"` so the toolbar never submits an
 * enclosing form, and the icon-only buttons carry an `aria-label`.
 */
export function GridToolbar({
  dirty,
  saving,
  canUndo,
  canRedo,
  showOverlay,
  onSave,
  onUndo,
  onRedo,
  onRebuild,
  onToggleOverlay,
}: GridToolbarProps) {
  return (
    <div className="flex items-center gap-2 flex-wrap">
      {/* Undo / Redo */}
      <div className="flex items-center gap-1 border-r border-gray-200 dark:border-gray-700 pr-2">
        <button
          type="button"
          onClick={onUndo}
          disabled={!canUndo}
          className="p-1.5 rounded hover:bg-gray-100 dark:hover:bg-gray-700 disabled:opacity-30 disabled:cursor-not-allowed"
          title="Rueckgaengig (Ctrl+Z)"
          aria-label="Rueckgaengig (Ctrl+Z)"
        >
          <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
            <path strokeLinecap="round" strokeLinejoin="round" d="M3 10h10a5 5 0 015 5v2M3 10l4-4M3 10l4 4" />
          </svg>
        </button>
        <button
          type="button"
          onClick={onRedo}
          disabled={!canRedo}
          className="p-1.5 rounded hover:bg-gray-100 dark:hover:bg-gray-700 disabled:opacity-30 disabled:cursor-not-allowed"
          title="Wiederholen (Ctrl+Shift+Z)"
          aria-label="Wiederholen (Ctrl+Shift+Z)"
        >
          <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
            <path strokeLinecap="round" strokeLinejoin="round" d="M21 10H11a5 5 0 00-5 5v2M21 10l-4-4M21 10l-4 4" />
          </svg>
        </button>
      </div>

      {/* Overlay toggle */}
      <button
        type="button"
        onClick={onToggleOverlay}
        aria-pressed={showOverlay}
        className={`flex items-center gap-1 px-2.5 py-1.5 text-xs rounded-md border transition-colors ${
          showOverlay
            ? 'bg-teal-50 dark:bg-teal-900/30 border-teal-300 dark:border-teal-700 text-teal-700 dark:text-teal-300'
            : 'border-gray-200 dark:border-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-50 dark:hover:bg-gray-700'
        }`}
        title="Grid auf Bild anzeigen"
      >
        <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
          <path strokeLinecap="round" strokeLinejoin="round" d="M4 5a1 1 0 011-1h14a1 1 0 011 1v2a1 1 0 01-1 1H5a1 1 0 01-1-1V5zM4 13a1 1 0 011-1h6a1 1 0 011 1v6a1 1 0 01-1 1H5a1 1 0 01-1-1v-6zM16 13a1 1 0 011-1h2a1 1 0 011 1v6a1 1 0 01-1 1h-2a1 1 0 01-1-1v-6z" />
        </svg>
        Bild-Overlay
      </button>

      {/* Rebuild */}
      <button
        type="button"
        onClick={onRebuild}
        className="flex items-center gap-1 px-2.5 py-1.5 text-xs rounded-md border border-gray-200 dark:border-gray-700 text-gray-600 dark:text-gray-400 hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors"
        title="Grid neu berechnen"
      >
        <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
          <path strokeLinecap="round" strokeLinejoin="round" d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" />
        </svg>
        Neu berechnen
      </button>

      {/* Spacer */}
      <div className="flex-1" />

      {/* Save */}
      <button
        type="button"
        onClick={onSave}
        disabled={!dirty || saving}
        className={`flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
          dirty
            ? 'bg-teal-600 text-white hover:bg-teal-700'
            : 'bg-gray-100 dark:bg-gray-800 text-gray-400 cursor-not-allowed'
        }`}
        title="Speichern (Ctrl+S)"
      >
        {saving ? (
          <svg className="w-3.5 h-3.5 animate-spin" fill="none" viewBox="0 0 24 24">
            <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
            <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
          </svg>
        ) : (
          <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
            <path strokeLinecap="round" strokeLinejoin="round" d="M8 7H5a2 2 0 00-2 2v9a2 2 0 002 2h14a2 2 0 002-2V9a2 2 0 00-2-2h-3m-1 4l-3 3m0 0l-3-3m3 3V4" />
          </svg>
        )}
        {saving ? 'Speichert...' : dirty ? 'Speichern' : 'Gespeichert'}
      </button>
    </div>
  )
}
|
||||
6
admin-lehrer/components/grid-editor/index.ts
Normal file
6
admin-lehrer/components/grid-editor/index.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
export { GridEditor } from './GridEditor'
|
||||
export { GridTable } from './GridTable'
|
||||
export { GridToolbar } from './GridToolbar'
|
||||
export { GridImageOverlay } from './GridImageOverlay'
|
||||
export { useGridEditor } from './useGridEditor'
|
||||
export type * from './types'
|
||||
97
admin-lehrer/components/grid-editor/types.ts
Normal file
97
admin-lehrer/components/grid-editor/types.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
import type { OcrWordBox } from '@/app/(admin)/ai/ocr-pipeline/types'
|
||||
|
||||
// Re-export for convenience
|
||||
export type { OcrWordBox }
|
||||
|
||||
/**
 * A complete structured grid with zones, ready for the Excel-like editor.
 *
 * This is the payload exchanged with the build-grid / grid-editor / save-grid
 * endpoints.
 */
export interface StructuredGrid {
  session_id: string
  /** Width of the source image in pixels; used as the overlay's SVG viewBox width. */
  image_width: number
  /** Height of the source image in pixels; used as the overlay's SVG viewBox height. */
  image_height: number
  /** Zones of the page, each rendered as its own table. */
  zones: GridZone[]
  /** Number of bordered boxes detected; shown in the summary bar when greater than 0. */
  boxes_detected: number
  summary: GridSummary
  formatting: GridFormatting
  /** Displayed with one decimal in the summary bar; presumably the backend build time — confirm. */
  duration_seconds: number
  /** NOTE(review): presumably set by the backend once a user-edited grid was saved — confirm. */
  edited?: boolean
}
|
||||
|
||||
/** Aggregate counts for a grid; the editor shows zones, columns, rows and cells in its summary bar. */
export interface GridSummary {
  total_zones: number
  total_columns: number
  total_rows: number
  total_cells: number
  total_words: number
}
|
||||
|
||||
/**
 * Grid-level formatting summary.
 *
 * NOTE(review): presumably the indices of bold columns and header rows as
 * reported by the backend — confirm how these relate to the per-zone flags.
 */
export interface GridFormatting {
  bold_columns: number[]
  header_rows: number[]
}
|
||||
|
||||
/** A horizontal zone of the page — either content or a bordered box. */
export interface GridZone {
  /** Identifies the zone in callbacks and is used as the React key for its table. */
  zone_index: number
  zone_type: 'content' | 'box'
  /** Zone bounds in image pixels; drawn as the zone rectangle in the image overlay. */
  bbox_px: BBox
  /** NOTE(review): presumably the same bounds as a percentage of the image size — confirm. */
  bbox_pct: BBox
  /** Border details; `null` is allowed (presumably for zones without a detected border — confirm). */
  border: ZoneBorder | null
  word_count: number
  columns: GridColumn[]
  rows: GridRow[]
  /** Cells of the zone, looked up by `row_index`/`col_index`; positions may have no cell. */
  cells: GridEditorCell[]
  header_rows: number[]
}
|
||||
|
||||
/** Axis-aligned rectangle: top-left corner (`x`, `y`) plus width `w` and height `h`. */
export interface BBox {
  x: number
  y: number
  w: number
  h: number
}
|
||||
|
||||
/**
 * Border information attached to a zone.
 *
 * NOTE(review): units of `thickness` and the scale of `confidence` are not
 * visible in the frontend code — confirm against the backend.
 */
export interface ZoneBorder {
  thickness: number
  confidence: number
}
|
||||
|
||||
/** One column of a zone. */
export interface GridColumn {
  /** Column index within the zone; matched against `GridEditorCell.col_index`. */
  index: number
  /** Text shown in the column header of the editor table. */
  label: string
  /** Left edge in image pixels; the overlay draws the column separator here. */
  x_min_px: number
  x_max_px: number
  /** NOTE(review): presumably the horizontal bounds as a percentage of the image width — confirm. */
  x_min_pct: number
  x_max_pct: number
  /** Whether the whole column is rendered bold; toggled by clicking the column header. */
  bold: boolean
}
|
||||
|
||||
/** One row of a zone. */
export interface GridRow {
  /** Row index within the zone; matched against `GridEditorCell.row_index`. */
  index: number
  /** Top edge in image pixels; the overlay draws the row separator here. */
  y_min_px: number
  y_max_px: number
  /** NOTE(review): presumably the vertical bounds as a percentage of the image height — confirm. */
  y_min_pct: number
  y_max_pct: number
  /** Whether the row is styled as a header; toggled by clicking the row number. */
  is_header: boolean
}
|
||||
|
||||
/** One editable cell of a zone. */
export interface GridEditorCell {
  /** Identifier used for selection, editing callbacks and the input's DOM id (`cell-<cell_id>`). */
  cell_id: string
  zone_index: number
  row_index: number
  col_index: number
  col_type: string
  /** Cell text; this is the value the editor input shows and changes. */
  text: string
  /** OCR confidence; the editor highlights cells whose value is strictly between 0 and 60. */
  confidence: number
  bbox_px: BBox
  bbox_pct: BBox
  word_boxes: OcrWordBox[]
  ocr_engine: string
  /** Per-cell bold flag; set for all cells of a column when the column's bold state is toggled. */
  is_bold: boolean
}
|
||||
|
||||
/** Cell formatting applied by the user in the editor. */
export interface CellFormatting {
  bold: boolean
  fontSize: 'small' | 'normal' | 'large'
  align: 'left' | 'center' | 'right'
}
|
||||
288
admin-lehrer/components/grid-editor/useGridEditor.ts
Normal file
288
admin-lehrer/components/grid-editor/useGridEditor.ts
Normal file
@@ -0,0 +1,288 @@
|
||||
import { useCallback, useRef, useState } from 'react'
|
||||
import type { StructuredGrid, GridZone } from './types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
const MAX_UNDO = 50
|
||||
|
||||
/** Shape of the state tracked by the grid editor hook. */
export interface GridEditorState {
  /** The loaded grid, or `null` before a grid was loaded or built. */
  grid: StructuredGrid | null
  /** True while a load or build request is in flight. */
  loading: boolean
  /** True while a save request is in flight. */
  saving: boolean
  /** Message of the last failed request, or `null`. */
  error: string | null
  /** True when the grid has edits that were not saved yet. */
  dirty: boolean
  selectedCell: string | null
  selectedZone: number | null
}
|
||||
|
||||
export function useGridEditor(sessionId: string | null) {
|
||||
const [grid, setGrid] = useState<StructuredGrid | null>(null)
|
||||
const [loading, setLoading] = useState(false)
|
||||
const [saving, setSaving] = useState(false)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [dirty, setDirty] = useState(false)
|
||||
const [selectedCell, setSelectedCell] = useState<string | null>(null)
|
||||
const [selectedZone, setSelectedZone] = useState<number | null>(null)
|
||||
|
||||
// Undo/redo stacks store serialized zone arrays
|
||||
const undoStack = useRef<string[]>([])
|
||||
const redoStack = useRef<string[]>([])
|
||||
|
||||
/**
 * Push a JSON snapshot of `zones` onto the undo stack.
 *
 * The stack is capped at `MAX_UNDO` entries (oldest dropped first), and any
 * new snapshot clears the redo stack.
 */
const pushUndo = useCallback((zones: GridZone[]) => {
  undoStack.current.push(JSON.stringify(zones))
  if (undoStack.current.length > MAX_UNDO) {
    undoStack.current.shift()
  }
  redoStack.current = []
}, [])
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Load / Build
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
/**
 * Ask the backend to (re)build the grid for the current session.
 *
 * On success the returned grid replaces the current one, the dirty flag is
 * cleared and the undo/redo history is reset. On failure the error message is
 * stored in `error`. Does nothing without a session id.
 */
const buildGrid = useCallback(async () => {
  if (!sessionId) return
  setLoading(true)
  setError(null)
  try {
    const res = await fetch(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-grid`,
      { method: 'POST' },
    )
    if (!res.ok) {
      const body = await res.json().catch(() => ({}))
      // `detail` is not guaranteed to be a string (it may be a list of
      // validation errors); serialize it instead of showing "[object Object]".
      const detail: unknown = body?.detail
      const message =
        typeof detail === 'string' ? detail : detail ? JSON.stringify(detail) : ''
      throw new Error(message || `HTTP ${res.status}`)
    }
    const data: StructuredGrid = await res.json()
    setGrid(data)
    setDirty(false)
    undoStack.current = []
    redoStack.current = []
  } catch (e) {
    setError(e instanceof Error ? e.message : String(e))
  } finally {
    setLoading(false)
  }
}, [sessionId])
|
||||
|
||||
/**
 * Load the stored grid for the current session.
 *
 * A 404 response means no grid exists yet, in which case `buildGrid` is
 * called instead. On success the grid replaces the current one, the dirty
 * flag is cleared and the undo/redo history is reset. On failure the error
 * message is stored in `error`. Does nothing without a session id.
 */
const loadGrid = useCallback(async () => {
  if (!sessionId) return
  setLoading(true)
  setError(null)
  try {
    const res = await fetch(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/grid-editor`,
    )
    if (res.status === 404) {
      // No grid yet — build it
      await buildGrid()
      return
    }
    if (!res.ok) {
      const body = await res.json().catch(() => ({}))
      // `detail` is not guaranteed to be a string (it may be a list of
      // validation errors); serialize it instead of showing "[object Object]".
      const detail: unknown = body?.detail
      const message =
        typeof detail === 'string' ? detail : detail ? JSON.stringify(detail) : ''
      throw new Error(message || `HTTP ${res.status}`)
    }
    const data: StructuredGrid = await res.json()
    setGrid(data)
    setDirty(false)
    undoStack.current = []
    redoStack.current = []
  } catch (e) {
    setError(e instanceof Error ? e.message : String(e))
  } finally {
    setLoading(false)
  }
}, [sessionId, buildGrid])
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Save
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
/**
 * Persist the current grid for the session.
 *
 * POSTs the whole grid as JSON. On success the dirty flag is cleared; on
 * failure the error message is stored in `error` and the grid stays dirty.
 * Does nothing without a session id or a grid.
 */
const saveGrid = useCallback(async () => {
  if (!sessionId || !grid) return
  setSaving(true)
  try {
    const res = await fetch(
      `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/save-grid`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(grid),
      },
    )
    if (!res.ok) {
      const body = await res.json().catch(() => ({}))
      // `detail` is not guaranteed to be a string (it may be a list of
      // validation errors); serialize it instead of showing "[object Object]".
      const detail: unknown = body?.detail
      const message =
        typeof detail === 'string' ? detail : detail ? JSON.stringify(detail) : ''
      throw new Error(message || `HTTP ${res.status}`)
    }
    setDirty(false)
  } catch (e) {
    setError(e instanceof Error ? e.message : String(e))
  } finally {
    setSaving(false)
  }
}, [sessionId, grid])
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Cell editing
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
/**
 * Replace the text of the cell identified by `cellId`.
 *
 * The current zones are pushed onto the undo history first, then every zone is
 * rebuilt immutably with the matching cell's text swapped. Marks the grid dirty.
 */
const updateCellText = useCallback(
  (cellId: string, newText: string) => {
    if (!grid) return
    pushUndo(grid.zones)

    setGrid((current) => {
      if (!current) return current
      const zones = current.zones.map((zone) => {
        const cells = zone.cells.map((cell) =>
          cell.cell_id !== cellId ? cell : { ...cell, text: newText },
        )
        return { ...zone, cells }
      })
      return { ...current, zones }
    })
    setDirty(true)
  },
  [grid, pushUndo],
)
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Column formatting
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
/**
 * Flip the bold flag of one column inside one zone.
 *
 * The new value is the negation of the column's current `bold` flag (or `true`
 * when the column is not found). It is written to the column entry and to
 * `is_bold` of every cell in that column. Snapshots the zones for undo and
 * marks the grid dirty.
 */
const toggleColumnBold = useCallback(
  (zoneIndex: number, colIndex: number) => {
    if (!grid) return
    pushUndo(grid.zones)

    setGrid((current) => {
      if (!current) return current
      const zones = current.zones.map((zone) => {
        if (zone.zone_index !== zoneIndex) return zone

        const existing = zone.columns.find((c) => c.index === colIndex)
        const newBold = existing ? !existing.bold : true

        const columns = zone.columns.map((c) =>
          c.index === colIndex ? { ...c, bold: newBold } : c,
        )
        const cells = zone.cells.map((cell) =>
          cell.col_index === colIndex ? { ...cell, is_bold: newBold } : cell,
        )
        return { ...zone, columns, cells }
      })
      return { ...current, zones }
    })
    setDirty(true)
  },
  [grid, pushUndo],
)
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Row formatting
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
/**
 * Flip the `is_header` flag of one row inside one zone.
 *
 * Only the row entry is changed; other zones and rows are passed through
 * untouched. Snapshots the zones for undo and marks the grid dirty.
 */
const toggleRowHeader = useCallback(
  (zoneIndex: number, rowIndex: number) => {
    if (!grid) return
    pushUndo(grid.zones)

    setGrid((current) => {
      if (!current) return current
      const zones = current.zones.map((zone) => {
        if (zone.zone_index !== zoneIndex) return zone
        const rows = zone.rows.map((r) =>
          r.index === rowIndex ? { ...r, is_header: !r.is_header } : r,
        )
        return { ...zone, rows }
      })
      return { ...current, zones }
    })
    setDirty(true)
  },
  [grid, pushUndo],
)
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Undo / Redo
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
/**
 * Step one entry back in the edit history.
 *
 * The current zones are serialised onto the redo stack and the most recent
 * undo snapshot is parsed back into the grid. No-op when there is no grid or
 * the undo stack is empty.
 */
const undo = useCallback(() => {
  if (!grid) return
  const snapshot = undoStack.current.pop()
  if (snapshot === undefined) return
  redoStack.current.push(JSON.stringify(grid.zones))
  setGrid((g) => {
    if (!g) return g
    return { ...g, zones: JSON.parse(snapshot) }
  })
  setDirty(true)
}, [grid])
|
||||
|
||||
/**
 * Re-apply the most recently undone change.
 *
 * The current zones are serialised onto the undo stack and the latest redo
 * snapshot is parsed back into the grid. No-op when there is no grid or the
 * redo stack is empty.
 */
const redo = useCallback(() => {
  if (!grid) return
  const snapshot = redoStack.current.pop()
  if (snapshot === undefined) return
  undoStack.current.push(JSON.stringify(grid.zones))
  setGrid((g) => {
    if (!g) return g
    return { ...g, zones: JSON.parse(snapshot) }
  })
  setDirty(true)
}, [grid])
|
||||
|
||||
// True when at least one snapshot is available on the undo stack.
const canUndo = undoStack.current.length > 0
// True when at least one snapshot is available on the redo stack.
const canRedo = redoStack.current.length > 0
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Navigation helpers
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
/**
 * Find the id of the cell next to `cellId` in the given direction.
 *
 * The search stays inside the zone that contains the starting cell. Returns
 * `null` when there is no grid, the starting cell is unknown, or no cell exists
 * at the neighbouring row/column position.
 */
const getAdjacentCell = useCallback(
  (cellId: string, direction: 'up' | 'down' | 'left' | 'right'): string | null => {
    if (!grid) return null

    // Translate the direction into a row/column offset once, up front.
    const rowDelta = direction === 'up' ? -1 : direction === 'down' ? 1 : 0
    const colDelta = direction === 'left' ? -1 : direction === 'right' ? 1 : 0

    for (const zone of grid.zones) {
      const origin = zone.cells.find((c) => c.cell_id === cellId)
      if (!origin) continue

      const wantedRow = origin.row_index + rowDelta
      const wantedCol = origin.col_index + colDelta
      const neighbour = zone.cells.find(
        (c) => c.row_index === wantedRow && c.col_index === wantedCol,
      )
      return neighbour?.cell_id ?? null
    }
    return null
  },
  [grid],
)
|
||||
|
||||
// Values and actions exposed to the caller: grid and request state, the
// current selection with its setters, build/load/save actions, editing
// actions, undo/redo with their availability flags, and cell navigation.
return {
  grid,
  loading,
  saving,
  error,
  dirty,
  selectedCell,
  selectedZone,
  setSelectedCell,
  setSelectedZone,
  buildGrid,
  loadGrid,
  saveGrid,
  updateCellText,
  toggleColumnBold,
  toggleRowHeader,
  undo,
  redo,
  canUndo,
  canRedo,
  getAdjacentCell,
}
|
||||
}
|
||||
426
klausur-service/backend/grid_editor_api.py
Normal file
426
klausur-service/backend/grid_editor_api.py
Normal file
@@ -0,0 +1,426 @@
|
||||
"""
|
||||
Grid Editor API — builds a structured, zone-aware grid from Kombi OCR results.
|
||||
|
||||
Takes the merged word positions from paddle-kombi / rapid-kombi and:
|
||||
1. Detects bordered boxes on the image (cv_box_detect)
|
||||
2. Splits the page into zones (content + box regions)
|
||||
3. Clusters words into columns and rows per zone
|
||||
4. Returns a hierarchical StructuredGrid for the frontend Excel-like editor
|
||||
|
||||
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
|
||||
from cv_box_detect import detect_boxes, split_page_into_zones
|
||||
from cv_words_first import _cluster_columns, _cluster_rows, _build_cells
|
||||
from ocr_pipeline_session_store import (
|
||||
get_session_db,
|
||||
get_session_image,
|
||||
update_session_db,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/ocr-pipeline", tags=["grid-editor"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _flatten_word_boxes(cells: List[Dict]) -> List[Dict]:
|
||||
"""Extract all word_boxes from cells into a flat list of word dicts."""
|
||||
words: List[Dict] = []
|
||||
for cell in cells:
|
||||
for wb in cell.get("word_boxes") or []:
|
||||
if wb.get("text", "").strip():
|
||||
words.append({
|
||||
"text": wb["text"],
|
||||
"left": wb["left"],
|
||||
"top": wb["top"],
|
||||
"width": wb["width"],
|
||||
"height": wb["height"],
|
||||
"conf": wb.get("conf", 0),
|
||||
})
|
||||
return words
|
||||
|
||||
|
||||
def _words_in_zone(
|
||||
words: List[Dict],
|
||||
zone_y: int,
|
||||
zone_h: int,
|
||||
zone_x: int,
|
||||
zone_w: int,
|
||||
) -> List[Dict]:
|
||||
"""Filter words whose Y-center falls within a zone's bounds."""
|
||||
zone_y_end = zone_y + zone_h
|
||||
zone_x_end = zone_x + zone_w
|
||||
result = []
|
||||
for w in words:
|
||||
cy = w["top"] + w["height"] / 2
|
||||
cx = w["left"] + w["width"] / 2
|
||||
if zone_y <= cy <= zone_y_end and zone_x <= cx <= zone_x_end:
|
||||
result.append(w)
|
||||
return result
|
||||
|
||||
|
||||
def _detect_header_rows(
|
||||
rows: List[Dict],
|
||||
zone_words: List[Dict],
|
||||
zone_y: int,
|
||||
) -> List[int]:
|
||||
"""Heuristic: the first row is a header if it has bold/large text or
|
||||
there's a significant gap after it."""
|
||||
if len(rows) < 2:
|
||||
return []
|
||||
|
||||
headers = []
|
||||
first_row = rows[0]
|
||||
second_row = rows[1]
|
||||
|
||||
# Gap between first and second row > 1.5x average row height
|
||||
avg_h = sum(r["y_max"] - r["y_min"] for r in rows) / len(rows)
|
||||
gap = second_row["y_min"] - first_row["y_max"]
|
||||
if gap > avg_h * 0.5:
|
||||
headers.append(0)
|
||||
|
||||
# Also check if first row words are taller than average (bold/header text)
|
||||
first_row_words = [
|
||||
w for w in zone_words
|
||||
if first_row["y_min"] <= w["top"] + w["height"] / 2 <= first_row["y_max"]
|
||||
]
|
||||
if first_row_words:
|
||||
first_h = max(w["height"] for w in first_row_words)
|
||||
all_heights = [w["height"] for w in zone_words]
|
||||
median_h = sorted(all_heights)[len(all_heights) // 2] if all_heights else first_h
|
||||
if first_h > median_h * 1.3:
|
||||
if 0 not in headers:
|
||||
headers.append(0)
|
||||
|
||||
return headers
|
||||
|
||||
|
||||
def _build_zone_grid(
|
||||
zone_words: List[Dict],
|
||||
zone_x: int,
|
||||
zone_y: int,
|
||||
zone_w: int,
|
||||
zone_h: int,
|
||||
zone_index: int,
|
||||
img_w: int,
|
||||
img_h: int,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build columns, rows, cells for a single zone from its words."""
|
||||
if not zone_words:
|
||||
return {
|
||||
"columns": [],
|
||||
"rows": [],
|
||||
"cells": [],
|
||||
"header_rows": [],
|
||||
}
|
||||
|
||||
# Cluster columns and rows
|
||||
columns = _cluster_columns(zone_words, zone_w)
|
||||
rows = _cluster_rows(zone_words)
|
||||
|
||||
if not columns or not rows:
|
||||
return {
|
||||
"columns": [],
|
||||
"rows": [],
|
||||
"cells": [],
|
||||
"header_rows": [],
|
||||
}
|
||||
|
||||
# Build cells
|
||||
cells = _build_cells(zone_words, columns, rows, img_w, img_h)
|
||||
|
||||
# Prefix cell IDs with zone index
|
||||
for cell in cells:
|
||||
cell["cell_id"] = f"Z{zone_index}_{cell['cell_id']}"
|
||||
cell["zone_index"] = zone_index
|
||||
|
||||
# Detect header rows
|
||||
header_rows = _detect_header_rows(rows, zone_words, zone_y)
|
||||
|
||||
# Convert columns to output format with percentages
|
||||
out_columns = []
|
||||
for col in columns:
|
||||
x_min = col["x_min"]
|
||||
x_max = col["x_max"]
|
||||
out_columns.append({
|
||||
"index": col["index"],
|
||||
"label": col["type"],
|
||||
"x_min_px": round(x_min),
|
||||
"x_max_px": round(x_max),
|
||||
"x_min_pct": round(x_min / img_w * 100, 2) if img_w else 0,
|
||||
"x_max_pct": round(x_max / img_w * 100, 2) if img_w else 0,
|
||||
"bold": False,
|
||||
})
|
||||
|
||||
# Convert rows to output format with percentages
|
||||
out_rows = []
|
||||
for row in rows:
|
||||
out_rows.append({
|
||||
"index": row["index"],
|
||||
"y_min_px": round(row["y_min"]),
|
||||
"y_max_px": round(row["y_max"]),
|
||||
"y_min_pct": round(row["y_min"] / img_h * 100, 2) if img_h else 0,
|
||||
"y_max_pct": round(row["y_max"] / img_h * 100, 2) if img_h else 0,
|
||||
"is_header": row["index"] in header_rows,
|
||||
})
|
||||
|
||||
return {
|
||||
"columns": out_columns,
|
||||
"rows": out_rows,
|
||||
"cells": cells,
|
||||
"header_rows": header_rows,
|
||||
}
|
||||
|
||||
|
||||
def _get_content_bounds(words: List[Dict]) -> tuple:
|
||||
"""Get content bounds from word positions."""
|
||||
if not words:
|
||||
return 0, 0, 0, 0
|
||||
x_min = min(w["left"] for w in words)
|
||||
y_min = min(w["top"] for w in words)
|
||||
x_max = max(w["left"] + w["width"] for w in words)
|
||||
y_max = max(w["top"] + w["height"] for w in words)
|
||||
return x_min, y_min, x_max - x_min, y_max - y_min
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions/{session_id}/build-grid")
async def build_grid(session_id: str):
    """Build a structured, zone-aware grid from existing Kombi word results.

    Requires that paddle-kombi or rapid-kombi has already been run on the session.
    Uses the image for box detection and the word positions for grid structuring.

    Returns a StructuredGrid with zones, each containing their own
    columns, rows, and cells — ready for the frontend Excel-like editor.
    The result is also persisted to the session's ``grid_editor_result``.

    Raises:
        HTTPException 404: the session does not exist.
        HTTPException 400: the session has no word results, the word result
            lacks image dimensions, or no non-empty word boxes were found.
    """
    t0 = time.time()

    # 1. Load session and word results
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    word_result = session.get("word_result")
    if not word_result or not word_result.get("cells"):
        raise HTTPException(
            status_code=400,
            detail="No word results found. Run paddle-kombi or rapid-kombi first.",
        )

    img_w = word_result.get("image_width", 0)
    img_h = word_result.get("image_height", 0)
    if not img_w or not img_h:
        raise HTTPException(status_code=400, detail="Missing image dimensions in word_result")

    # 2. Flatten all word boxes from cells
    all_words = _flatten_word_boxes(word_result["cells"])
    if not all_words:
        raise HTTPException(status_code=400, detail="No word boxes found in cells")

    logger.info("build-grid session %s: %d words from %d cells",
                session_id, len(all_words), len(word_result["cells"]))

    # 3. Load image for box detection.
    # Preference order: cropped, then dewarped, then original.
    img_png = await get_session_image(session_id, "cropped")
    if not img_png:
        img_png = await get_session_image(session_id, "dewarped")
    if not img_png:
        img_png = await get_session_image(session_id, "original")

    zones_data: List[Dict[str, Any]] = []
    boxes_detected = 0

    # Bounding rectangle of all words; used as the content area for box
    # detection and as the single fallback zone.
    content_x, content_y, content_w, content_h = _get_content_bounds(all_words)

    if img_png:
        # Decode image for box detection
        arr = np.frombuffer(img_png, dtype=np.uint8)
        img_bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)

        # imdecode yields None for undecodable data; then box detection is skipped.
        if img_bgr is not None:
            # Detect bordered boxes
            boxes = detect_boxes(
                img_bgr,
                content_x=content_x,
                content_w=content_w,
                content_y=content_y,
                content_h=content_h,
            )
            boxes_detected = len(boxes)

            if boxes:
                # Split page into zones
                page_zones = split_page_into_zones(
                    content_x, content_y, content_w, content_h, boxes
                )

                for pz in page_zones:
                    # Words are assigned to a zone by their centre point.
                    zone_words = _words_in_zone(
                        all_words, pz.y, pz.height, pz.x, pz.width
                    )
                    grid = _build_zone_grid(
                        zone_words, pz.x, pz.y, pz.width, pz.height,
                        pz.index, img_w, img_h,
                    )

                    zone_entry: Dict[str, Any] = {
                        "zone_index": pz.index,
                        "zone_type": pz.zone_type,
                        "bbox_px": {
                            "x": pz.x, "y": pz.y,
                            "w": pz.width, "h": pz.height,
                        },
                        "bbox_pct": {
                            "x": round(pz.x / img_w * 100, 2) if img_w else 0,
                            "y": round(pz.y / img_h * 100, 2) if img_h else 0,
                            "w": round(pz.width / img_w * 100, 2) if img_w else 0,
                            "h": round(pz.height / img_h * 100, 2) if img_h else 0,
                        },
                        "border": None,
                        "word_count": len(zone_words),
                        # Adds columns / rows / cells / header_rows.
                        **grid,
                    }

                    # Only zones backed by a detected box carry border info.
                    if pz.box:
                        zone_entry["border"] = {
                            "thickness": pz.box.border_thickness,
                            "confidence": pz.box.confidence,
                        }

                    zones_data.append(zone_entry)

    # 4. Fallback: no boxes detected → single zone with all words
    # (also reached when no image could be loaded or decoded).
    if not zones_data:
        grid = _build_zone_grid(
            all_words, content_x, content_y, content_w, content_h,
            0, img_w, img_h,
        )
        zones_data.append({
            "zone_index": 0,
            "zone_type": "content",
            "bbox_px": {
                "x": content_x, "y": content_y,
                "w": content_w, "h": content_h,
            },
            "bbox_pct": {
                "x": round(content_x / img_w * 100, 2) if img_w else 0,
                "y": round(content_y / img_h * 100, 2) if img_h else 0,
                "w": round(content_w / img_w * 100, 2) if img_w else 0,
                "h": round(content_h / img_h * 100, 2) if img_h else 0,
            },
            "border": None,
            "word_count": len(all_words),
            **grid,
        })

    # Measured before result assembly and persistence, so those are not included.
    duration = time.time() - t0

    # 5. Build result
    total_cells = sum(len(z.get("cells", [])) for z in zones_data)
    total_columns = sum(len(z.get("columns", [])) for z in zones_data)
    total_rows = sum(len(z.get("rows", [])) for z in zones_data)

    result = {
        "session_id": session_id,
        "image_width": img_w,
        "image_height": img_h,
        "zones": zones_data,
        "boxes_detected": boxes_detected,
        "summary": {
            "total_zones": len(zones_data),
            "total_columns": total_columns,
            "total_rows": total_rows,
            "total_cells": total_cells,
            "total_words": len(all_words),
        },
        # Page-level formatting starts empty on a fresh build.
        "formatting": {
            "bold_columns": [],
            "header_rows": [],
        },
        "duration_seconds": round(duration, 2),
    }

    # 6. Persist to DB
    await update_session_db(session_id, grid_editor_result=result)

    logger.info(
        "build-grid session %s: %d zones, %d cols, %d rows, %d cells, "
        "%d boxes in %.2fs",
        session_id, len(zones_data), total_columns, total_rows,
        total_cells, boxes_detected, duration,
    )

    return result
|
||||
|
||||
|
||||
@router.post("/sessions/{session_id}/save-grid")
async def save_grid(session_id: str, request: Request):
    """Save edited grid data from the frontend Excel-like editor.

    Receives the full StructuredGrid with user edits (text changes,
    formatting changes like bold columns, header rows, etc.) and
    persists it to the session's grid_editor_result.

    Fields missing from the request body fall back to the previously stored
    grid; ``duration_seconds`` is always taken from the stored grid. The saved
    result is marked with ``edited: True``.

    Raises:
        HTTPException 404: the session does not exist.
        HTTPException 400: the body is not valid JSON, is not a JSON object,
            has no ``zones`` key, or ``zones`` is not a list.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # Malformed JSON is a client error, not a server failure.
    try:
        body = await request.json()
    except ValueError:
        raise HTTPException(status_code=400, detail="Request body is not valid JSON") from None

    # Validate basic structure
    if not isinstance(body, dict) or "zones" not in body:
        raise HTTPException(status_code=400, detail="Missing 'zones' in request body")
    if not isinstance(body["zones"], list):
        raise HTTPException(status_code=400, detail="'zones' must be a list")

    # Preserve metadata from the original build
    existing = session.get("grid_editor_result") or {}
    result = {
        "session_id": session_id,
        "image_width": body.get("image_width", existing.get("image_width", 0)),
        "image_height": body.get("image_height", existing.get("image_height", 0)),
        "zones": body["zones"],
        "boxes_detected": body.get("boxes_detected", existing.get("boxes_detected", 0)),
        "summary": body.get("summary", existing.get("summary", {})),
        "formatting": body.get("formatting", existing.get("formatting", {})),
        "duration_seconds": existing.get("duration_seconds", 0),
        "edited": True,
    }

    await update_session_db(session_id, grid_editor_result=result)

    logger.info("save-grid session %s: %d zones saved", session_id, len(body["zones"]))

    return {"session_id": session_id, "saved": True}
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}/grid-editor")
async def get_grid(session_id: str):
    """Return the stored grid editor result of a session.

    Responds with 404 when the session is unknown or when no grid has been
    stored for it yet.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    stored_grid = session.get("grid_editor_result")
    if stored_grid:
        return stored_grid

    raise HTTPException(
        status_code=404,
        detail="No grid editor data. Run build-grid first.",
    )
|
||||
@@ -43,6 +43,7 @@ except ImportError:
|
||||
trocr_router = None
|
||||
from vocab_worksheet_api import router as vocab_router, set_db_pool as set_vocab_db_pool, _init_vocab_table, _load_all_sessions, DATABASE_URL as VOCAB_DATABASE_URL
|
||||
from ocr_pipeline_api import router as ocr_pipeline_router, _cache as ocr_pipeline_cache
|
||||
from grid_editor_api import router as grid_editor_router
|
||||
from orientation_crop_api import router as orientation_crop_router, set_cache_ref as set_orientation_crop_cache
|
||||
from ocr_pipeline_session_store import init_ocr_pipeline_tables
|
||||
try:
|
||||
@@ -178,6 +179,7 @@ if trocr_router:
|
||||
app.include_router(trocr_router) # TrOCR Handwriting OCR
|
||||
app.include_router(vocab_router) # Vocabulary Worksheet Generator
|
||||
app.include_router(ocr_pipeline_router) # OCR Pipeline (step-by-step)
|
||||
app.include_router(grid_editor_router) # Grid Editor (Excel-like)
|
||||
set_orientation_crop_cache(ocr_pipeline_cache)
|
||||
app.include_router(orientation_crop_router) # OCR Pipeline: Orientation + Crop
|
||||
if htr_router:
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
-- Migration 007: Add grid_editor_result column for Excel-like grid editor
|
||||
-- Stores structured grid with zones, columns, rows, cells, and formatting
|
||||
|
||||
ALTER TABLE ocr_pipeline_sessions ADD COLUMN IF NOT EXISTS grid_editor_result JSONB;
|
||||
@@ -74,7 +74,8 @@ async def init_ocr_pipeline_tables():
|
||||
ADD COLUMN IF NOT EXISTS orientation_result JSONB,
|
||||
ADD COLUMN IF NOT EXISTS crop_result JSONB,
|
||||
ADD COLUMN IF NOT EXISTS parent_session_id UUID REFERENCES ocr_pipeline_sessions(id) ON DELETE CASCADE,
|
||||
ADD COLUMN IF NOT EXISTS box_index INT
|
||||
ADD COLUMN IF NOT EXISTS box_index INT,
|
||||
ADD COLUMN IF NOT EXISTS grid_editor_result JSONB
|
||||
""")
|
||||
|
||||
|
||||
@@ -110,6 +111,7 @@ async def create_session_db(
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
doc_type, doc_type_result,
|
||||
document_category, pipeline_log,
|
||||
grid_editor_result,
|
||||
parent_session_id, box_index,
|
||||
created_at, updated_at
|
||||
""", uuid.UUID(session_id), name, filename, original_png,
|
||||
@@ -129,6 +131,7 @@ async def get_session_db(session_id: str) -> Optional[Dict[str, Any]]:
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
doc_type, doc_type_result,
|
||||
document_category, pipeline_log,
|
||||
grid_editor_result,
|
||||
parent_session_id, box_index,
|
||||
created_at, updated_at
|
||||
FROM ocr_pipeline_sessions WHERE id = $1
|
||||
@@ -180,10 +183,11 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
||||
'word_result', 'ground_truth', 'auto_shear_degrees',
|
||||
'doc_type', 'doc_type_result',
|
||||
'document_category', 'pipeline_log',
|
||||
'grid_editor_result',
|
||||
'parent_session_id', 'box_index',
|
||||
}
|
||||
|
||||
jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log'}
|
||||
jsonb_fields = {'orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'handwriting_removal_meta', 'doc_type_result', 'pipeline_log', 'grid_editor_result'}
|
||||
|
||||
for key, value in kwargs.items():
|
||||
if key in allowed_fields:
|
||||
@@ -212,6 +216,7 @@ async def update_session_db(session_id: str, **kwargs) -> Optional[Dict[str, Any
|
||||
word_result, ground_truth, auto_shear_degrees,
|
||||
doc_type, doc_type_result,
|
||||
document_category, pipeline_log,
|
||||
grid_editor_result,
|
||||
parent_session_id, box_index,
|
||||
created_at, updated_at
|
||||
""", *values)
|
||||
@@ -308,7 +313,7 @@ def _row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
|
||||
result[key] = result[key].isoformat()
|
||||
|
||||
# JSONB → parsed (asyncpg returns str for JSONB)
|
||||
for key in ['orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log']:
|
||||
for key in ['orientation_result', 'crop_result', 'deskew_result', 'dewarp_result', 'column_result', 'row_result', 'word_result', 'ground_truth', 'doc_type_result', 'pipeline_log', 'grid_editor_result']:
|
||||
if key in result and result[key] is not None:
|
||||
if isinstance(result[key], str):
|
||||
result[key] = json.loads(result[key])
|
||||
|
||||
Reference in New Issue
Block a user