feat: ImageLayoutEditor, arrow-key nav, multi-select bold, wider columns
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 32s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m52s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 18s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 32s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m52s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 18s
- New ImageLayoutEditor: SVG overlay on the original scan with draggable column dividers, horizontal guidelines (margins/header/footer), double-click to add columns, and an x-button to delete a column
- GridTable: MIN_COL_WIDTH 40→80px for better readability
- Arrow up/down keys navigate between rows in the grid editor
- Ctrl+Click for multi-cell selection, Ctrl+B to toggle bold on the selection
- getAdjacentCell works for cells that don't exist yet (new rows/cols)
- deleteColumn now merges x-boundaries correctly
- Session restore fix: grid_editor_result/structure_result are returned by the session GET
- Footer row 3-state cycle, auto-create cells for empty footer rows
- Grid save/build/GT-mark now advance current_step=11

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -43,6 +43,7 @@ export default function OcrOverlayPage() {
|
|||||||
|
|
||||||
const searchParams = useSearchParams()
|
const searchParams = useSearchParams()
|
||||||
const deepLinkHandled = useRef(false)
|
const deepLinkHandled = useRef(false)
|
||||||
|
const gridSaveRef = useRef<(() => Promise<void>) | null>(null)
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
loadSessions()
|
loadSessions()
|
||||||
@@ -271,6 +272,10 @@ export default function OcrOverlayPage() {
|
|||||||
setGtSaving(true)
|
setGtSaving(true)
|
||||||
setGtMessage('')
|
setGtMessage('')
|
||||||
try {
|
try {
|
||||||
|
// Auto-save grid editor before marking GT (so DB has latest edits)
|
||||||
|
if (gridSaveRef.current) {
|
||||||
|
await gridSaveRef.current()
|
||||||
|
}
|
||||||
const resp = await fetch(
|
const resp = await fetch(
|
||||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth?pipeline=${mode}`,
|
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth?pipeline=${mode}`,
|
||||||
{ method: 'POST' }
|
{ method: 'POST' }
|
||||||
@@ -327,7 +332,7 @@ export default function OcrOverlayPage() {
|
|||||||
) : null
|
) : null
|
||||||
case 6:
|
case 6:
|
||||||
return mode === 'kombi' ? (
|
return mode === 'kombi' ? (
|
||||||
<StepGridReview sessionId={sessionId} onNext={handleNext} />
|
<StepGridReview sessionId={sessionId} onNext={handleNext} saveRef={gridSaveRef} />
|
||||||
) : null
|
) : null
|
||||||
default:
|
default:
|
||||||
return null
|
return null
|
||||||
|
|||||||
@@ -34,6 +34,8 @@ export function GridEditor({ sessionId, onNext }: GridEditorProps) {
|
|||||||
getAdjacentCell,
|
getAdjacentCell,
|
||||||
deleteColumn,
|
deleteColumn,
|
||||||
addColumn,
|
addColumn,
|
||||||
|
deleteRow,
|
||||||
|
addRow,
|
||||||
} = useGridEditor(sessionId)
|
} = useGridEditor(sessionId)
|
||||||
|
|
||||||
const [showOverlay, setShowOverlay] = useState(false)
|
const [showOverlay, setShowOverlay] = useState(false)
|
||||||
@@ -163,6 +165,11 @@ export function GridEditor({ sessionId, onNext }: GridEditorProps) {
|
|||||||
+{grid.summary.recovered_colored} recovered
|
+{grid.summary.recovered_colored} recovered
|
||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
|
{grid.dictionary_detection?.is_dictionary && (
|
||||||
|
<span className="px-1.5 py-0.5 rounded bg-blue-50 dark:bg-blue-900/20 text-blue-600 dark:text-blue-400 border border-blue-200 dark:border-blue-800">
|
||||||
|
Woerterbuch ({Math.round(grid.dictionary_detection.confidence * 100)}%)
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
<span className="text-gray-400">
|
<span className="text-gray-400">
|
||||||
{grid.duration_seconds.toFixed(1)}s
|
{grid.duration_seconds.toFixed(1)}s
|
||||||
</span>
|
</span>
|
||||||
@@ -223,6 +230,8 @@ export function GridEditor({ sessionId, onNext }: GridEditorProps) {
|
|||||||
onNavigate={handleNavigate}
|
onNavigate={handleNavigate}
|
||||||
onDeleteColumn={deleteColumn}
|
onDeleteColumn={deleteColumn}
|
||||||
onAddColumn={addColumn}
|
onAddColumn={addColumn}
|
||||||
|
onDeleteRow={deleteRow}
|
||||||
|
onAddRow={addRow}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
) : (
|
) : (
|
||||||
|
|||||||
@@ -7,20 +7,24 @@ interface GridTableProps {
|
|||||||
zone: GridZone
|
zone: GridZone
|
||||||
layoutMetrics?: LayoutMetrics
|
layoutMetrics?: LayoutMetrics
|
||||||
selectedCell: string | null
|
selectedCell: string | null
|
||||||
|
selectedCells?: Set<string>
|
||||||
onSelectCell: (cellId: string) => void
|
onSelectCell: (cellId: string) => void
|
||||||
|
onToggleCellSelection?: (cellId: string) => void
|
||||||
onCellTextChange: (cellId: string, text: string) => void
|
onCellTextChange: (cellId: string, text: string) => void
|
||||||
onToggleColumnBold: (zoneIndex: number, colIndex: number) => void
|
onToggleColumnBold: (zoneIndex: number, colIndex: number) => void
|
||||||
onToggleRowHeader: (zoneIndex: number, rowIndex: number) => void
|
onToggleRowHeader: (zoneIndex: number, rowIndex: number) => void
|
||||||
onNavigate: (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => void
|
onNavigate: (cellId: string, direction: 'up' | 'down' | 'left' | 'right') => void
|
||||||
onDeleteColumn?: (zoneIndex: number, colIndex: number) => void
|
onDeleteColumn?: (zoneIndex: number, colIndex: number) => void
|
||||||
onAddColumn?: (zoneIndex: number, afterColIndex: number) => void
|
onAddColumn?: (zoneIndex: number, afterColIndex: number) => void
|
||||||
|
onDeleteRow?: (zoneIndex: number, rowIndex: number) => void
|
||||||
|
onAddRow?: (zoneIndex: number, afterRowIndex: number) => void
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Gutter width for row numbers (px). */
|
/** Gutter width for row numbers (px). */
|
||||||
const ROW_NUM_WIDTH = 36
|
const ROW_NUM_WIDTH = 36
|
||||||
|
|
||||||
/** Minimum column width in px so columns remain usable. */
|
/** Minimum column width in px so columns remain usable. */
|
||||||
const MIN_COL_WIDTH = 40
|
const MIN_COL_WIDTH = 80
|
||||||
|
|
||||||
/** Minimum row height in px. */
|
/** Minimum row height in px. */
|
||||||
const MIN_ROW_HEIGHT = 26
|
const MIN_ROW_HEIGHT = 26
|
||||||
@@ -29,13 +33,17 @@ export function GridTable({
|
|||||||
zone,
|
zone,
|
||||||
layoutMetrics,
|
layoutMetrics,
|
||||||
selectedCell,
|
selectedCell,
|
||||||
|
selectedCells,
|
||||||
onSelectCell,
|
onSelectCell,
|
||||||
|
onToggleCellSelection,
|
||||||
onCellTextChange,
|
onCellTextChange,
|
||||||
onToggleColumnBold,
|
onToggleColumnBold,
|
||||||
onToggleRowHeader,
|
onToggleRowHeader,
|
||||||
onNavigate,
|
onNavigate,
|
||||||
onDeleteColumn,
|
onDeleteColumn,
|
||||||
onAddColumn,
|
onAddColumn,
|
||||||
|
onDeleteRow,
|
||||||
|
onAddRow,
|
||||||
}: GridTableProps) {
|
}: GridTableProps) {
|
||||||
const containerRef = useRef<HTMLDivElement>(null)
|
const containerRef = useRef<HTMLDivElement>(null)
|
||||||
const [containerWidth, setContainerWidth] = useState(0)
|
const [containerWidth, setContainerWidth] = useState(0)
|
||||||
@@ -113,12 +121,18 @@ export function GridTable({
|
|||||||
} else if (e.key === 'Enter' && !e.shiftKey) {
|
} else if (e.key === 'Enter' && !e.shiftKey) {
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
onNavigate(cellId, 'down')
|
onNavigate(cellId, 'down')
|
||||||
} else if (e.key === 'ArrowUp' && e.altKey) {
|
} else if (e.key === 'ArrowUp') {
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
onNavigate(cellId, 'up')
|
onNavigate(cellId, 'up')
|
||||||
} else if (e.key === 'ArrowDown' && e.altKey) {
|
} else if (e.key === 'ArrowDown') {
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
onNavigate(cellId, 'down')
|
onNavigate(cellId, 'down')
|
||||||
|
} else if (e.key === 'ArrowLeft' && e.altKey) {
|
||||||
|
e.preventDefault()
|
||||||
|
onNavigate(cellId, 'left')
|
||||||
|
} else if (e.key === 'ArrowRight' && e.altKey) {
|
||||||
|
e.preventDefault()
|
||||||
|
onNavigate(cellId, 'right')
|
||||||
} else if (e.key === 'Escape') {
|
} else if (e.key === 'Escape') {
|
||||||
;(e.target as HTMLElement).blur()
|
;(e.target as HTMLElement).blur()
|
||||||
}
|
}
|
||||||
@@ -323,7 +337,7 @@ export function GridTable({
|
|||||||
<div key={row.index} style={{ display: 'contents' }}>
|
<div key={row.index} style={{ display: 'contents' }}>
|
||||||
{/* Row number cell */}
|
{/* Row number cell */}
|
||||||
<div
|
<div
|
||||||
className={`relative sticky left-0 z-10 flex items-center justify-center text-[10px] border-b border-r border-gray-200 dark:border-gray-700 cursor-pointer select-none transition-colors hover:bg-gray-100 dark:hover:bg-gray-700 ${
|
className={`group/rowhdr relative sticky left-0 z-10 flex items-center justify-center text-[10px] border-b border-r border-gray-200 dark:border-gray-700 cursor-pointer select-none transition-colors hover:bg-gray-100 dark:hover:bg-gray-700 ${
|
||||||
row.is_header
|
row.is_header
|
||||||
? 'bg-blue-50 dark:bg-blue-900/20 text-blue-600 dark:text-blue-400 font-medium'
|
? 'bg-blue-50 dark:bg-blue-900/20 text-blue-600 dark:text-blue-400 font-medium'
|
||||||
: row.is_footer
|
: row.is_footer
|
||||||
@@ -332,11 +346,41 @@ export function GridTable({
|
|||||||
}`}
|
}`}
|
||||||
style={{ height: `${rowH}px` }}
|
style={{ height: `${rowH}px` }}
|
||||||
onClick={() => onToggleRowHeader(zone.zone_index, row.index)}
|
onClick={() => onToggleRowHeader(zone.zone_index, row.index)}
|
||||||
title={`Zeile ${row.index + 1} — Klick fuer Header-Toggle`}
|
title={`Zeile ${row.index + 1} — Klick: ${row.is_header ? 'Footer' : row.is_footer ? 'Normal' : 'Header'}`}
|
||||||
>
|
>
|
||||||
{row.index + 1}
|
{row.index + 1}
|
||||||
{row.is_header && <span className="block text-[8px]">H</span>}
|
{row.is_header && <span className="block text-[8px]">H</span>}
|
||||||
{row.is_footer && <span className="block text-[8px]">F</span>}
|
{row.is_footer && <span className="block text-[8px]">F</span>}
|
||||||
|
{/* Delete row button (visible on hover) */}
|
||||||
|
{onDeleteRow && zone.rows.length > 1 && (
|
||||||
|
<button
|
||||||
|
className="absolute top-0 left-0 w-4 h-4 flex items-center justify-center bg-red-500 text-white rounded-br text-[9px] leading-none opacity-0 group-hover/rowhdr:opacity-100 transition-opacity z-30"
|
||||||
|
onClick={(e) => {
|
||||||
|
e.stopPropagation()
|
||||||
|
if (confirm(`Zeile ${row.index + 1} loeschen?`)) {
|
||||||
|
onDeleteRow(zone.zone_index, row.index)
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
title={`Zeile ${row.index + 1} loeschen`}
|
||||||
|
>
|
||||||
|
x
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
{/* Add row button (visible on hover, below this row) */}
|
||||||
|
{onAddRow && (
|
||||||
|
<button
|
||||||
|
className="absolute -bottom-[7px] left-0 w-full h-[14px] flex items-center justify-center text-teal-500 opacity-0 group-hover/rowhdr:opacity-100 transition-opacity z-30 hover:bg-teal-100 dark:hover:bg-teal-900/40 rounded"
|
||||||
|
onClick={(e) => {
|
||||||
|
e.stopPropagation()
|
||||||
|
onAddRow(zone.zone_index, row.index)
|
||||||
|
}}
|
||||||
|
title={`Zeile nach ${row.index + 1} einfuegen`}
|
||||||
|
>
|
||||||
|
<svg className="w-3 h-3" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2.5}>
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" d="M12 4v16m8-8H4" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
{/* Bottom-edge resize handle */}
|
{/* Bottom-edge resize handle */}
|
||||||
<div
|
<div
|
||||||
className="absolute bottom-0 left-0 w-full h-[4px] cursor-row-resize hover:bg-teal-400/40 z-20"
|
className="absolute bottom-0 left-0 w-full h-[4px] cursor-row-resize hover:bg-teal-400/40 z-20"
|
||||||
@@ -398,6 +442,7 @@ export function GridTable({
|
|||||||
const isSelected = selectedCell === cellId
|
const isSelected = selectedCell === cellId
|
||||||
const isBold = col.bold || cell?.is_bold
|
const isBold = col.bold || cell?.is_bold
|
||||||
const isLowConf = cell && cell.confidence > 0 && cell.confidence < 60
|
const isLowConf = cell && cell.confidence > 0 && cell.confidence < 60
|
||||||
|
const isMultiSelected = selectedCells?.has(cellId)
|
||||||
const cellColor = getCellColor(cell)
|
const cellColor = getCellColor(cell)
|
||||||
// Show per-word colored display only when word_boxes
|
// Show per-word colored display only when word_boxes
|
||||||
// match the cell text. Post-processing steps (e.g. 5h
|
// match the cell text. Post-processing steps (e.g. 5h
|
||||||
@@ -417,9 +462,9 @@ export function GridTable({
|
|||||||
key={col.index}
|
key={col.index}
|
||||||
className={`relative border-b border-r border-gray-200 dark:border-gray-700 flex items-center ${
|
className={`relative border-b border-r border-gray-200 dark:border-gray-700 flex items-center ${
|
||||||
isSelected ? 'ring-2 ring-teal-500 ring-inset z-10' : ''
|
isSelected ? 'ring-2 ring-teal-500 ring-inset z-10' : ''
|
||||||
} ${isLowConf ? 'bg-amber-50/50 dark:bg-amber-900/10' : ''} ${
|
} ${isMultiSelected ? 'bg-teal-50/60 dark:bg-teal-900/20' : ''} ${
|
||||||
row.is_header ? 'bg-blue-50/50 dark:bg-blue-900/10' : ''
|
isLowConf && !isMultiSelected ? 'bg-amber-50/50 dark:bg-amber-900/10' : ''
|
||||||
}`}
|
} ${row.is_header && !isMultiSelected ? 'bg-blue-50/50 dark:bg-blue-900/10' : ''}`}
|
||||||
style={{ height: `${rowH}px` }}
|
style={{ height: `${rowH}px` }}
|
||||||
>
|
>
|
||||||
{cellColor && (
|
{cellColor && (
|
||||||
@@ -433,9 +478,13 @@ export function GridTable({
|
|||||||
{hasColoredWords && !isSelected ? (
|
{hasColoredWords && !isSelected ? (
|
||||||
<div
|
<div
|
||||||
className={`w-full px-2 cursor-text truncate ${isBold ? 'font-bold' : 'font-normal'}`}
|
className={`w-full px-2 cursor-text truncate ${isBold ? 'font-bold' : 'font-normal'}`}
|
||||||
onClick={() => {
|
onClick={(e) => {
|
||||||
onSelectCell(cellId)
|
if ((e.metaKey || e.ctrlKey) && onToggleCellSelection) {
|
||||||
setTimeout(() => document.getElementById(`cell-${cellId}`)?.focus(), 0)
|
onToggleCellSelection(cellId)
|
||||||
|
} else {
|
||||||
|
onSelectCell(cellId)
|
||||||
|
setTimeout(() => document.getElementById(`cell-${cellId}`)?.focus(), 0)
|
||||||
|
}
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{cell!.word_boxes!.map((wb, i) => (
|
{cell!.word_boxes!.map((wb, i) => (
|
||||||
@@ -457,10 +506,14 @@ export function GridTable({
|
|||||||
id={`cell-${cellId}`}
|
id={`cell-${cellId}`}
|
||||||
type="text"
|
type="text"
|
||||||
value={cell?.text ?? ''}
|
value={cell?.text ?? ''}
|
||||||
onChange={(e) => {
|
onChange={(e) => onCellTextChange(cellId, e.target.value)}
|
||||||
if (cell) onCellTextChange(cellId, e.target.value)
|
|
||||||
}}
|
|
||||||
onFocus={() => onSelectCell(cellId)}
|
onFocus={() => onSelectCell(cellId)}
|
||||||
|
onClick={(e) => {
|
||||||
|
if ((e.metaKey || e.ctrlKey) && onToggleCellSelection) {
|
||||||
|
e.preventDefault()
|
||||||
|
onToggleCellSelection(cellId)
|
||||||
|
}
|
||||||
|
}}
|
||||||
onKeyDown={(e) => handleKeyDown(e, cellId)}
|
onKeyDown={(e) => handleKeyDown(e, cellId)}
|
||||||
className={`w-full px-2 bg-transparent border-0 outline-none ${
|
className={`w-full px-2 bg-transparent border-0 outline-none ${
|
||||||
isBold ? 'font-bold' : 'font-normal'
|
isBold ? 'font-bold' : 'font-normal'
|
||||||
|
|||||||
386
admin-lehrer/components/grid-editor/ImageLayoutEditor.tsx
Normal file
386
admin-lehrer/components/grid-editor/ImageLayoutEditor.tsx
Normal file
@@ -0,0 +1,386 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ImageLayoutEditor — SVG overlay on the original scan image.
|
||||||
|
*
|
||||||
|
* Shows draggable vertical column dividers and horizontal guidelines
|
||||||
|
* (margins, header/footer zones). Double-click to add a column,
|
||||||
|
* click the × on a divider to remove it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { useCallback, useRef } from 'react'
|
||||||
|
import type { GridZone, LayoutDividers } from './types'
|
||||||
|
|
||||||
|
/** Props accepted by the ImageLayoutEditor component. */
interface ImageLayoutEditorProps {
  /** URL of the scan; rendered as the `src` of the background image. */
  imageUrl: string
  /** Zones whose column boundaries are drawn as vertical dividers. */
  zones: GridZone[]
  /** NOTE(review): declared but not read by the component in this file — confirm whether callers still need it. */
  imageWidth: number
  /** Optional horizontal guidelines; a missing key means that guideline is switched off. */
  layoutDividers?: LayoutDividers
  /** Display width of the image wrapper, in percent of the scroll container. */
  zoom: number
  /** Receives the new zoom value when one of the zoom buttons is pressed. */
  onZoomChange: (zoom: number) => void
  /** Called on every mouse move while a column divider is dragged; `newXPct` is clamped to 0–100. */
  onColumnDividerMove: (zoneIndex: number, boundaryIndex: number, newXPct: number) => void
  /** Receives the complete, updated set of horizontal guidelines. */
  onHorizontalsChange: (horizontals: LayoutDividers['horizontals']) => void
  /** Called before an edit begins (start of a drag, enabling a guideline). */
  onCommitUndo: () => void
  /** Called on double-click with the zone under the pointer and the click position in percent. */
  onSplitColumnAt: (zoneIndex: number, xPct: number) => void
  /** Called when the delete marker of an interior divider is clicked; receives that divider's boundary index. */
  onDeleteColumn: (zoneIndex: number, colIndex: number) => void
}
|
||||||
|
|
||||||
|
/** Stroke/text colour of each horizontal guideline, keyed by guideline name. */
const HORIZ_COLORS: Record<string, string> = {
  top_margin: 'rgba(239, 68, 68, 0.6)',
  header_bottom: 'rgba(59, 130, 246, 0.6)',
  footer_top: 'rgba(249, 115, 22, 0.6)',
  bottom_margin: 'rgba(239, 68, 68, 0.6)',
}

/** User-facing (German) label of each horizontal guideline. */
const HORIZ_LABELS: Record<string, string> = {
  top_margin: 'Rand oben',
  header_bottom: 'Kopfzeile',
  footer_top: 'Fusszeile',
  bottom_margin: 'Rand unten',
}

/** Position (in percent, on the 0–100 overlay scale) a guideline gets when it is first switched on. */
const HORIZ_DEFAULTS: Record<string, number> = {
  top_margin: 3,
  header_bottom: 10,
  footer_top: 92,
  bottom_margin: 97,
}
|
||||||
|
|
||||||
|
/** Restricts `val` to the closed interval [`min`, `max`]. */
function clamp(val: number, min: number, max: number) {
  const upperBounded = Math.min(max, val)
  return Math.max(min, upperBounded)
}
|
||||||
|
|
||||||
|
/**
 * Returns `color` with its alpha channel multiplied by `factor`.
 *
 * Only the `rgba(r, g, b, a)` notation is understood; any other input is
 * returned unchanged so an unexpected colour format degrades gracefully.
 */
function scaleRgbaAlpha(color: string, factor: number): string {
  const parts = /^rgba\(\s*([\d.]+)\s*,\s*([\d.]+)\s*,\s*([\d.]+)\s*,\s*([\d.]+)\s*\)$/.exec(color)
  if (!parts) return color
  return `rgba(${parts[1]}, ${parts[2]}, ${parts[3]}, ${Number(parts[4]) * factor})`
}

/**
 * Visual layout editor: renders the scan image with an SVG overlay on top.
 *
 * The overlay uses a 0–100 coordinate system on both axes (viewBox
 * "0 0 100 100", no aspect-ratio preservation), so every position handled
 * here is a percentage of the image width/height.
 *
 * - Vertical column dividers of every zone can be dragged
 *   (`onColumnDividerMove`), interior dividers carry a delete marker
 *   (`onDeleteColumn`).
 * - Horizontal guidelines can be switched on/off through the toolbar and
 *   dragged (`onHorizontalsChange`).
 * - Double-clicking the image requests a column split (`onSplitColumnAt`).
 * - `onCommitUndo` is invoked before every edit started from here.
 */
export function ImageLayoutEditor({
  imageUrl,
  zones,
  layoutDividers,
  zoom,
  onZoomChange,
  onColumnDividerMove,
  onHorizontalsChange,
  onCommitUndo,
  onSplitColumnAt,
  onDeleteColumn,
}: ImageLayoutEditorProps) {
  const wrapperRef = useRef<HTMLDivElement>(null)
  // Describes the divider currently being dragged; null when idle.
  const draggingRef = useRef<
    | { type: 'col'; zoneIndex: number; boundaryIndex: number }
    | { type: 'horiz'; key: string }
    | null
  >(null)

  // Explicit annotation: without it the `?? {}` fallback widens the type to a
  // union with `{}` and `keyof` of that union collapses to `never`.
  const horizontals: LayoutDividers['horizontals'] = layoutDividers?.horizontals ?? {}

  // Mirror of the latest guidelines so the document-level drag listeners
  // (created once per drag) never spread a stale snapshot.
  const horizontalsRef = useRef<LayoutDividers['horizontals']>(horizontals)
  horizontalsRef.current = horizontals

  // Compute column boundaries for each zone
  const zoneBoundaries = zones.map((zone) => {
    const sorted = [...zone.columns].sort((a, b) => a.index - b.index)
    const boundaries: number[] = []
    if (sorted.length > 0) {
      const hasValidPct = sorted.some((c) => c.x_max_pct > 0)
      if (hasValidPct) {
        // Left edge of the first column, then the right edge of every column.
        boundaries.push(sorted[0].x_min_pct)
        for (const col of sorted) {
          boundaries.push(col.x_max_pct)
        }
      } else {
        // Fallback: evenly distribute within zone bbox
        const zoneX = zone.bbox_pct.x || 0
        const zoneW = zone.bbox_pct.w || 100
        for (let i = 0; i <= sorted.length; i++) {
          boundaries.push(zoneX + (i / sorted.length) * zoneW)
        }
      }
    }
    return { zone, boundaries }
  })

  /**
   * Begins dragging a divider: commits an undo snapshot, then tracks the mouse
   * on the document until the button is released.
   */
  const startDrag = useCallback(
    (
      info: NonNullable<typeof draggingRef.current>,
      e: React.MouseEvent,
    ) => {
      e.preventDefault()
      e.stopPropagation()
      draggingRef.current = info
      onCommitUndo()

      const handleMove = (ev: MouseEvent) => {
        const wrap = wrapperRef.current
        const drag = draggingRef.current
        if (!wrap || !drag) return
        const rect = wrap.getBoundingClientRect()
        const xPct = clamp(((ev.clientX - rect.left) / rect.width) * 100, 0, 100)
        const yPct = clamp(((ev.clientY - rect.top) / rect.height) * 100, 0, 100)

        if (drag.type === 'col') {
          onColumnDividerMove(drag.zoneIndex, drag.boundaryIndex, xPct)
        } else {
          onHorizontalsChange({
            ...horizontalsRef.current,
            [drag.key]: yPct,
          })
        }
      }

      const handleUp = () => {
        draggingRef.current = null
        document.removeEventListener('mousemove', handleMove)
        document.removeEventListener('mouseup', handleUp)
        document.body.style.cursor = ''
        document.body.style.userSelect = ''
      }

      // Keep the resize cursor and suppress text selection for the whole drag,
      // even when the pointer leaves the thin hit area.
      document.body.style.cursor = info.type === 'col' ? 'col-resize' : 'row-resize'
      document.body.style.userSelect = 'none'
      document.addEventListener('mousemove', handleMove)
      document.addEventListener('mouseup', handleUp)
    },
    [onColumnDividerMove, onHorizontalsChange, onCommitUndo],
  )

  /** Switches a horizontal guideline on (at its default position) or off. */
  const toggleHorizontal = (key: string) => {
    const typedKey = key as keyof LayoutDividers['horizontals']
    // Snapshot first in BOTH directions so removing a guideline can be undone
    // just like adding or dragging one.
    onCommitUndo()
    if (horizontals[typedKey] != null) {
      const next = { ...horizontals }
      delete next[typedKey]
      onHorizontalsChange(next)
    } else {
      onHorizontalsChange({
        ...horizontals,
        [key]: HORIZ_DEFAULTS[key],
      })
    }
  }

  /** Requests a column split at the double-clicked position. */
  const handleDoubleClick = (e: React.MouseEvent) => {
    const wrap = wrapperRef.current
    if (!wrap) return
    const rect = wrap.getBoundingClientRect()
    const xPct = clamp(((e.clientX - rect.left) / rect.width) * 100, 0, 100)
    const yPct = clamp(((e.clientY - rect.top) / rect.height) * 100, 0, 100)

    // Find which zone this click is in
    for (const { zone } of zoneBoundaries) {
      const zy = zone.bbox_pct.y || 0
      const zh = zone.bbox_pct.h || 100
      if (yPct >= zy && yPct <= zy + zh) {
        onSplitColumnAt(zone.zone_index, xPct)
        return
      }
    }
    // Fallback: use first zone
    if (zones.length > 0) {
      onSplitColumnAt(zones[0].zone_index, xPct)
    }
  }

  return (
    <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden flex flex-col">
      {/* Header */}
      <div className="flex items-center justify-between px-3 py-2 border-b border-gray-100 dark:border-gray-700 bg-gray-50 dark:bg-gray-800/50">
        <span className="text-xs font-medium text-gray-600 dark:text-gray-400">
          Layout-Editor
        </span>
        <div className="flex items-center gap-2">
          <button
            onClick={() => onZoomChange(Math.max(50, zoom - 25))}
            className="px-2 py-0.5 text-xs bg-gray-200 dark:bg-gray-700 rounded hover:bg-gray-300 dark:hover:bg-gray-600 text-gray-700 dark:text-gray-300"
          >
            -
          </button>
          <span className="text-xs text-gray-500 dark:text-gray-400 w-10 text-center">
            {zoom}%
          </span>
          <button
            onClick={() => onZoomChange(Math.min(300, zoom + 25))}
            className="px-2 py-0.5 text-xs bg-gray-200 dark:bg-gray-700 rounded hover:bg-gray-300 dark:hover:bg-gray-600 text-gray-700 dark:text-gray-300"
          >
            +
          </button>
          <button
            onClick={() => onZoomChange(100)}
            className="px-2 py-0.5 text-xs bg-gray-200 dark:bg-gray-700 rounded hover:bg-gray-300 dark:hover:bg-gray-600 text-gray-700 dark:text-gray-300"
          >
            Fit
          </button>
        </div>
      </div>

      {/* Horizontal line toggles */}
      <div className="flex items-center gap-1.5 px-3 py-1.5 border-b border-gray-100 dark:border-gray-700 bg-gray-50/50 dark:bg-gray-800/30 flex-wrap">
        {Object.entries(HORIZ_LABELS).map(([key, label]) => {
          const isActive = horizontals[key as keyof LayoutDividers['horizontals']] != null
          return (
            <button
              key={key}
              onClick={() => toggleHorizontal(key)}
              className={`px-2 py-0.5 text-[10px] rounded border transition-colors ${
                isActive
                  ? 'font-medium'
                  : 'border-gray-200 dark:border-gray-700 text-gray-400 dark:text-gray-500 hover:text-gray-600 dark:hover:text-gray-400'
              }`}
              style={
                isActive
                  ? {
                      color: HORIZ_COLORS[key],
                      // The palette is rgba(); appending a hex-alpha suffix
                      // ('80') would yield an invalid colour, so halve the
                      // alpha channel instead.
                      borderColor: scaleRgbaAlpha(HORIZ_COLORS[key], 0.5),
                    }
                  : undefined
              }
            >
              {label}
            </button>
          )
        })}
        <span className="text-[10px] text-gray-400 dark:text-gray-500 ml-auto">
          Doppelklick = Spalte einfuegen
        </span>
      </div>

      {/* Scrollable image with SVG overlay */}
      <div className="flex-1 overflow-auto p-2">
        <div
          ref={wrapperRef}
          style={{ width: `${zoom}%`, position: 'relative', maxWidth: 'none' }}
          onDoubleClick={handleDoubleClick}
        >
          {/* eslint-disable-next-line @next/next/no-img-element */}
          <img
            src={imageUrl}
            alt="Original scan"
            style={{ width: '100%', display: 'block' }}
            draggable={false}
          />
          {/* SVG overlay — ignores the pointer except on explicit hit areas */}
          <svg
            style={{
              position: 'absolute',
              top: 0,
              left: 0,
              width: '100%',
              height: '100%',
              pointerEvents: 'none',
            }}
            viewBox="0 0 100 100"
            preserveAspectRatio="none"
          >
            {/* Column boundary lines per zone */}
            {zoneBoundaries.map(({ zone, boundaries }) =>
              boundaries.map((xPct, bi) => {
                const yTop = zone.bbox_pct.y || 0
                const yBottom = yTop + (zone.bbox_pct.h || 100)
                // First and last boundary are the zone's outer edges.
                const isEdge = bi === 0 || bi === boundaries.length - 1
                const deleteMarkerY = Math.max(yTop + 1.5, 1.5)
                return (
                  <g key={`z${zone.zone_index}-b${bi}`}>
                    {/* Wide invisible hit area */}
                    <rect
                      x={xPct - 0.8}
                      y={yTop}
                      width={1.6}
                      height={yBottom - yTop}
                      fill="transparent"
                      style={{ cursor: 'col-resize', pointerEvents: 'all' }}
                      onMouseDown={(e) =>
                        startDrag(
                          { type: 'col', zoneIndex: zone.zone_index, boundaryIndex: bi },
                          e,
                        )
                      }
                      // A double-click on an existing divider must not bubble
                      // to the wrapper and split a column at that boundary.
                      onDoubleClick={(e) => e.stopPropagation()}
                    />
                    {/* Visible line */}
                    <line
                      x1={xPct}
                      y1={yTop}
                      x2={xPct}
                      y2={yBottom}
                      stroke={isEdge ? 'rgba(20, 184, 166, 0.35)' : 'rgba(20, 184, 166, 0.7)'}
                      strokeWidth={isEdge ? 0.15 : 0.25}
                      strokeDasharray={isEdge ? '0.8,0.4' : '0.5,0.3'}
                      style={{ pointerEvents: 'none' }}
                    />
                    {/* Delete button for interior dividers */}
                    {!isEdge && zone.columns.length > 1 && (
                      <g
                        style={{ pointerEvents: 'all', cursor: 'pointer' }}
                        onClick={(e) => {
                          e.stopPropagation()
                          // NOTE(review): the boundary index is passed as the
                          // column index — confirm this matches what the
                          // onDeleteColumn implementation expects.
                          onDeleteColumn(zone.zone_index, bi)
                        }}
                        // Rapid clicks on the marker must not be read as a
                        // "split column" double-click by the wrapper.
                        onDoubleClick={(e) => e.stopPropagation()}
                      >
                        <circle
                          cx={xPct}
                          cy={deleteMarkerY}
                          r={1.2}
                          fill="rgba(239, 68, 68, 0.8)"
                        />
                        <text
                          x={xPct}
                          y={deleteMarkerY + 0.5}
                          textAnchor="middle"
                          fill="white"
                          fontSize="1.4"
                          fontWeight="bold"
                          style={{ pointerEvents: 'none' }}
                        >
                          x
                        </text>
                      </g>
                    )}
                  </g>
                )
              }),
            )}

            {/* Horizontal guideline lines */}
            {Object.entries(horizontals).map(([key, yPct]) => {
              if (yPct == null) return null
              const color = HORIZ_COLORS[key] ?? 'rgba(156, 163, 175, 0.6)'
              return (
                <g key={`horiz-${key}`}>
                  {/* Wide invisible hit area */}
                  <rect
                    x={0}
                    y={yPct - 0.6}
                    width={100}
                    height={1.2}
                    fill="transparent"
                    style={{ cursor: 'row-resize', pointerEvents: 'all' }}
                    onMouseDown={(e) => startDrag({ type: 'horiz', key }, e)}
                  />
                  {/* Visible line */}
                  <line
                    x1={0}
                    y1={yPct}
                    x2={100}
                    y2={yPct}
                    stroke={color}
                    strokeWidth={0.2}
                    strokeDasharray="1,0.5"
                    style={{ pointerEvents: 'none' }}
                  />
                  {/* Label */}
                  <text
                    x={1}
                    y={yPct - 0.5}
                    fill={color}
                    fontSize="1.6"
                    style={{ pointerEvents: 'none' }}
                  >
                    {HORIZ_LABELS[key]}
                  </text>
                </g>
              )
            })}
          </svg>
        </div>
      </div>
    </div>
  )
}
|
||||||
@@ -11,6 +11,15 @@ export interface LayoutMetrics {
|
|||||||
font_size_suggestion_px: number
|
font_size_suggestion_px: number
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Outcome of the backend's dictionary-page analysis for a grid.
 */
export interface DictionaryDetection {
  /** Whether the analysed page was classified as a dictionary page. */
  is_dictionary: boolean
  /** Classifier confidence — presumably in the range 0–1 (the grid editor shows it multiplied by 100 as a percentage); confirm against the backend. */
  confidence: number
  /** Free-form signals reported by the backend; the shape is not fixed on the client side. */
  signals: Record<string, unknown>
  /** Index of the article column, or null — presumably when none was identified; verify against the backend. */
  article_col_index: number | null
  /** Index of the headword column, or null — presumably when none was identified; verify against the backend. */
  headword_col_index: number | null
}
|
||||||
|
|
||||||
/** A complete structured grid with zones, ready for the Excel-like editor. */
|
/** A complete structured grid with zones, ready for the Excel-like editor. */
|
||||||
export interface StructuredGrid {
|
export interface StructuredGrid {
|
||||||
session_id: string
|
session_id: string
|
||||||
@@ -21,8 +30,10 @@ export interface StructuredGrid {
|
|||||||
summary: GridSummary
|
summary: GridSummary
|
||||||
formatting: GridFormatting
|
formatting: GridFormatting
|
||||||
layout_metrics?: LayoutMetrics
|
layout_metrics?: LayoutMetrics
|
||||||
|
dictionary_detection?: DictionaryDetection
|
||||||
duration_seconds: number
|
duration_seconds: number
|
||||||
edited?: boolean
|
edited?: boolean
|
||||||
|
layout_dividers?: LayoutDividers
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface GridSummary {
|
export interface GridSummary {
|
||||||
@@ -103,6 +114,16 @@ export interface GridEditorCell {
|
|||||||
is_bold: boolean
|
is_bold: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Divider positions edited in the visual column/margin editor that is drawn
 * over the original scan image.
 */
export interface LayoutDividers {
  /**
   * Horizontal guidelines. Every key is optional; an absent key means that
   * guideline is not shown. Values are presumably percentages (0–100) of the
   * image height — confirm against the code that persists them.
   */
  horizontals: {
    /** Lower edge of the top page margin. */
    top_margin?: number
    /** Lower edge of the header area. */
    header_bottom?: number
    /** Upper edge of the footer area. */
    footer_top?: number
    /** Upper edge of the bottom page margin. */
    bottom_margin?: number
  }
}
|
||||||
|
|
||||||
/** Cell formatting applied by the user in the editor. */
|
/** Cell formatting applied by the user in the editor. */
|
||||||
export interface CellFormatting {
|
export interface CellFormatting {
|
||||||
bold: boolean
|
bold: boolean
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { useCallback, useRef, useState } from 'react'
|
import { useCallback, useRef, useState } from 'react'
|
||||||
import type { StructuredGrid, GridZone } from './types'
|
import type { StructuredGrid, GridZone, LayoutDividers } from './types'
|
||||||
|
|
||||||
const KLAUSUR_API = '/klausur-api'
|
const KLAUSUR_API = '/klausur-api'
|
||||||
const MAX_UNDO = 50
|
const MAX_UNDO = 50
|
||||||
@@ -134,12 +134,40 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
if (!prev) return prev
|
if (!prev) return prev
|
||||||
return {
|
return {
|
||||||
...prev,
|
...prev,
|
||||||
zones: prev.zones.map((zone) => ({
|
zones: prev.zones.map((zone) => {
|
||||||
...zone,
|
// Check if cell exists
|
||||||
cells: zone.cells.map((cell) =>
|
const existing = zone.cells.find((c) => c.cell_id === cellId)
|
||||||
cell.cell_id === cellId ? { ...cell, text: newText } : cell,
|
if (existing) {
|
||||||
),
|
return {
|
||||||
})),
|
...zone,
|
||||||
|
cells: zone.cells.map((cell) =>
|
||||||
|
cell.cell_id === cellId ? { ...cell, text: newText } : cell,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Cell doesn't exist — create it if the cellId belongs to this zone
|
||||||
|
// cellId format: Z{zone}_R{row}_C{col}
|
||||||
|
const match = cellId.match(/^Z(\d+)_R(\d+)_C(\d+)$/)
|
||||||
|
if (!match || parseInt(match[1]) !== zone.zone_index) return zone
|
||||||
|
const rowIndex = parseInt(match[2])
|
||||||
|
const colIndex = parseInt(match[3])
|
||||||
|
const col = zone.columns.find((c) => c.index === colIndex)
|
||||||
|
const newCell = {
|
||||||
|
cell_id: cellId,
|
||||||
|
zone_index: zone.zone_index,
|
||||||
|
row_index: rowIndex,
|
||||||
|
col_index: colIndex,
|
||||||
|
col_type: col?.label ?? '',
|
||||||
|
text: newText,
|
||||||
|
confidence: 0,
|
||||||
|
bbox_px: { x: 0, y: 0, w: 0, h: 0 },
|
||||||
|
bbox_pct: { x: 0, y: 0, w: 0, h: 0 },
|
||||||
|
word_boxes: [],
|
||||||
|
ocr_engine: 'manual',
|
||||||
|
is_bold: false,
|
||||||
|
}
|
||||||
|
return { ...zone, cells: [...zone.cells, newCell] }
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
setDirty(true)
|
setDirty(true)
|
||||||
@@ -192,6 +220,7 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
if (!grid) return
|
if (!grid) return
|
||||||
pushUndo(grid.zones)
|
pushUndo(grid.zones)
|
||||||
|
|
||||||
|
// Cycle: normal → header → footer → normal
|
||||||
setGrid((prev) => {
|
setGrid((prev) => {
|
||||||
if (!prev) return prev
|
if (!prev) return prev
|
||||||
return {
|
return {
|
||||||
@@ -200,9 +229,16 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
if (zone.zone_index !== zoneIndex) return zone
|
if (zone.zone_index !== zoneIndex) return zone
|
||||||
return {
|
return {
|
||||||
...zone,
|
...zone,
|
||||||
rows: zone.rows.map((r) =>
|
rows: zone.rows.map((r) => {
|
||||||
r.index === rowIndex ? { ...r, is_header: !r.is_header } : r,
|
if (r.index !== rowIndex) return r
|
||||||
),
|
if (!r.is_header && !r.is_footer) {
|
||||||
|
return { ...r, is_header: true, is_footer: false }
|
||||||
|
} else if (r.is_header) {
|
||||||
|
return { ...r, is_header: false, is_footer: true }
|
||||||
|
} else {
|
||||||
|
return { ...r, is_header: false, is_footer: false }
|
||||||
|
}
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
@@ -229,9 +265,23 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
...prev,
|
...prev,
|
||||||
zones: prev.zones.map((z) => {
|
zones: prev.zones.map((z) => {
|
||||||
if (z.zone_index !== zoneIndex) return z
|
if (z.zone_index !== zoneIndex) return z
|
||||||
|
const deletedCol = z.columns.find((c) => c.index === colIndex)
|
||||||
const newColumns = z.columns
|
const newColumns = z.columns
|
||||||
.filter((c) => c.index !== colIndex)
|
.filter((c) => c.index !== colIndex)
|
||||||
.map((c, i) => ({ ...c, index: i, label: `column_${i + 1}` }))
|
.map((c, i) => {
|
||||||
|
const result = { ...c, index: i, label: `column_${i + 1}` }
|
||||||
|
// Merge x-boundary: previous column absorbs deleted column's space
|
||||||
|
if (deletedCol) {
|
||||||
|
if (c.index === colIndex - 1) {
|
||||||
|
result.x_max_pct = deletedCol.x_max_pct
|
||||||
|
result.x_max_px = deletedCol.x_max_px
|
||||||
|
} else if (colIndex === 0 && c.index === 1) {
|
||||||
|
result.x_min_pct = deletedCol.x_min_pct
|
||||||
|
result.x_min_px = deletedCol.x_min_px
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
})
|
||||||
const newCells = z.cells
|
const newCells = z.cells
|
||||||
.filter((c) => c.col_index !== colIndex)
|
.filter((c) => c.col_index !== colIndex)
|
||||||
.map((c) => {
|
.map((c) => {
|
||||||
@@ -337,6 +387,323 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
[grid, pushUndo],
|
[grid, pushUndo],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
// Row management
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Delete one row from a zone and renumber the rows and cells after it.
 *
 * No-op when the grid is not loaded, the zone or the row does not exist, or
 * the zone has only one row left (a zone always keeps at least one row).
 * Cell IDs are rebuilt as `Z{zone}_R{row, zero-padded to 2}_C{col}`.
 */
const deleteRow = useCallback(
  (zoneIndex: number, rowIndex: number) => {
    if (!grid) return
    const zone = grid.zones.find((z) => z.zone_index === zoneIndex)
    if (!zone || zone.rows.length <= 1) return // keep at least 1 row
    // Unknown row: bail out before touching undo history, summary or dirty flag.
    if (!zone.rows.some((r) => r.index === rowIndex)) return
    pushUndo(grid.zones)

    setGrid((prev) => {
      if (!prev) return prev
      const zones = prev.zones.map((z) => {
        if (z.zone_index !== zoneIndex) return z
        // Remaining rows are re-indexed by their position in the array.
        const newRows = z.rows
          .filter((r) => r.index !== rowIndex)
          .map((r, i) => ({ ...r, index: i }))
        // Cells below the deleted row move up by one and get a fresh ID.
        const newCells = z.cells
          .filter((c) => c.row_index !== rowIndex)
          .map((c) => {
            const newRI = c.row_index > rowIndex ? c.row_index - 1 : c.row_index
            return {
              ...c,
              row_index: newRI,
              cell_id: `Z${zoneIndex}_R${String(newRI).padStart(2, '0')}_C${c.col_index}`,
            }
          })
        return { ...z, rows: newRows, cells: newCells }
      })
      return {
        ...prev,
        zones,
        summary: {
          ...prev.summary,
          total_rows: prev.summary.total_rows - 1,
          total_cells: zones.reduce((sum, z) => sum + z.cells.length, 0),
        },
      }
    })
    setDirty(true)
  },
  [grid, pushUndo],
)
|
||||||
|
|
||||||
|
/**
 * Insert an empty row directly after `afterRowIndex` in the given zone.
 *
 * Rows and cells below the insertion point are shifted down by one and their
 * cell IDs rebuilt. The new row copies the height of the reference row (the
 * row at `afterRowIndex`, else the last row) and is placed right below it;
 * one empty, manually-sourced cell is created per column.
 */
const addRow = useCallback(
  (zoneIndex: number, afterRowIndex: number) => {
    if (!grid) return
    const zone = grid.zones.find((z) => z.zone_index === zoneIndex)
    if (!zone) return
    pushUndo(grid.zones)

    const newRowIndex = afterRowIndex + 1

    setGrid((prev) => {
      if (!prev) return prev
      return {
        ...prev,
        zones: prev.zones.map((z) => {
          if (z.zone_index !== zoneIndex) return z
          // Shift existing rows
          const shiftedRows = z.rows.map((r) =>
            r.index > afterRowIndex ? { ...r, index: r.index + 1 } : r,
          )
          // Reference row for the new row's geometry. A zone without any rows
          // has no reference, so the geometry falls back to zeros instead of
          // throwing inside the state updater.
          const refRow =
            z.rows.find((r) => r.index === afterRowIndex) ??
            (z.rows.length > 0 ? z.rows[z.rows.length - 1] : undefined)
          const refHeightPx = refRow ? refRow.y_max_px - refRow.y_min_px : 0
          const refHeightPct = refRow ? refRow.y_max_pct - refRow.y_min_pct : 0
          const newRow = {
            index: newRowIndex,
            y_min_px: refRow ? refRow.y_max_px : 0,
            y_max_px: refRow ? refRow.y_max_px + refHeightPx : 0,
            y_min_pct: refRow ? refRow.y_max_pct : 0,
            // Percent coordinates are capped at the bottom of the image.
            y_max_pct: refRow ? Math.min(100, refRow.y_max_pct + refHeightPct) : 0,
            is_header: false,
            is_footer: false,
          }
          const allRows = [...shiftedRows, newRow].sort((a, b) => a.index - b.index)

          // Shift existing cells
          const shiftedCells = z.cells.map((c) => {
            if (c.row_index <= afterRowIndex) return c
            const newRI = c.row_index + 1
            return {
              ...c,
              row_index: newRI,
              cell_id: `Z${zoneIndex}_R${String(newRI).padStart(2, '0')}_C${c.col_index}`,
            }
          })
          // Create empty cells for each column
          const newCells = z.columns.map((col) => ({
            cell_id: `Z${zoneIndex}_R${String(newRowIndex).padStart(2, '0')}_C${col.index}`,
            zone_index: zoneIndex,
            row_index: newRowIndex,
            col_index: col.index,
            col_type: col.label,
            text: '',
            confidence: 0,
            bbox_px: { x: 0, y: 0, w: 0, h: 0 },
            bbox_pct: { x: 0, y: 0, w: 0, h: 0 },
            word_boxes: [],
            ocr_engine: 'manual',
            is_bold: false,
          }))

          return { ...z, rows: allRows, cells: [...shiftedCells, ...newCells] }
        }),
        summary: {
          ...prev.summary,
          total_rows: prev.summary.total_rows + 1,
          total_cells: prev.summary.total_cells + zone.columns.length,
        },
      }
    })
    setDirty(true)
  },
  [grid, pushUndo],
)
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
// Layout editing (image overlay)
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Capture current state for undo — call once at drag start.
 *
 * The divider-move callbacks do not push undo entries themselves, so a drag
 * that fires many updates is recorded as a single undo step.
 */
const commitUndoPoint = useCallback(() => {
  if (!grid) return
  pushUndo(grid.zones)
}, [grid, pushUndo])
|
||||||
|
|
||||||
|
/**
 * Move a column boundary. boundaryIndex 0 = left edge of col 0, etc.
 *
 * Boundary `b` is shared by two columns: it is the right edge of column
 * `b - 1` and the left edge of column `b`; both are updated together.
 * Does not push an undo entry — call `commitUndoPoint` once at drag start.
 *
 * @param zoneIndex     zone whose columns are edited
 * @param boundaryIndex index of the boundary being moved
 * @param newXPct       new position in percent of the image width; non-finite
 *                      values are ignored, others are clamped to 0..100
 */
const updateColumnDivider = useCallback(
  (zoneIndex: number, boundaryIndex: number, newXPct: number) => {
    if (!grid) return
    // A NaN/Infinity from pointer math would poison every derived pixel value.
    if (!Number.isFinite(newXPct)) return
    const xPct = Math.min(100, Math.max(0, newXPct))
    setGrid((prev) => {
      if (!prev) return prev
      // `|| 1` on purpose: a width of 0 is not usable either.
      const imgW = prev.image_width || 1
      const newPx = Math.round((xPct / 100) * imgW)
      return {
        ...prev,
        zones: prev.zones.map((z) => {
          if (z.zone_index !== zoneIndex) return z
          return {
            ...z,
            columns: z.columns.map((col) => {
              // Right edge of the column before this boundary
              if (col.index === boundaryIndex - 1) {
                return { ...col, x_max_pct: xPct, x_max_px: newPx }
              }
              // Left edge of the column at this boundary
              if (col.index === boundaryIndex) {
                return { ...col, x_min_pct: xPct, x_min_px: newPx }
              }
              return col
            }),
          }
        }),
      }
    })
    setDirty(true)
  },
  [grid],
)
|
||||||
|
|
||||||
|
/**
 * Update horizontal layout guidelines (margins, header, footer).
 *
 * Replaces the whole `horizontals` object; any other keys already present on
 * `layout_dividers` are kept. Marks the grid dirty but pushes no undo entry.
 */
const updateLayoutHorizontals = useCallback(
  (horizontals: LayoutDividers['horizontals']) => {
    if (!grid) return
    setGrid((prev) => {
      if (!prev) return prev
      return {
        ...prev,
        layout_dividers: {
          ...(prev.layout_dividers ?? { horizontals: {} }),
          horizontals,
        },
      }
    })
    setDirty(true)
  },
  [grid],
)
|
||||||
|
|
||||||
|
/**
 * Split a column at a given x percentage, creating a new column.
 *
 * The column containing `xPct` keeps its left part; a new column with the
 * label `column_{n}` takes the right part. Columns and cells to the right are
 * shifted by one index, and an empty manual cell is created for every row of
 * the new column. Existing cells of the split column stay in the left part.
 *
 * No-op when `xPct` is not strictly inside a column: splitting exactly on an
 * existing boundary would only produce a zero-width column.
 */
const splitColumnAt = useCallback(
  (zoneIndex: number, xPct: number) => {
    if (!grid) return
    const zone = grid.zones.find((z) => z.zone_index === zoneIndex)
    if (!zone) return

    const sorted = [...zone.columns].sort((a, b) => a.index - b.index)
    // Strict comparison: a NaN or on-boundary position matches no column.
    const targetCol = sorted.find((c) => c.x_min_pct < xPct && xPct < c.x_max_pct)
    if (!targetCol) return

    pushUndo(grid.zones)
    const newColIndex = targetCol.index + 1
    const imgW = grid.image_width || 1
    const splitPx = Math.round((xPct / 100) * imgW)

    setGrid((prev) => {
      if (!prev) return prev
      return {
        ...prev,
        zones: prev.zones.map((z) => {
          if (z.zone_index !== zoneIndex) return z
          const leftCol = {
            ...targetCol,
            x_max_pct: xPct,
            x_max_px: splitPx,
          }
          const rightCol = {
            index: newColIndex,
            label: `column_${newColIndex + 1}`,
            x_min_pct: xPct,
            x_max_pct: targetCol.x_max_pct,
            x_min_px: splitPx,
            x_max_px: targetCol.x_max_px,
            bold: false,
          }
          const updatedCols = z.columns.map((c) => {
            if (c.index === targetCol.index) return leftCol
            // Columns right of the split move one slot and are relabelled.
            if (c.index > targetCol.index) {
              return { ...c, index: c.index + 1, label: `column_${c.index + 2}` }
            }
            return c
          })
          const allCols = [...updatedCols, rightCol].sort((a, b) => a.index - b.index)
          // NOTE(review): shifted cells keep their old `col_type` although the
          // column label changes — confirm whether col_type should follow it.
          const shiftedCells = z.cells.map((c) => {
            if (c.col_index <= targetCol.index) return c
            const newCI = c.col_index + 1
            return {
              ...c,
              col_index: newCI,
              cell_id: `Z${zoneIndex}_R${String(c.row_index).padStart(2, '0')}_C${newCI}`,
            }
          })
          const newCells = z.rows.map((row) => ({
            cell_id: `Z${zoneIndex}_R${String(row.index).padStart(2, '0')}_C${newColIndex}`,
            zone_index: zoneIndex,
            row_index: row.index,
            col_index: newColIndex,
            col_type: `column_${newColIndex + 1}`,
            text: '',
            confidence: 0,
            bbox_px: { x: 0, y: 0, w: 0, h: 0 },
            bbox_pct: { x: 0, y: 0, w: 0, h: 0 },
            word_boxes: [],
            ocr_engine: 'manual',
            is_bold: false,
          }))
          return { ...z, columns: allCols, cells: [...shiftedCells, ...newCells] }
        }),
        summary: {
          ...prev.summary,
          total_columns: prev.summary.total_columns + 1,
          total_cells: prev.summary.total_cells + zone.rows.length,
        },
      }
    })
    setDirty(true)
  },
  [grid, pushUndo],
)
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
// Multi-select & bulk formatting
|
||||||
|
// ------------------------------------------------------------------
|
||||||
|
|
||||||
|
// IDs of the cells in the current multi-selection. The lazy initializer
// avoids allocating a throw-away Set on every render.
const [selectedCells, setSelectedCells] = useState<Set<string>>(() => new Set<string>())

/** Add the cell to the multi-selection, or remove it if already selected. */
const toggleCellSelection = useCallback(
  (cellId: string) => {
    setSelectedCells((prev) => {
      // Copy first: state must be replaced, never mutated in place.
      const next = new Set(prev)
      if (next.has(cellId)) next.delete(cellId)
      else next.add(cellId)
      return next
    })
  },
  [],
)
|
||||||
|
|
||||||
|
/**
 * Empty the multi-selection.
 *
 * Returns the existing Set when it is already empty so React can skip the
 * re-render; this callback is bound to the Escape key and may fire often.
 */
const clearCellSelection = useCallback(() => {
  setSelectedCells((prev) => (prev.size === 0 ? prev : new Set<string>()))
}, [])
|
||||||
|
|
||||||
|
/**
 * Toggle bold on all selected cells, then clear the selection.
 *
 * Only the cells' `is_bold` flag is changed; column formatting is untouched.
 * Majority rule: bold is switched ON when fewer than half of the selected
 * cells are bold, otherwise (including an exact tie) it is switched OFF.
 */
const toggleSelectedBold = useCallback(() => {
  if (!grid || selectedCells.size === 0) return

  // Determine if we're turning bold on or off (majority rule)
  const selectedArr = grid.zones
    .flatMap((z) => z.cells)
    .filter((c) => selectedCells.has(c.cell_id))

  // The selection may hold IDs of cells that do not exist (yet). With nothing
  // to change, skip the undo entry and the dirty flag; just drop the selection.
  if (selectedArr.length === 0) {
    setSelectedCells(new Set<string>())
    return
  }

  pushUndo(grid.zones)
  const boldCount = selectedArr.filter((c) => c.is_bold).length
  const newBold = boldCount < selectedArr.length / 2

  setGrid((prev) => {
    if (!prev) return prev
    return {
      ...prev,
      zones: prev.zones.map((zone) => ({
        ...zone,
        cells: zone.cells.map((cell) =>
          selectedCells.has(cell.cell_id) ? { ...cell, is_bold: newBold } : cell,
        ),
      })),
    }
  })
  setDirty(true)
  setSelectedCells(new Set<string>())
}, [grid, selectedCells, pushUndo])
|
||||||
|
|
||||||
// ------------------------------------------------------------------
|
// ------------------------------------------------------------------
|
||||||
// Undo / Redo
|
// Undo / Redo
|
||||||
// ------------------------------------------------------------------
|
// ------------------------------------------------------------------
|
||||||
@@ -368,20 +735,37 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
(cellId: string, direction: 'up' | 'down' | 'left' | 'right'): string | null => {
|
(cellId: string, direction: 'up' | 'down' | 'left' | 'right'): string | null => {
|
||||||
if (!grid) return null
|
if (!grid) return null
|
||||||
for (const zone of grid.zones) {
|
for (const zone of grid.zones) {
|
||||||
|
// Find the cell or derive row/col from cellId pattern
|
||||||
const cell = zone.cells.find((c) => c.cell_id === cellId)
|
const cell = zone.cells.find((c) => c.cell_id === cellId)
|
||||||
if (!cell) continue
|
let currentRow: number, currentCol: number
|
||||||
|
if (cell) {
|
||||||
|
currentRow = cell.row_index
|
||||||
|
currentCol = cell.col_index
|
||||||
|
} else {
|
||||||
|
// Try to parse from cellId: Z{zone}_R{row}_C{col}
|
||||||
|
const match = cellId.match(/^Z(\d+)_R(\d+)_C(\d+)$/)
|
||||||
|
if (!match || parseInt(match[1]) !== zone.zone_index) continue
|
||||||
|
currentRow = parseInt(match[2])
|
||||||
|
currentCol = parseInt(match[3])
|
||||||
|
}
|
||||||
|
|
||||||
let targetRow = cell.row_index
|
let targetRow = currentRow
|
||||||
let targetCol = cell.col_index
|
let targetCol = currentCol
|
||||||
if (direction === 'up') targetRow--
|
if (direction === 'up') targetRow--
|
||||||
if (direction === 'down') targetRow++
|
if (direction === 'down') targetRow++
|
||||||
if (direction === 'left') targetCol--
|
if (direction === 'left') targetCol--
|
||||||
if (direction === 'right') targetCol++
|
if (direction === 'right') targetCol++
|
||||||
|
|
||||||
|
// Check bounds
|
||||||
|
const hasRow = zone.rows.some((r) => r.index === targetRow)
|
||||||
|
const hasCol = zone.columns.some((c) => c.index === targetCol)
|
||||||
|
if (!hasRow || !hasCol) return null
|
||||||
|
|
||||||
|
// Return existing cell ID or construct one
|
||||||
const target = zone.cells.find(
|
const target = zone.cells.find(
|
||||||
(c) => c.row_index === targetRow && c.col_index === targetCol,
|
(c) => c.row_index === targetRow && c.col_index === targetCol,
|
||||||
)
|
)
|
||||||
return target?.cell_id ?? null
|
return target?.cell_id ?? `Z${zone.zone_index}_R${String(targetRow).padStart(2, '0')}_C${targetCol}`
|
||||||
}
|
}
|
||||||
return null
|
return null
|
||||||
},
|
},
|
||||||
@@ -396,6 +780,7 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
dirty,
|
dirty,
|
||||||
selectedCell,
|
selectedCell,
|
||||||
selectedZone,
|
selectedZone,
|
||||||
|
selectedCells,
|
||||||
setSelectedCell,
|
setSelectedCell,
|
||||||
setSelectedZone,
|
setSelectedZone,
|
||||||
buildGrid,
|
buildGrid,
|
||||||
@@ -411,5 +796,14 @@ export function useGridEditor(sessionId: string | null) {
|
|||||||
getAdjacentCell,
|
getAdjacentCell,
|
||||||
deleteColumn,
|
deleteColumn,
|
||||||
addColumn,
|
addColumn,
|
||||||
|
deleteRow,
|
||||||
|
addRow,
|
||||||
|
commitUndoPoint,
|
||||||
|
updateColumnDivider,
|
||||||
|
updateLayoutHorizontals,
|
||||||
|
splitColumnAt,
|
||||||
|
toggleCellSelection,
|
||||||
|
clearCellSelection,
|
||||||
|
toggleSelectedBold,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,20 +8,22 @@
|
|||||||
* the GT marking flow in the parent page.
|
* the GT marking flow in the parent page.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { useCallback, useEffect, useState } from 'react'
|
import { useCallback, useEffect, useRef, useState, type MutableRefObject } from 'react'
|
||||||
import { useGridEditor } from '@/components/grid-editor/useGridEditor'
|
import { useGridEditor } from '@/components/grid-editor/useGridEditor'
|
||||||
import type { GridZone } from '@/components/grid-editor/types'
|
import type { GridZone, LayoutDividers } from '@/components/grid-editor/types'
|
||||||
import { GridToolbar } from '@/components/grid-editor/GridToolbar'
|
import { GridToolbar } from '@/components/grid-editor/GridToolbar'
|
||||||
import { GridTable } from '@/components/grid-editor/GridTable'
|
import { GridTable } from '@/components/grid-editor/GridTable'
|
||||||
|
import { ImageLayoutEditor } from '@/components/grid-editor/ImageLayoutEditor'
|
||||||
|
|
||||||
const KLAUSUR_API = '/klausur-api'
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
/** Props of the grid review step. */
interface StepGridReviewProps {
  /** Session whose grid is edited; the grid is only loaded when this is set. */
  sessionId: string | null
  /** Optional callback supplied by the parent page. */
  onNext?: () => void
  /**
   * Optional ref the component fills with a function that saves the grid
   * when it has unsaved changes; it is reset to null on cleanup.
   */
  saveRef?: MutableRefObject<(() => Promise<void>) | null>
}
|
||||||
|
|
||||||
export function StepGridReview({ sessionId, onNext }: StepGridReviewProps) {
|
export function StepGridReview({ sessionId, onNext, saveRef }: StepGridReviewProps) {
|
||||||
const {
|
const {
|
||||||
grid,
|
grid,
|
||||||
loading,
|
loading,
|
||||||
@@ -29,6 +31,7 @@ export function StepGridReview({ sessionId, onNext }: StepGridReviewProps) {
|
|||||||
error,
|
error,
|
||||||
dirty,
|
dirty,
|
||||||
selectedCell,
|
selectedCell,
|
||||||
|
selectedCells,
|
||||||
setSelectedCell,
|
setSelectedCell,
|
||||||
buildGrid,
|
buildGrid,
|
||||||
loadGrid,
|
loadGrid,
|
||||||
@@ -43,12 +46,31 @@ export function StepGridReview({ sessionId, onNext }: StepGridReviewProps) {
|
|||||||
getAdjacentCell,
|
getAdjacentCell,
|
||||||
deleteColumn,
|
deleteColumn,
|
||||||
addColumn,
|
addColumn,
|
||||||
|
deleteRow,
|
||||||
|
addRow,
|
||||||
|
commitUndoPoint,
|
||||||
|
updateColumnDivider,
|
||||||
|
updateLayoutHorizontals,
|
||||||
|
splitColumnAt,
|
||||||
|
toggleCellSelection,
|
||||||
|
clearCellSelection,
|
||||||
|
toggleSelectedBold,
|
||||||
} = useGridEditor(sessionId)
|
} = useGridEditor(sessionId)
|
||||||
|
|
||||||
const [showImage, setShowImage] = useState(true)
|
const [showImage, setShowImage] = useState(true)
|
||||||
const [zoom, setZoom] = useState(100)
|
const [zoom, setZoom] = useState(100)
|
||||||
const [acceptedRows, setAcceptedRows] = useState<Set<string>>(new Set())
|
const [acceptedRows, setAcceptedRows] = useState<Set<string>>(new Set())
|
||||||
|
|
||||||
|
// Expose save function to parent via ref (for GT marking auto-save).
// The function is re-registered whenever `dirty` or `saveGrid` changes so the
// parent always calls a version that sees the current state; it only saves
// when there are unsaved changes.
useEffect(() => {
  if (!saveRef) return undefined
  saveRef.current = async () => {
    if (dirty) await saveGrid()
  }
  // Detach on cleanup so the parent never calls a stale save function.
  return () => {
    saveRef.current = null
  }
}, [saveRef, dirty, saveGrid])
|
||||||
|
|
||||||
// Load grid on mount
|
// Load grid on mount
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (sessionId) loadGrid()
|
if (sessionId) loadGrid()
|
||||||
@@ -71,11 +93,18 @@ export function StepGridReview({ sessionId, onNext }: StepGridReviewProps) {
|
|||||||
} else if ((e.metaKey || e.ctrlKey) && e.key === 's') {
|
} else if ((e.metaKey || e.ctrlKey) && e.key === 's') {
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
saveGrid()
|
saveGrid()
|
||||||
|
} else if ((e.metaKey || e.ctrlKey) && e.key === 'b') {
|
||||||
|
e.preventDefault()
|
||||||
|
if (selectedCells.size > 0) {
|
||||||
|
toggleSelectedBold()
|
||||||
|
}
|
||||||
|
} else if (e.key === 'Escape') {
|
||||||
|
clearCellSelection()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
window.addEventListener('keydown', handler)
|
window.addEventListener('keydown', handler)
|
||||||
return () => window.removeEventListener('keydown', handler)
|
return () => window.removeEventListener('keydown', handler)
|
||||||
}, [undo, redo, saveGrid])
|
}, [undo, redo, saveGrid, selectedCells, toggleSelectedBold, clearCellSelection])
|
||||||
|
|
||||||
const handleNavigate = useCallback(
|
const handleNavigate = useCallback(
|
||||||
(cellId: string, direction: 'up' | 'down' | 'left' | 'right') => {
|
(cellId: string, direction: 'up' | 'down' | 'left' | 'right') => {
|
||||||
@@ -195,6 +224,11 @@ export function StepGridReview({ sessionId, onNext }: StepGridReviewProps) {
|
|||||||
{grid.summary.total_zones} Zone(n), {grid.summary.total_columns} Spalten,{' '}
|
{grid.summary.total_zones} Zone(n), {grid.summary.total_columns} Spalten,{' '}
|
||||||
{grid.summary.total_rows} Zeilen, {grid.summary.total_cells} Zellen
|
{grid.summary.total_rows} Zeilen, {grid.summary.total_cells} Zellen
|
||||||
</span>
|
</span>
|
||||||
|
{grid.dictionary_detection?.is_dictionary && (
|
||||||
|
<span className="px-2 py-0.5 rounded-full bg-blue-50 dark:bg-blue-900/20 text-blue-600 dark:text-blue-400 border border-blue-200 dark:border-blue-800">
|
||||||
|
Woerterbuch ({Math.round(grid.dictionary_detection.confidence * 100)}%)
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
{lowConfCells.length > 0 && (
|
{lowConfCells.length > 0 && (
|
||||||
<span className="px-2 py-0.5 rounded-full bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 border border-red-200 dark:border-red-800">
|
<span className="px-2 py-0.5 rounded-full bg-red-50 dark:bg-red-900/20 text-red-600 dark:text-red-400 border border-red-200 dark:border-red-800">
|
||||||
{lowConfCells.length} niedrige Konfidenz
|
{lowConfCells.length} niedrige Konfidenz
|
||||||
@@ -249,47 +283,21 @@ export function StepGridReview({ sessionId, onNext }: StepGridReviewProps) {
|
|||||||
className={showImage ? 'grid grid-cols-2 gap-3' : ''}
|
className={showImage ? 'grid grid-cols-2 gap-3' : ''}
|
||||||
style={{ minHeight: '55vh' }}
|
style={{ minHeight: '55vh' }}
|
||||||
>
|
>
|
||||||
{/* Left: Original Image */}
|
{/* Left: Original Image with Layout Editor */}
|
||||||
{showImage && (
|
{showImage && (
|
||||||
<div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 overflow-hidden flex flex-col">
|
<ImageLayoutEditor
|
||||||
<div className="flex items-center justify-between px-3 py-2 border-b border-gray-100 dark:border-gray-700 bg-gray-50 dark:bg-gray-800/50">
|
imageUrl={imageUrl}
|
||||||
<span className="text-xs font-medium text-gray-600 dark:text-gray-400">
|
zones={grid.zones}
|
||||||
Original Scan (zugeschnitten)
|
imageWidth={grid.image_width}
|
||||||
</span>
|
layoutDividers={grid.layout_dividers}
|
||||||
<div className="flex items-center gap-2">
|
zoom={zoom}
|
||||||
<button
|
onZoomChange={setZoom}
|
||||||
onClick={() => setZoom((z) => Math.max(50, z - 25))}
|
onColumnDividerMove={updateColumnDivider}
|
||||||
className="px-2 py-0.5 text-xs bg-gray-200 dark:bg-gray-700 rounded hover:bg-gray-300 dark:hover:bg-gray-600 text-gray-700 dark:text-gray-300"
|
onHorizontalsChange={updateLayoutHorizontals}
|
||||||
>
|
onCommitUndo={commitUndoPoint}
|
||||||
-
|
onSplitColumnAt={splitColumnAt}
|
||||||
</button>
|
onDeleteColumn={deleteColumn}
|
||||||
<span className="text-xs text-gray-500 dark:text-gray-400 w-10 text-center">
|
/>
|
||||||
{zoom}%
|
|
||||||
</span>
|
|
||||||
<button
|
|
||||||
onClick={() => setZoom((z) => Math.min(300, z + 25))}
|
|
||||||
className="px-2 py-0.5 text-xs bg-gray-200 dark:bg-gray-700 rounded hover:bg-gray-300 dark:hover:bg-gray-600 text-gray-700 dark:text-gray-300"
|
|
||||||
>
|
|
||||||
+
|
|
||||||
</button>
|
|
||||||
<button
|
|
||||||
onClick={() => setZoom(100)}
|
|
||||||
className="px-2 py-0.5 text-xs bg-gray-200 dark:bg-gray-700 rounded hover:bg-gray-300 dark:hover:bg-gray-600 text-gray-700 dark:text-gray-300"
|
|
||||||
>
|
|
||||||
Fit
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div className="flex-1 overflow-auto p-2">
|
|
||||||
{/* eslint-disable-next-line @next/next/no-img-element */}
|
|
||||||
<img
|
|
||||||
src={imageUrl}
|
|
||||||
alt="Original scan"
|
|
||||||
style={{ width: `${zoom}%`, maxWidth: 'none' }}
|
|
||||||
className="block"
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Right: Grid with row-accept buttons */}
|
{/* Right: Grid with row-accept buttons */}
|
||||||
@@ -370,13 +378,17 @@ export function StepGridReview({ sessionId, onNext }: StepGridReviewProps) {
|
|||||||
zone={zone}
|
zone={zone}
|
||||||
layoutMetrics={grid.layout_metrics}
|
layoutMetrics={grid.layout_metrics}
|
||||||
selectedCell={selectedCell}
|
selectedCell={selectedCell}
|
||||||
|
selectedCells={selectedCells}
|
||||||
onSelectCell={setSelectedCell}
|
onSelectCell={setSelectedCell}
|
||||||
|
onToggleCellSelection={toggleCellSelection}
|
||||||
onCellTextChange={updateCellText}
|
onCellTextChange={updateCellText}
|
||||||
onToggleColumnBold={toggleColumnBold}
|
onToggleColumnBold={toggleColumnBold}
|
||||||
onToggleRowHeader={toggleRowHeader}
|
onToggleRowHeader={toggleRowHeader}
|
||||||
onNavigate={handleNavigate}
|
onNavigate={handleNavigate}
|
||||||
onDeleteColumn={deleteColumn}
|
onDeleteColumn={deleteColumn}
|
||||||
onAddColumn={addColumn}
|
onAddColumn={addColumn}
|
||||||
|
onDeleteRow={deleteRow}
|
||||||
|
onAddRow={addRow}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
))}
|
))}
|
||||||
@@ -388,11 +400,34 @@ export function StepGridReview({ sessionId, onNext }: StepGridReviewProps) {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Multi-select toolbar */}
|
||||||
|
{selectedCells.size > 0 && (
|
||||||
|
<div className="flex items-center gap-3 px-3 py-2 bg-teal-50 dark:bg-teal-900/20 border border-teal-200 dark:border-teal-800 rounded-lg text-xs">
|
||||||
|
<span className="text-teal-700 dark:text-teal-300 font-medium">
|
||||||
|
{selectedCells.size} Zellen markiert
|
||||||
|
</span>
|
||||||
|
<button
|
||||||
|
onClick={toggleSelectedBold}
|
||||||
|
className="px-2.5 py-1 bg-teal-600 text-white rounded hover:bg-teal-700 transition-colors font-medium"
|
||||||
|
>
|
||||||
|
B Fett umschalten
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={clearCellSelection}
|
||||||
|
className="px-2 py-1 text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-200 transition-colors"
|
||||||
|
>
|
||||||
|
Auswahl aufheben (Esc)
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Tips + Next */}
|
{/* Tips + Next */}
|
||||||
<div className="flex items-center justify-between">
|
<div className="flex items-center justify-between">
|
||||||
<div className="text-[11px] text-gray-400 dark:text-gray-500 flex items-center gap-4">
|
<div className="text-[11px] text-gray-400 dark:text-gray-500 flex items-center gap-4">
|
||||||
<span>Tab: naechste Zelle</span>
|
<span>Tab: naechste Zelle</span>
|
||||||
<span>Enter: Zeile runter</span>
|
<span>Pfeiltasten: Navigation</span>
|
||||||
|
<span>Ctrl+Klick: Mehrfachauswahl</span>
|
||||||
|
<span>Ctrl+B: Fett</span>
|
||||||
<span>Ctrl+Z/Y: Undo/Redo</span>
|
<span>Ctrl+Z/Y: Undo/Redo</span>
|
||||||
<span>Ctrl+S: Speichern</span>
|
<span>Ctrl+S: Speichern</span>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -2275,6 +2275,324 @@ def _score_role(geom: ColumnGeometry) -> Dict[str, float]:
|
|||||||
return {k: round(v, 3) for k, v in scores.items()}
|
return {k: round(v, 3) for k, v in scores.items()}
|
||||||
|
|
||||||
|
|
||||||
|
# --- Dictionary / Wörterbuch Detection ---
|
||||||
|
|
||||||
|
# Article words that appear as a dedicated column in dictionaries
|
||||||
|
_DICT_ARTICLE_WORDS = {
|
||||||
|
# German articles
|
||||||
|
"die", "der", "das", "dem", "den", "des", "ein", "eine", "einem", "einer",
|
||||||
|
# English articles / infinitive marker
|
||||||
|
"the", "a", "an", "to",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _score_dictionary_signals(
    geometries: List[ColumnGeometry],
    document_category: Optional[str] = None,
    margin_strip_detected: bool = False,
) -> Dict[str, Any]:
    """Score dictionary-specific patterns across all columns.

    Combines 4 independent signals to determine if the page is a dictionary:
    1. Alphabetical ordering of words in each column (weight 0.35)
    2. Article detection: dedicated column, inline articles, or article
       word frequency (weight 0.25)
    3. First-letter uniformity (most headwords share a letter) (weight 0.25)
    4. Decorative A-Z margin strip, detected upstream (weight 0.15)

    Only words with ``conf`` > 30 are considered. Word dicts with a missing
    or ``None`` ``text``/``conf``/``top`` are tolerated (treated as empty
    text / 0) instead of raising.

    Args:
        geometries: List of ColumnGeometry with words. Fewer than two
            columns yields the default "not a dictionary" result.
        document_category: User-selected category (e.g. 'woerterbuch');
            'woerterbuch' adds a flat 0.20 boost to the combined score.
        margin_strip_detected: Whether a decorative A-Z margin strip was found.

    Returns:
        Dict with 'is_dictionary', 'confidence', 'article_col_index',
        'headword_col_index', and 'signals' sub-dict.
    """
    # Imported once per call rather than once per column inside the loop.
    from collections import Counter

    result: Dict[str, Any] = {
        "is_dictionary": False,
        "confidence": 0.0,
        "article_col_index": None,
        "headword_col_index": None,
        "signals": {},
    }

    if not geometries or len(geometries) < 2:
        return result

    def _usable_texts(geom: Any, min_len: int, by_top: bool = False) -> List[str]:
        """Return stripped, lower-cased texts of confident words in a column."""
        words = list(geom.words or [])
        if by_top:
            words.sort(key=lambda w: w.get("top") or 0)
        texts: List[str] = []
        for w in words:
            text = (w.get("text") or "").strip()
            if (w.get("conf") or 0) > 30 and len(text) >= min_len:
                texts.append(text.lower())
        return texts

    # Signals 1 and 3 both look at words of >= 2 chars in top-to-bottom
    # order, so build that list once per column.
    reading_order = [(geom, _usable_texts(geom, 2, by_top=True)) for geom in geometries]

    # --- Signal 1: Alphabetical ordering per column (weight 0.35) ---
    best_alpha_score = 0.0
    best_alpha_col = -1
    for geom, texts in reading_order:
        if len(texts) < 5:
            continue
        # Deduplicate consecutive identical words (OCR double-reads)
        deduped = [texts[0]]
        for t in texts[1:]:
            if t != deduped[-1]:
                deduped.append(t)
        if len(deduped) < 5:
            continue
        # Fraction of consecutive pairs that are in alphabetical order
        ordered_pairs = sum(
            1 for prev, nxt in zip(deduped, deduped[1:]) if prev <= nxt
        )
        alpha_score = ordered_pairs / (len(deduped) - 1)
        if alpha_score > best_alpha_score:
            best_alpha_score = alpha_score
            best_alpha_col = geom.index

    result["signals"]["alphabetical_score"] = round(best_alpha_score, 3)
    result["signals"]["alphabetical_col"] = best_alpha_col

    # --- Signal 2: Article detection (weight 0.25) ---
    # Check three patterns:
    # (a) Dedicated narrow article column (der/die/das only)
    # (b) Inline articles: multi-word texts starting with "der X", "die X"
    # (c) High article word frequency: many individual words ARE articles
    #     (common when OCR splits "der Zustand" into separate word_boxes)
    best_article_density = 0.0
    best_article_col = -1
    best_inline_article_ratio = 0.0
    best_article_word_ratio = 0.0
    # str.startswith accepts a tuple of prefixes; build it once.
    article_prefixes = tuple(art + " " for art in _DICT_ARTICLE_WORDS)

    for geom in geometries:
        texts = _usable_texts(geom, 1)
        if len(texts) < 3:
            continue

        # (a) Dedicated article column: narrow, mostly article words
        article_count = sum(1 for t in texts if t in _DICT_ARTICLE_WORDS)
        if geom.width_ratio <= 0.20:
            density = article_count / len(texts)
            if density > best_article_density:
                best_article_density = density
                best_article_col = geom.index

        # (b) Inline articles: "der Zustand", "die Zutat", etc.
        inline_count = sum(1 for t in texts if t.startswith(article_prefixes))
        inline_ratio = inline_count / len(texts)
        if inline_ratio > best_inline_article_ratio:
            best_inline_article_ratio = inline_ratio

        # (c) Article word frequency in any column (for OCR-split word_boxes).
        # Requires >= 3 article words AND >= 3 non-article words; a pure
        # article column is already handled by (a). No minimum ratio is
        # enforced here -- the ratio itself becomes the score.
        if article_count >= 3:
            art_ratio = article_count / len(texts)
            non_art = len(texts) - article_count
            if non_art >= 3 and art_ratio > best_article_word_ratio:
                best_article_word_ratio = art_ratio

    # Use the strongest signal
    effective_article_score = max(
        best_article_density,
        best_inline_article_ratio,
        best_article_word_ratio * 0.8,  # slight discount for raw word ratio
    )

    result["signals"]["article_density"] = round(best_article_density, 3)
    result["signals"]["inline_article_ratio"] = round(best_inline_article_ratio, 3)
    result["signals"]["article_word_ratio"] = round(best_article_word_ratio, 3)
    result["signals"]["article_col"] = best_article_col

    # --- Signal 3: First-letter uniformity (weight 0.25) ---
    best_uniformity = 0.0
    best_uniform_col = -1
    has_letter_transition = False
    for geom, texts in reading_order:
        if len(texts) < 5:
            continue
        first_letters = [t[0] for t in texts if t[0].isalpha()]
        if not first_letters:
            continue
        _, most_common_count = Counter(first_letters).most_common(1)[0]
        uniformity = most_common_count / len(first_letters)

        # Check for orderly letter transitions (A->B or Y->Z):
        # collapse runs of the same first letter into groups, then check
        # that the groups appear in alphabetical order.
        groups = [first_letters[0]]
        for fl in first_letters[1:]:
            if fl != groups[-1]:
                groups.append(fl)
        if 2 <= len(groups) <= 5:
            if all(a <= b for a, b in zip(groups, groups[1:])):
                has_letter_transition = True
                # Boost uniformity for orderly transitions
                uniformity = max(uniformity, 0.70)

        if uniformity > best_uniformity:
            best_uniformity = uniformity
            best_uniform_col = geom.index

    result["signals"]["first_letter_uniformity"] = round(best_uniformity, 3)
    result["signals"]["uniform_col"] = best_uniform_col
    result["signals"]["has_letter_transition"] = has_letter_transition

    # --- Signal 4: Decorative margin strip (weight 0.15) ---
    result["signals"]["margin_strip_detected"] = margin_strip_detected

    # --- Combine signals ---
    s1 = min(best_alpha_score, 1.0) * 0.35
    s2 = min(effective_article_score, 1.0) * 0.25
    s3 = min(best_uniformity, 1.0) * 0.25
    s4 = (1.0 if margin_strip_detected else 0.0) * 0.15

    combined = s1 + s2 + s3 + s4

    # Boost if user set document_category to 'woerterbuch'
    if document_category == "woerterbuch":
        combined = min(1.0, combined + 0.20)
        result["signals"]["category_boost"] = True

    result["confidence"] = round(combined, 3)

    # Threshold: combined >= 0.40 to classify as dictionary
    # (at least 2 strong signals or 3 moderate ones)
    if combined >= 0.40:
        result["is_dictionary"] = True
        # Identify headword column: best alphabetical OR best uniform
        if best_alpha_col >= 0 and best_alpha_score >= 0.60:
            result["headword_col_index"] = best_alpha_col
        elif best_uniform_col >= 0 and best_uniformity >= 0.50:
            result["headword_col_index"] = best_uniform_col
        if best_article_col >= 0 and best_article_density >= 0.30:
            result["article_col_index"] = best_article_col
        # If inline articles are strong but no dedicated column, note it
        if best_inline_article_ratio >= 0.30 and result["article_col_index"] is None:
            result["signals"]["inline_articles_detected"] = True

    logger.info(
        "DictionaryDetection: combined=%.3f is_dict=%s signals=%s",
        combined, result["is_dictionary"], result["signals"],
    )

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_dictionary_columns(
    geometries: List[ColumnGeometry],
    dict_signals: Dict[str, Any],
    lang_scores: List[Dict[str, float]],
    content_h: int,
) -> Optional[List[PageRegion]]:
    """Classify columns for a detected dictionary page.

    Assigns column_article and column_headword from the dictionary signals,
    then labels every remaining column as column_ipa, column_de or column_en
    based on IPA character frequency and language scores. Columns with no
    usable language signal fall back to position: the leftmost such column
    becomes column_en, any further ones column_de.

    Args:
        geometries: Column geometries to classify.
        dict_signals: Result dict of the dictionary signal scoring
            ('is_dictionary', 'confidence', 'article_col_index',
            'headword_col_index', 'signals').
        lang_scores: Per-column language scores with 'eng'/'deu' entries,
            looked up by ``geom.index``; missing entries count as 0.
        content_h: Height assigned to every produced region.

    Returns:
        Regions sorted by x, or None when ``dict_signals`` does not flag the
        page as a dictionary.
    """
    if not dict_signals.get("is_dictionary"):
        return None

    regions: List[PageRegion] = []
    assigned = set()
    signals = dict_signals.get("signals") or {}
    article_idx = dict_signals.get("article_col_index")
    headword_idx = dict_signals.get("headword_col_index")

    def _make_region(geom: Any, col_type: str, conf: float) -> Any:
        """Build a dictionary-classified region spanning the column."""
        return PageRegion(
            type=col_type,
            x=geom.x, y=geom.y,
            width=geom.width, height=content_h,
            classification_confidence=conf,
            classification_method="dictionary",
        )

    # First geometry per index wins, matching a "find first and stop" scan.
    by_index: Dict[Any, Any] = {}
    for geom in geometries:
        by_index.setdefault(geom.index, geom)

    # 1. Assign article column if detected
    article_geom = by_index.get(article_idx) if article_idx is not None else None
    if article_geom is not None:
        regions.append(_make_region(
            article_geom, "column_article",
            round(signals.get("article_density", 0.5), 2),
        ))
        assigned.add(article_geom.index)

    # 2. Assign headword column (skipped if it is the article column)
    if headword_idx is not None and headword_idx not in assigned:
        headword_geom = by_index.get(headword_idx)
        if headword_geom is not None:
            regions.append(_make_region(
                headword_geom, "column_headword",
                round(dict_signals.get("confidence", 0.0), 2),
            ))
            assigned.add(headword_geom.index)

    # 3. Assign remaining columns by language + content.
    # Walk them left-to-right so the positional fallback is independent of
    # the order in which the geometries were passed in.
    remaining = sorted(
        (g for g in geometries if g.index not in assigned),
        key=lambda g: g.x,
    )
    fallback_en_taken = False
    for geom in remaining:
        if 0 <= geom.index < len(lang_scores):
            ls = lang_scores[geom.index]
        else:
            ls = {}
        deu = ls.get("deu", 0)
        eng = ls.get("eng", 0)

        # Share of words containing IPA-typical characters (brackets,
        # slashes, stress/length marks, IPA vowels)
        words = geom.words or []
        ipa_words = sum(
            1 for w in words
            if any(c in (w.get("text") or "") for c in "[]/ˈˌːɪəɒʊæɑɔ")
        )
        ipa_ratio = ipa_words / max(len(words), 1)

        if ipa_ratio > 0.25:
            col_type = "column_ipa"
            conf = round(min(1.0, ipa_ratio), 2)
        elif deu > eng and deu > 0.05:
            col_type = "column_de"
            conf = round(deu, 2)
        elif eng > deu and eng > 0.05:
            col_type = "column_en"
            conf = round(eng, 2)
        else:
            # Positional fallback: leftmost undecided column = EN, next = DE.
            # Tracked with a flag; re-deriving "leftmost unassigned" on each
            # iteration would label every undecided column EN when walking
            # left-to-right.
            if fallback_en_taken:
                col_type = "column_de"
            else:
                col_type = "column_en"
                fallback_en_taken = True
            conf = 0.4

        regions.append(_make_region(geom, col_type, conf))
        assigned.add(geom.index)

    regions.sort(key=lambda r: r.x)
    return regions
|
||||||
|
|
||||||
|
|
||||||
def _build_margin_regions(
|
def _build_margin_regions(
|
||||||
all_regions: List[PageRegion],
|
all_regions: List[PageRegion],
|
||||||
left_x: int,
|
left_x: int,
|
||||||
@@ -2418,9 +2736,12 @@ def classify_column_types(geometries: List[ColumnGeometry],
|
|||||||
bottom_y: int,
|
bottom_y: int,
|
||||||
left_x: int = 0,
|
left_x: int = 0,
|
||||||
right_x: int = 0,
|
right_x: int = 0,
|
||||||
inv: Optional[np.ndarray] = None) -> List[PageRegion]:
|
inv: Optional[np.ndarray] = None,
|
||||||
|
document_category: Optional[str] = None,
|
||||||
|
margin_strip_detected: bool = False) -> List[PageRegion]:
|
||||||
"""Classify column types using a 3-level fallback chain.
|
"""Classify column types using a 3-level fallback chain.
|
||||||
|
|
||||||
|
Level 0: Dictionary detection (if signals are strong enough)
|
||||||
Level 1: Content-based (language + role scoring)
|
Level 1: Content-based (language + role scoring)
|
||||||
Level 2: Position + language (old rules enhanced with language detection)
|
Level 2: Position + language (old rules enhanced with language detection)
|
||||||
Level 3: Pure position (exact old code, no regression)
|
Level 3: Pure position (exact old code, no regression)
|
||||||
@@ -2434,6 +2755,8 @@ def classify_column_types(geometries: List[ColumnGeometry],
|
|||||||
bottom_y: Bottom Y of content area.
|
bottom_y: Bottom Y of content area.
|
||||||
left_x: Left content bound (from _find_content_bounds).
|
left_x: Left content bound (from _find_content_bounds).
|
||||||
right_x: Right content bound (from _find_content_bounds).
|
right_x: Right content bound (from _find_content_bounds).
|
||||||
|
document_category: User-selected category (e.g. 'woerterbuch').
|
||||||
|
margin_strip_detected: Whether a decorative A-Z margin strip was found.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of PageRegion with types, confidence, and method.
|
List of PageRegion with types, confidence, and method.
|
||||||
@@ -2499,6 +2822,22 @@ def classify_column_types(geometries: List[ColumnGeometry],
|
|||||||
logger.info(f"ClassifyColumns: role scores: "
|
logger.info(f"ClassifyColumns: role scores: "
|
||||||
f"{[(g.index, rs) for g, rs in zip(geometries, role_scores)]}")
|
f"{[(g.index, rs) for g, rs in zip(geometries, role_scores)]}")
|
||||||
|
|
||||||
|
# --- Level 0: Dictionary detection ---
|
||||||
|
dict_signals = _score_dictionary_signals(
|
||||||
|
geometries,
|
||||||
|
document_category=document_category,
|
||||||
|
margin_strip_detected=margin_strip_detected,
|
||||||
|
)
|
||||||
|
if dict_signals["is_dictionary"]:
|
||||||
|
regions = _classify_dictionary_columns(
|
||||||
|
geometries, dict_signals, lang_scores, content_h,
|
||||||
|
)
|
||||||
|
if regions is not None:
|
||||||
|
logger.info("ClassifyColumns: Level 0 (dictionary) succeeded, confidence=%.3f",
|
||||||
|
dict_signals["confidence"])
|
||||||
|
_add_header_footer(regions, top_y, bottom_y, img_w, img_h, inv=inv)
|
||||||
|
return _with_margins(ignore_regions + regions)
|
||||||
|
|
||||||
# --- Level 1: Content-based classification ---
|
# --- Level 1: Content-based classification ---
|
||||||
regions = _classify_by_content(geometries, lang_scores, role_scores, content_w, content_h)
|
regions = _classify_by_content(geometries, lang_scores, role_scores, content_w, content_h)
|
||||||
if regions is not None:
|
if regions is not None:
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ ENGLISH_FUNCTION_WORDS = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'to', 'o
|
|||||||
@dataclass
|
@dataclass
|
||||||
class PageRegion:
|
class PageRegion:
|
||||||
"""A detected region on the page."""
|
"""A detected region on the page."""
|
||||||
type: str # 'column_en', 'column_de', 'column_example', 'page_ref', 'column_marker', 'column_text', 'header', 'footer', 'margin_top', 'margin_bottom'
|
type: str # 'column_en', 'column_de', 'column_example', 'page_ref', 'column_marker', 'column_text', 'header', 'footer', 'margin_top', 'margin_bottom', 'column_headword', 'column_article', 'column_ipa'
|
||||||
x: int
|
x: int
|
||||||
y: int
|
y: int
|
||||||
width: int
|
width: int
|
||||||
|
|||||||
@@ -1201,7 +1201,7 @@ def _filter_decorative_margin(
|
|||||||
img_w: int,
|
img_w: int,
|
||||||
log: Any,
|
log: Any,
|
||||||
session_id: str,
|
session_id: str,
|
||||||
) -> None:
|
) -> Dict[str, Any]:
|
||||||
"""Remove words that belong to a decorative alphabet strip on a margin.
|
"""Remove words that belong to a decorative alphabet strip on a margin.
|
||||||
|
|
||||||
Some vocabulary worksheets have a vertical A–Z alphabet graphic along
|
Some vocabulary worksheets have a vertical A–Z alphabet graphic along
|
||||||
@@ -1220,9 +1220,13 @@ def _filter_decorative_margin(
|
|||||||
artifacts like "Vv" that belong to the same decorative element.
|
artifacts like "Vv" that belong to the same decorative element.
|
||||||
|
|
||||||
Modifies *words* in place.
|
Modifies *words* in place.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with 'found' (bool), 'side' (str), 'letters_detected' (int).
|
||||||
"""
|
"""
|
||||||
|
no_strip: Dict[str, Any] = {"found": False, "side": "", "letters_detected": 0}
|
||||||
if not words or img_w <= 0:
|
if not words or img_w <= 0:
|
||||||
return
|
return no_strip
|
||||||
|
|
||||||
margin_cutoff = img_w * 0.30
|
margin_cutoff = img_w * 0.30
|
||||||
# Phase 1: find candidate strips using single-char words
|
# Phase 1: find candidate strips using single-char words
|
||||||
@@ -1278,6 +1282,9 @@ def _filter_decorative_margin(
|
|||||||
"(strip x=%d-%d)",
|
"(strip x=%d-%d)",
|
||||||
session_id, removed, side, strip_x_lo, strip_x_hi,
|
session_id, removed, side, strip_x_lo, strip_x_hi,
|
||||||
)
|
)
|
||||||
|
return {"found": True, "side": side, "letters_detected": len(strip)}
|
||||||
|
|
||||||
|
return no_strip
|
||||||
|
|
||||||
|
|
||||||
def _filter_footer_words(
|
def _filter_footer_words(
|
||||||
@@ -1427,7 +1434,11 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
|
|||||||
# Some worksheets have a decorative alphabet strip along one margin
|
# Some worksheets have a decorative alphabet strip along one margin
|
||||||
# (A-Z in a graphic). OCR reads these as single-char words aligned
|
# (A-Z in a graphic). OCR reads these as single-char words aligned
|
||||||
# vertically. Detect and remove them before grid building.
|
# vertically. Detect and remove them before grid building.
|
||||||
_filter_decorative_margin(all_words, img_w, logger, session_id)
|
margin_strip_info = _filter_decorative_margin(all_words, img_w, logger, session_id)
|
||||||
|
margin_strip_detected = margin_strip_info.get("found", False)
|
||||||
|
|
||||||
|
# Read document_category from session (user-selected or auto-detected)
|
||||||
|
document_category = session.get("document_category")
|
||||||
|
|
||||||
# 2c. Filter footer rows (page numbers at the very bottom).
|
# 2c. Filter footer rows (page numbers at the very bottom).
|
||||||
# Isolated short text in the bottom 5% of the page is typically a
|
# Isolated short text in the bottom 5% of the page is typically a
|
||||||
@@ -1997,18 +2008,21 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
|
|||||||
removed_pipes, z.get("zone_index", 0),
|
removed_pipes, z.get("zone_index", 0),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Also strip pipe chars from word_box text and cell text that may remain
|
# Strip pipe chars ONLY from word_boxes/cells where the pipe is an
|
||||||
# from OCR reading syllable-separation marks (e.g. "zu|trau|en" → "zutrauen").
|
# OCR column-divider artifact. Preserve pipes that are embedded in
|
||||||
|
# words as syllable separators (e.g. "zu|trau|en") — these are
|
||||||
|
# intentional and used in dictionary Ground Truth.
|
||||||
for z in zones_data:
|
for z in zones_data:
|
||||||
for cell in z.get("cells", []):
|
for cell in z.get("cells", []):
|
||||||
for wb in cell.get("word_boxes", []):
|
for wb in cell.get("word_boxes", []):
|
||||||
wbt = wb.get("text", "")
|
wbt = wb.get("text", "")
|
||||||
if "|" in wbt:
|
# Only strip if the ENTIRE word_box is just pipe(s)
|
||||||
wb["text"] = wbt.replace("|", "")
|
# (handled by _PIPE_RE above) — leave embedded pipes alone
|
||||||
text = cell.get("text", "")
|
text = cell.get("text", "")
|
||||||
if "|" in text:
|
if "|" in text:
|
||||||
cleaned = text.replace("|", "").strip()
|
# Only strip leading/trailing pipes (OCR artifacts at cell edges)
|
||||||
if cleaned != text:
|
cleaned = text.strip("|").strip()
|
||||||
|
if cleaned != text.strip():
|
||||||
cell["text"] = cleaned
|
cell["text"] = cleaned
|
||||||
|
|
||||||
# 4e. Detect and remove page-border decoration strips.
|
# 4e. Detect and remove page-border decoration strips.
|
||||||
@@ -2668,6 +2682,63 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
|
|||||||
)
|
)
|
||||||
font_size_suggestion = max(10, int(avg_row_height * 0.6))
|
font_size_suggestion = max(10, int(avg_row_height * 0.6))
|
||||||
|
|
||||||
|
# --- Dictionary detection on assembled grid ---
|
||||||
|
# Build lightweight ColumnGeometry-like structures from zone columns for
|
||||||
|
# dictionary signal scoring.
|
||||||
|
from cv_layout import _score_dictionary_signals
|
||||||
|
dict_detection: Dict[str, Any] = {"is_dictionary": False, "confidence": 0.0}
|
||||||
|
try:
|
||||||
|
from cv_vocab_types import ColumnGeometry
|
||||||
|
for z in zones_data:
|
||||||
|
zone_cells = z.get("cells", [])
|
||||||
|
zone_cols = z.get("columns", [])
|
||||||
|
if len(zone_cols) < 2 or len(zone_cells) < 10:
|
||||||
|
continue
|
||||||
|
# Build pseudo-ColumnGeometry per column
|
||||||
|
pseudo_geoms = []
|
||||||
|
for col in zone_cols:
|
||||||
|
ci = col["index"]
|
||||||
|
col_cells = [c for c in zone_cells if c.get("col_index") == ci]
|
||||||
|
# Flatten word_boxes into word dicts compatible with _score_language
|
||||||
|
col_words = []
|
||||||
|
for cell in col_cells:
|
||||||
|
for wb in cell.get("word_boxes") or []:
|
||||||
|
col_words.append({
|
||||||
|
"text": wb.get("text", ""),
|
||||||
|
"conf": wb.get("conf", 0),
|
||||||
|
"top": wb.get("top", 0),
|
||||||
|
"left": wb.get("left", 0),
|
||||||
|
"height": wb.get("height", 0),
|
||||||
|
"width": wb.get("width", 0),
|
||||||
|
})
|
||||||
|
# Fallback: use cell text if no word_boxes
|
||||||
|
if not cell.get("word_boxes") and cell.get("text"):
|
||||||
|
col_words.append({
|
||||||
|
"text": cell["text"],
|
||||||
|
"conf": cell.get("confidence", 50),
|
||||||
|
"top": cell.get("bbox_px", {}).get("y", 0),
|
||||||
|
"left": cell.get("bbox_px", {}).get("x", 0),
|
||||||
|
"height": cell.get("bbox_px", {}).get("h", 20),
|
||||||
|
"width": cell.get("bbox_px", {}).get("w", 50),
|
||||||
|
})
|
||||||
|
col_w = col.get("x_max_px", 0) - col.get("x_min_px", 0)
|
||||||
|
pseudo_geoms.append(ColumnGeometry(
|
||||||
|
index=ci, x=col.get("x_min_px", 0), y=0,
|
||||||
|
width=max(col_w, 1), height=img_h,
|
||||||
|
word_count=len(col_words), words=col_words,
|
||||||
|
width_ratio=col_w / max(img_w, 1),
|
||||||
|
))
|
||||||
|
if len(pseudo_geoms) >= 2:
|
||||||
|
dd = _score_dictionary_signals(
|
||||||
|
pseudo_geoms,
|
||||||
|
document_category=document_category,
|
||||||
|
margin_strip_detected=margin_strip_detected,
|
||||||
|
)
|
||||||
|
if dd["confidence"] > dict_detection["confidence"]:
|
||||||
|
dict_detection = dd
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Dictionary detection failed: %s", e)
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
"session_id": session_id,
|
"session_id": session_id,
|
||||||
"image_width": img_w,
|
"image_width": img_w,
|
||||||
@@ -2693,6 +2764,13 @@ async def _build_grid_core(session_id: str, session: dict) -> dict:
|
|||||||
"avg_row_height_px": round(avg_row_height, 1),
|
"avg_row_height_px": round(avg_row_height, 1),
|
||||||
"font_size_suggestion_px": font_size_suggestion,
|
"font_size_suggestion_px": font_size_suggestion,
|
||||||
},
|
},
|
||||||
|
"dictionary_detection": {
|
||||||
|
"is_dictionary": dict_detection.get("is_dictionary", False),
|
||||||
|
"confidence": dict_detection.get("confidence", 0.0),
|
||||||
|
"signals": dict_detection.get("signals", {}),
|
||||||
|
"article_col_index": dict_detection.get("article_col_index"),
|
||||||
|
"headword_col_index": dict_detection.get("headword_col_index"),
|
||||||
|
},
|
||||||
"duration_seconds": round(duration, 2),
|
"duration_seconds": round(duration, 2),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2722,8 +2800,8 @@ async def build_grid(session_id: str):
|
|||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise HTTPException(status_code=400, detail=str(e))
|
raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
# Persist to DB
|
# Persist to DB and advance current_step to 11 (reconstruction complete)
|
||||||
await update_session_db(session_id, grid_editor_result=result)
|
await update_session_db(session_id, grid_editor_result=result, current_step=11)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"build-grid session %s: %d zones, %d cols, %d rows, %d cells, "
|
"build-grid session %s: %d zones, %d cols, %d rows, %d cells, "
|
||||||
@@ -2772,7 +2850,7 @@ async def save_grid(session_id: str, request: Request):
|
|||||||
"edited": True,
|
"edited": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
await update_session_db(session_id, grid_editor_result=result)
|
await update_session_db(session_id, grid_editor_result=result, current_step=11)
|
||||||
|
|
||||||
logger.info("save-grid session %s: %d zones saved", session_id, len(body["zones"]))
|
logger.info("save-grid session %s: %d zones saved", session_id, len(body["zones"]))
|
||||||
|
|
||||||
|
|||||||
@@ -256,7 +256,7 @@ async def mark_ground_truth(
|
|||||||
# Merge into existing ground_truth JSONB
|
# Merge into existing ground_truth JSONB
|
||||||
gt = session.get("ground_truth") or {}
|
gt = session.get("ground_truth") or {}
|
||||||
gt["build_grid_reference"] = reference
|
gt["build_grid_reference"] = reference
|
||||||
await update_session_db(session_id, ground_truth=gt)
|
await update_session_db(session_id, ground_truth=gt, current_step=11)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Ground truth marked for session %s: %d cells",
|
"Ground truth marked for session %s: %d cells",
|
||||||
|
|||||||
@@ -178,6 +178,18 @@ async def get_session_info(session_id: str):
|
|||||||
result["word_result"] = session["word_result"]
|
result["word_result"] = session["word_result"]
|
||||||
if session.get("doc_type_result"):
|
if session.get("doc_type_result"):
|
||||||
result["doc_type_result"] = session["doc_type_result"]
|
result["doc_type_result"] = session["doc_type_result"]
|
||||||
|
if session.get("structure_result"):
|
||||||
|
result["structure_result"] = session["structure_result"]
|
||||||
|
if session.get("grid_editor_result"):
|
||||||
|
# Include summary only to keep response small
|
||||||
|
gr = session["grid_editor_result"]
|
||||||
|
result["grid_editor_result"] = {
|
||||||
|
"summary": gr.get("summary", {}),
|
||||||
|
"zones_count": len(gr.get("zones", [])),
|
||||||
|
"edited": gr.get("edited", False),
|
||||||
|
}
|
||||||
|
if session.get("ground_truth"):
|
||||||
|
result["ground_truth"] = session["ground_truth"]
|
||||||
|
|
||||||
# Sub-session info
|
# Sub-session info
|
||||||
if session.get("parent_session_id"):
|
if session.get("parent_session_id"):
|
||||||
|
|||||||
Reference in New Issue
Block a user