Unified Grid: merge all zones into single Excel-like grid
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 32s
CI / test-go-edu-search (push) Successful in 45s
CI / test-python-klausur (push) Failing after 2m35s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 33s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 32s
CI / test-go-edu-search (push) Successful in 45s
CI / test-python-klausur (push) Failing after 2m35s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 33s
Backend (unified_grid.py):
- build_unified_grid(): merges content + box zones into one zone
- Dominant row height is computed as the median of the content row spacings
- Full-width boxes: rows integrated directly
- Partial-width boxes: extra rows are inserted when a box contains more text
lines than fit in the standard rows it spans (e.g., 7 lines in a box 5 rows tall)
- Box-origin cells tagged with source_zone_type + box_region metadata
Backend (grid_editor_api.py):
- POST /sessions/{id}/build-unified-grid → persists as unified_grid_result
- GET /sessions/{id}/unified-grid → retrieve persisted result
Frontend:
- GridEditorCell: added source_zone_type, box_region fields
- GridTable: box-origin cells get tinted background + left border
- StepAnsicht: split view with the original image (left) and an editable
unified GridTable (right). Loads the persisted unified grid on mount and builds it automatically if none exists yet.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -488,7 +488,13 @@ export function GridTable({
|
|||||||
} ${isMultiSelected ? 'bg-teal-50/60 dark:bg-teal-900/20' : ''} ${
|
} ${isMultiSelected ? 'bg-teal-50/60 dark:bg-teal-900/20' : ''} ${
|
||||||
isLowConf && !isMultiSelected ? 'bg-amber-50/50 dark:bg-amber-900/10' : ''
|
isLowConf && !isMultiSelected ? 'bg-amber-50/50 dark:bg-amber-900/10' : ''
|
||||||
} ${row.is_header && !isMultiSelected ? 'bg-blue-50/50 dark:bg-blue-900/10' : ''}`}
|
} ${row.is_header && !isMultiSelected ? 'bg-blue-50/50 dark:bg-blue-900/10' : ''}`}
|
||||||
style={{ height: `${rowH}px` }}
|
style={{
|
||||||
|
height: `${rowH}px`,
|
||||||
|
...(cell?.box_region?.bg_hex ? {
|
||||||
|
backgroundColor: `${cell.box_region.bg_hex}12`,
|
||||||
|
borderLeft: cell.box_region.border ? `3px solid ${cell.box_region.bg_hex}60` : undefined,
|
||||||
|
} : {}),
|
||||||
|
}}
|
||||||
onContextMenu={(e) => {
|
onContextMenu={(e) => {
|
||||||
if (onSetCellColor) {
|
if (onSetCellColor) {
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
|
|||||||
@@ -128,6 +128,14 @@ export interface GridEditorCell {
|
|||||||
color_override?: string | null
|
color_override?: string | null
|
||||||
/** Number of columns this cell spans (merged cell). Default 1. */
|
/** Number of columns this cell spans (merged cell). Default 1. */
|
||||||
colspan?: number
|
colspan?: number
|
||||||
|
/** Source zone type when in unified grid. */
|
||||||
|
source_zone_type?: 'content' | 'box'
|
||||||
|
/** Box visual metadata for cells from box zones. */
|
||||||
|
box_region?: {
|
||||||
|
bg_hex?: string
|
||||||
|
bg_color?: string
|
||||||
|
border?: boolean
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Layout dividers for the visual column/margin editor on the original image. */
|
/** Layout dividers for the visual column/margin editor on the original image. */
|
||||||
|
|||||||
@@ -1,20 +1,16 @@
|
|||||||
'use client'
|
'use client'
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* StepAnsicht — Split-view page layout comparison.
|
* StepAnsicht — Unified Grid View.
|
||||||
*
|
*
|
||||||
* Left: Original scan with OCR word overlay
|
* Left: Original scan with OCR word overlay
|
||||||
* Right: Reconstructed layout with averaged row heights per section
|
* Right: Unified grid (single zone, boxes integrated) rendered via GridTable
|
||||||
*
|
|
||||||
* Layout principle: the page is divided into vertical sections separated
|
|
||||||
* by boxes. Each section gets a uniform row height calculated from
|
|
||||||
* (last_row_y - first_row_y) / (num_rows - 1). Boxes are rendered
|
|
||||||
* inline between sections (not as floating overlays).
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { useEffect, useMemo, useRef, useState } from 'react'
|
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||||
import { useGridEditor } from '@/components/grid-editor/useGridEditor'
|
import { useGridEditor } from '@/components/grid-editor/useGridEditor'
|
||||||
import type { GridZone, GridEditorCell, GridRow } from '@/components/grid-editor/types'
|
import { GridTable } from '@/components/grid-editor/GridTable'
|
||||||
|
import type { GridZone } from '@/components/grid-editor/types'
|
||||||
|
|
||||||
const KLAUSUR_API = '/klausur-api'
|
const KLAUSUR_API = '/klausur-api'
|
||||||
|
|
||||||
@@ -23,491 +19,172 @@ interface StepAnsichtProps {
|
|||||||
onNext: () => void
|
onNext: () => void
|
||||||
}
|
}
|
||||||
|
|
||||||
/** A vertical section of the page: either content rows or a box zone. */
|
|
||||||
interface PageSection {
|
|
||||||
type: 'content' | 'box'
|
|
||||||
yStart: number // pixel y in original image
|
|
||||||
yEnd: number // pixel y end
|
|
||||||
zone?: GridZone // for box sections
|
|
||||||
rows?: GridRow[] // for content sections — subset of content zone rows
|
|
||||||
avgRowH: number // averaged row height in original pixels
|
|
||||||
}
|
|
||||||
|
|
||||||
function getCellColor(cell: GridEditorCell | undefined): string | null {
|
|
||||||
if (!cell) return null
|
|
||||||
if (cell.color_override) return cell.color_override
|
|
||||||
const colored = cell.word_boxes?.find((wb) => wb.color_name && wb.color_name !== 'black')
|
|
||||||
return colored?.color ?? null
|
|
||||||
}
|
|
||||||
|
|
||||||
export function StepAnsicht({ sessionId, onNext }: StepAnsichtProps) {
|
export function StepAnsicht({ sessionId, onNext }: StepAnsichtProps) {
|
||||||
const { grid, loading, error, loadGrid } = useGridEditor(sessionId)
|
const gridEditor = useGridEditor(sessionId)
|
||||||
|
const {
|
||||||
|
loading, error, selectedCell, setSelectedCell,
|
||||||
|
updateCellText, toggleColumnBold, toggleRowHeader,
|
||||||
|
getAdjacentCell, deleteColumn, addColumn, deleteRow, addRow,
|
||||||
|
commitUndoPoint, selectedCells, toggleCellSelection,
|
||||||
|
clearCellSelection, toggleSelectedBold, setCellColor,
|
||||||
|
saveGrid, saving, dirty, undo, redo, canUndo, canRedo,
|
||||||
|
} = gridEditor
|
||||||
|
|
||||||
|
const [unifiedGrid, setUnifiedGrid] = useState<any>(null)
|
||||||
|
const [building, setBuilding] = useState(false)
|
||||||
|
const [buildError, setBuildError] = useState<string | null>(null)
|
||||||
const leftRef = useRef<HTMLDivElement>(null)
|
const leftRef = useRef<HTMLDivElement>(null)
|
||||||
const [panelWidth, setPanelWidth] = useState(0)
|
const [leftHeight, setLeftHeight] = useState(600)
|
||||||
const [showGrid, setShowGrid] = useState(true)
|
const [showGrid, setShowGrid] = useState(false)
|
||||||
const [gridSpacing, setGridSpacing] = useState(100)
|
|
||||||
|
|
||||||
|
// Build unified grid
|
||||||
|
const buildUnified = useCallback(async () => {
|
||||||
|
if (!sessionId) return
|
||||||
|
setBuilding(true)
|
||||||
|
setBuildError(null)
|
||||||
|
try {
|
||||||
|
const res = await fetch(
|
||||||
|
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-unified-grid`,
|
||||||
|
{ method: 'POST' },
|
||||||
|
)
|
||||||
|
if (!res.ok) {
|
||||||
|
const d = await res.json().catch(() => ({}))
|
||||||
|
throw new Error(d.detail || `HTTP ${res.status}`)
|
||||||
|
}
|
||||||
|
const data = await res.json()
|
||||||
|
setUnifiedGrid(data)
|
||||||
|
} catch (e) {
|
||||||
|
setBuildError(e instanceof Error ? e.message : String(e))
|
||||||
|
} finally {
|
||||||
|
setBuilding(false)
|
||||||
|
}
|
||||||
|
}, [sessionId])
|
||||||
|
|
||||||
|
// Load unified grid on mount (or build if missing)
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (sessionId) loadGrid()
|
if (!sessionId) return
|
||||||
|
;(async () => {
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/unified-grid`)
|
||||||
|
if (res.ok) {
|
||||||
|
setUnifiedGrid(await res.json())
|
||||||
|
} else {
|
||||||
|
// Not built yet — build it
|
||||||
|
buildUnified()
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
buildUnified()
|
||||||
|
}
|
||||||
|
})()
|
||||||
}, [sessionId]) // eslint-disable-line react-hooks/exhaustive-deps
|
}, [sessionId]) // eslint-disable-line react-hooks/exhaustive-deps
|
||||||
|
|
||||||
|
// Track left panel height for sync
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!leftRef.current) return
|
if (!leftRef.current) return
|
||||||
const ro = new ResizeObserver(([entry]) => setPanelWidth(entry.contentRect.width))
|
const ro = new ResizeObserver(([e]) => setLeftHeight(e.contentRect.height))
|
||||||
ro.observe(leftRef.current)
|
ro.observe(leftRef.current)
|
||||||
return () => ro.disconnect()
|
return () => ro.disconnect()
|
||||||
}, [])
|
}, [])
|
||||||
|
|
||||||
// Build page sections: split content rows around box zones
|
const unifiedZone: GridZone | null = unifiedGrid?.zones?.[0] ?? null
|
||||||
const sections = useMemo(() => {
|
|
||||||
if (!grid) return []
|
|
||||||
const contentZone = grid.zones.find((z) => z.zone_type === 'content')
|
|
||||||
const boxZones = grid.zones.filter((z) => z.zone_type === 'box')
|
|
||||||
.sort((a, b) => a.bbox_px.y - b.bbox_px.y)
|
|
||||||
|
|
||||||
if (!contentZone) return []
|
if (loading || building) {
|
||||||
|
|
||||||
const allRows = contentZone.rows
|
|
||||||
const result: PageSection[] = []
|
|
||||||
|
|
||||||
// Box boundaries sorted by y
|
|
||||||
const boxBounds = boxZones.map((bz) => ({
|
|
||||||
zone: bz,
|
|
||||||
yStart: bz.bbox_px.y,
|
|
||||||
yEnd: bz.bbox_px.y + bz.bbox_px.h,
|
|
||||||
}))
|
|
||||||
|
|
||||||
// Split content rows into sections around boxes
|
|
||||||
let currentRows: GridRow[] = []
|
|
||||||
let boxIdx = 0
|
|
||||||
|
|
||||||
for (const row of allRows) {
|
|
||||||
const ry = row.y_min_px ?? (row as any).y_min ?? 0
|
|
||||||
|
|
||||||
// Check if we've passed a box boundary — insert box section
|
|
||||||
while (boxIdx < boxBounds.length && ry >= boxBounds[boxIdx].yStart) {
|
|
||||||
// Flush current content section
|
|
||||||
if (currentRows.length > 0) {
|
|
||||||
result.push(makeContentSection(currentRows))
|
|
||||||
currentRows = []
|
|
||||||
}
|
|
||||||
// Insert box section
|
|
||||||
const bb = boxBounds[boxIdx]
|
|
||||||
const bRows = bb.zone.rows || []
|
|
||||||
let bAvgH = 35
|
|
||||||
if (bRows.length >= 2) {
|
|
||||||
const bys = bRows.map((r) => r.y_min_px ?? (r as any).y_min ?? 0)
|
|
||||||
bAvgH = (bys[bys.length - 1] - bys[0]) / (bRows.length - 1)
|
|
||||||
}
|
|
||||||
result.push({
|
|
||||||
type: 'box',
|
|
||||||
yStart: bb.yStart,
|
|
||||||
yEnd: bb.yEnd,
|
|
||||||
zone: bb.zone,
|
|
||||||
avgRowH: bAvgH,
|
|
||||||
})
|
|
||||||
boxIdx++
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip rows only if they fall FULLY inside a box (both Y and X overlap).
|
|
||||||
// Small boxes (e.g. on the right half) don't cover left-side content rows.
|
|
||||||
const rowCells = contentZone!.cells.filter((c) => c.row_index === row.index)
|
|
||||||
const rowXMin = rowCells.length > 0
|
|
||||||
? Math.min(...rowCells.map((c) => c.bbox_px?.x ?? contentZone!.bbox_px.x))
|
|
||||||
: contentZone!.bbox_px.x
|
|
||||||
const insideBox = boxBounds.some((bb) => {
|
|
||||||
if (ry < bb.yStart || ry > bb.yEnd) return false
|
|
||||||
// Check horizontal overlap: row must be mostly inside box x-range
|
|
||||||
const boxXMin = bb.zone.bbox_px.x
|
|
||||||
const boxXMax = boxXMin + bb.zone.bbox_px.w
|
|
||||||
return rowXMin >= boxXMin - 20 && rowXMin <= boxXMax
|
|
||||||
})
|
|
||||||
if (!insideBox) {
|
|
||||||
currentRows.push(row)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush remaining content rows
|
|
||||||
if (currentRows.length > 0) {
|
|
||||||
result.push(makeContentSection(currentRows))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Insert remaining boxes (if any rows didn't trigger them)
|
|
||||||
while (boxIdx < boxBounds.length) {
|
|
||||||
const bb = boxBounds[boxIdx]
|
|
||||||
const bRows = bb.zone.rows || []
|
|
||||||
let bAvgH = 35
|
|
||||||
if (bRows.length >= 2) {
|
|
||||||
const bys = bRows.map((r) => r.y_min_px ?? (r as any).y_min ?? 0)
|
|
||||||
bAvgH = (bys[bys.length - 1] - bys[0]) / (bRows.length - 1)
|
|
||||||
}
|
|
||||||
result.push({ type: 'box', yStart: bb.yStart, yEnd: bb.yEnd, zone: bb.zone, avgRowH: bAvgH })
|
|
||||||
boxIdx++
|
|
||||||
}
|
|
||||||
|
|
||||||
return result
|
|
||||||
}, [grid])
|
|
||||||
|
|
||||||
if (loading) {
|
|
||||||
return (
|
return (
|
||||||
<div className="flex items-center justify-center py-16">
|
<div className="flex items-center justify-center py-16">
|
||||||
<div className="w-8 h-8 border-4 border-teal-500 border-t-transparent rounded-full animate-spin" />
|
<div className="w-8 h-8 border-4 border-teal-500 border-t-transparent rounded-full animate-spin" />
|
||||||
<span className="ml-3 text-gray-500">Lade Vorschau...</span>
|
<span className="ml-3 text-gray-500">{building ? 'Baue Unified Grid...' : 'Lade...'}</span>
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (error || !grid) {
|
|
||||||
return (
|
|
||||||
<div className="p-8 text-center">
|
|
||||||
<p className="text-red-500 mb-4">{error || 'Keine Grid-Daten.'}</p>
|
|
||||||
<button onClick={onNext} className="px-5 py-2 bg-teal-600 text-white rounded-lg">Weiter →</button>
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
const imgW = grid.image_width || 1
|
|
||||||
const imgH = grid.image_height || 1
|
|
||||||
const scale = panelWidth > 0 ? panelWidth / imgW : 0.5
|
|
||||||
const panelHeight = imgH * scale
|
|
||||||
const contentZone = grid.zones.find((z) => z.zone_type === 'content')
|
|
||||||
|
|
||||||
// Dominant row height: median of row-to-row spacings (excluding box-gap jumps)
|
|
||||||
const dominantRowH = useMemo(() => {
|
|
||||||
const cz = grid.zones.find((z) => z.zone_type === 'content')
|
|
||||||
if (!cz || cz.rows.length < 2) return 47
|
|
||||||
const spacings: number[] = []
|
|
||||||
for (let i = 0; i < cz.rows.length - 1; i++) {
|
|
||||||
const y1 = cz.rows[i].y_min_px ?? (cz.rows[i] as any).y_min ?? 0
|
|
||||||
const y2 = cz.rows[i + 1].y_min_px ?? (cz.rows[i + 1] as any).y_min ?? 0
|
|
||||||
const d = y2 - y1
|
|
||||||
if (d > 0 && d < 100) spacings.push(d)
|
|
||||||
}
|
|
||||||
if (spacings.length === 0) return 47
|
|
||||||
spacings.sort((a, b) => a - b)
|
|
||||||
return spacings[Math.floor(spacings.length / 2)]
|
|
||||||
}, [grid])
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="space-y-3">
|
<div className="space-y-3">
|
||||||
{/* Header */}
|
{/* Header */}
|
||||||
<div className="flex items-center justify-between">
|
<div className="flex items-center justify-between">
|
||||||
<div>
|
<div>
|
||||||
<h3 className="text-lg font-semibold text-gray-900 dark:text-white">Ansicht — Original vs. Rekonstruktion</h3>
|
<h3 className="text-lg font-semibold text-gray-900 dark:text-white">Ansicht — Unified Grid</h3>
|
||||||
<p className="text-sm text-gray-500 dark:text-gray-400">
|
<p className="text-sm text-gray-500 dark:text-gray-400">
|
||||||
Links: Original mit OCR. Rechts: Rekonstruktion mit gemittelten Zeilenhöhen.
|
Alle Inhalte in einem Grid. Boxen sind integriert (farbig markiert).
|
||||||
|
{unifiedGrid && (
|
||||||
|
<span className="ml-2 font-mono text-xs">
|
||||||
|
{unifiedGrid.summary?.total_rows} Zeilen × {unifiedGrid.summary?.total_columns} Spalten
|
||||||
|
{unifiedGrid.dominant_row_h && ` · Zeilenhöhe: ${Math.round(unifiedGrid.dominant_row_h)}px`}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-2">
|
||||||
<label className="flex items-center gap-1.5 text-xs text-gray-500">
|
<button
|
||||||
<input type="checkbox" checked={showGrid} onChange={(e) => setShowGrid(e.target.checked)} className="w-3.5 h-3.5 rounded" />
|
onClick={buildUnified}
|
||||||
Gitter
|
disabled={building}
|
||||||
</label>
|
className="px-3 py-1.5 bg-amber-600 text-white rounded-lg hover:bg-amber-700 text-xs font-medium disabled:opacity-50"
|
||||||
<select value={gridSpacing} onChange={(e) => setGridSpacing(Number(e.target.value))} className="text-xs px-1.5 py-1 rounded border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700">
|
>
|
||||||
<option value={50}>50px</option>
|
{building ? 'Baut...' : 'Neu aufbauen'}
|
||||||
<option value={100}>100px</option>
|
</button>
|
||||||
<option value={200}>200px</option>
|
<button onClick={onNext} className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 text-sm font-medium">
|
||||||
</select>
|
Weiter →
|
||||||
<button onClick={onNext} className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 text-sm font-medium">Weiter →</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{(error || buildError) && (
|
||||||
|
<div className="p-3 bg-red-50 dark:bg-red-900/30 border border-red-200 dark:border-red-800 rounded-lg text-red-700 dark:text-red-300 text-sm">
|
||||||
|
{error || buildError}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Split view */}
|
{/* Split view */}
|
||||||
<div className="flex gap-2" style={{ height: `${panelHeight + 40}px` }}>
|
<div className="flex gap-2">
|
||||||
{/* LEFT: Original + OCR overlay */}
|
{/* LEFT: Original + OCR overlay */}
|
||||||
<div ref={leftRef} className="flex-1 relative border border-gray-300 dark:border-gray-600 rounded-lg overflow-hidden bg-white dark:bg-gray-900">
|
<div ref={leftRef} className="w-1/3 border border-gray-300 dark:border-gray-600 rounded-lg overflow-hidden bg-white dark:bg-gray-900 flex-shrink-0">
|
||||||
<div className="absolute top-0 left-0 px-2 py-0.5 bg-black/60 text-white text-[10px] font-medium rounded-br z-20">Original + OCR</div>
|
<div className="px-2 py-1 bg-black/60 text-white text-[10px] font-medium">Original + OCR</div>
|
||||||
{sessionId && (
|
{sessionId && (
|
||||||
<img
|
<img
|
||||||
src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/words-overlay`}
|
src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/words-overlay`}
|
||||||
alt="Original + OCR"
|
alt="Original + OCR"
|
||||||
className="absolute inset-0 w-full h-auto"
|
className="w-full h-auto"
|
||||||
style={{ height: `${panelHeight}px`, objectFit: 'contain' }}
|
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
{showGrid && <CoordinateGrid imgW={imgW} imgH={imgH} scale={scale} spacing={gridSpacing} />}
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* RIGHT: Reconstruction */}
|
{/* RIGHT: Unified Grid Table */}
|
||||||
<div className="flex-1 relative border border-gray-300 dark:border-gray-600 rounded-lg overflow-hidden bg-white dark:bg-gray-900">
|
<div className="flex-1 border border-gray-300 dark:border-gray-600 rounded-lg overflow-auto bg-white dark:bg-gray-900" style={{ maxHeight: `${Math.max(600, leftHeight)}px` }}>
|
||||||
<div className="absolute top-0 left-0 px-2 py-0.5 bg-teal-600/80 text-white text-[10px] font-medium rounded-br z-20">Rekonstruktion</div>
|
<div className="px-2 py-1 bg-teal-600/80 text-white text-[10px] font-medium sticky top-0 z-20">
|
||||||
|
Unified Grid
|
||||||
{/* Render sections sequentially */}
|
{unifiedGrid?.is_unified && (
|
||||||
{sections.map((sec, si) => {
|
<span className="ml-2 opacity-70">
|
||||||
if (sec.type === 'box' && sec.zone) {
|
({unifiedGrid.summary?.total_rows}×{unifiedGrid.summary?.total_columns})
|
||||||
return (
|
</span>
|
||||||
<BoxSectionRenderer
|
|
||||||
key={`box-${si}`}
|
|
||||||
zone={sec.zone}
|
|
||||||
scale={scale}
|
|
||||||
avgRowH={sec.avgRowH}
|
|
||||||
/>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
if (sec.type === 'content' && sec.rows && contentZone) {
|
|
||||||
return (
|
|
||||||
<ContentSectionRenderer
|
|
||||||
key={`sec-${si}`}
|
|
||||||
zone={contentZone}
|
|
||||||
rows={sec.rows}
|
|
||||||
yStart={sec.yStart}
|
|
||||||
scale={scale}
|
|
||||||
avgRowH={dominantRowH}
|
|
||||||
/>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
return null
|
|
||||||
})}
|
|
||||||
|
|
||||||
{showGrid && <CoordinateGrid imgW={imgW} imgH={imgH} scale={scale} spacing={gridSpacing} />}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// Helpers
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
function makeContentSection(rows: GridRow[]): PageSection {
|
|
||||||
const ys = rows.map((r) => r.y_min_px ?? (r as any).y_min ?? 0)
|
|
||||||
const yEnd = rows[rows.length - 1].y_max_px ?? (rows[rows.length - 1] as any).y_max ?? ys[ys.length - 1] + 30
|
|
||||||
let avgRowH = 35
|
|
||||||
if (rows.length >= 2) {
|
|
||||||
avgRowH = (ys[ys.length - 1] - ys[0]) / (rows.length - 1)
|
|
||||||
}
|
|
||||||
return { type: 'content', yStart: ys[0], yEnd, rows, avgRowH }
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// Content section renderer — rows from content zone at absolute positions
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
function ContentSectionRenderer({ zone, rows, yStart, scale, avgRowH }: {
|
|
||||||
zone: GridZone; rows: GridRow[]; yStart: number; scale: number; avgRowH: number
|
|
||||||
}) {
|
|
||||||
const cellMap = new Map<string, GridEditorCell>()
|
|
||||||
for (const cell of zone.cells) {
|
|
||||||
cellMap.set(`${cell.row_index}_${cell.col_index}`, cell)
|
|
||||||
}
|
|
||||||
|
|
||||||
const rowH = avgRowH * scale
|
|
||||||
const fontSize = Math.max(7, rowH * 0.55)
|
|
||||||
|
|
||||||
return (
|
|
||||||
<>
|
|
||||||
{rows.map((row, ri) => {
|
|
||||||
const rowY = (row.y_min_px ?? (row as any).y_min ?? 0) * scale
|
|
||||||
const isSpanning = zone.cells.some((c) => c.row_index === row.index && c.col_type === 'spanning_header')
|
|
||||||
|
|
||||||
// Column widths
|
|
||||||
const colWidths = zone.columns.map((col) => Math.max(5, ((col.x_max_px ?? 0) - (col.x_min_px ?? 0)) * scale))
|
|
||||||
const zoneLeft = zone.bbox_px.x * scale
|
|
||||||
const zoneWidth = zone.bbox_px.w * scale
|
|
||||||
const totalColW = colWidths.reduce((s, w) => s + w, 0)
|
|
||||||
const colScale = totalColW > 0 ? zoneWidth / totalColW : 1
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div
|
|
||||||
key={row.index}
|
|
||||||
className="absolute"
|
|
||||||
style={{
|
|
||||||
left: `${zoneLeft}px`,
|
|
||||||
top: `${rowY}px`,
|
|
||||||
width: `${zoneWidth}px`,
|
|
||||||
height: `${rowH}px`,
|
|
||||||
display: 'grid',
|
|
||||||
gridTemplateColumns: colWidths.map((w) => `${(w * colScale).toFixed(1)}px`).join(' '),
|
|
||||||
fontSize: `${fontSize}px`,
|
|
||||||
lineHeight: `${rowH}px`,
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
{isSpanning ? (
|
|
||||||
zone.cells
|
|
||||||
.filter((c) => c.row_index === row.index && c.col_type === 'spanning_header')
|
|
||||||
.sort((a, b) => a.col_index - b.col_index)
|
|
||||||
.map((cell) => {
|
|
||||||
const colspan = cell.colspan || zone.columns.length
|
|
||||||
const color = getCellColor(cell)
|
|
||||||
return (
|
|
||||||
<div
|
|
||||||
key={cell.cell_id}
|
|
||||||
className={`overflow-hidden ${row.is_header ? 'font-bold' : ''}`}
|
|
||||||
style={{ gridColumn: `${cell.col_index + 1} / ${cell.col_index + 1 + colspan}`, color: color || undefined }}
|
|
||||||
>
|
|
||||||
{cell.text}
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
})
|
|
||||||
) : (
|
|
||||||
zone.columns.map((col) => {
|
|
||||||
const cell = cellMap.get(`${row.index}_${col.index}`)
|
|
||||||
const color = getCellColor(cell)
|
|
||||||
const isBold = col.bold || cell?.is_bold || row.is_header
|
|
||||||
return (
|
|
||||||
<div
|
|
||||||
key={col.index}
|
|
||||||
className={`overflow-hidden text-ellipsis whitespace-nowrap ${isBold ? 'font-bold' : ''}`}
|
|
||||||
style={{ color: color || undefined }}
|
|
||||||
>
|
|
||||||
{cell?.text ?? ''}
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
})
|
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
)
|
{unifiedZone ? (
|
||||||
})}
|
<GridTable
|
||||||
</>
|
zone={unifiedZone}
|
||||||
)
|
selectedCell={selectedCell}
|
||||||
}
|
selectedCells={selectedCells}
|
||||||
|
onSelectCell={setSelectedCell}
|
||||||
// ---------------------------------------------------------------------------
|
onCellTextChange={updateCellText}
|
||||||
// Box section renderer — box zone at absolute position with border
|
onToggleColumnBold={toggleColumnBold}
|
||||||
// ---------------------------------------------------------------------------
|
onToggleRowHeader={toggleRowHeader}
|
||||||
|
onNavigate={(cellId, dir) => {
|
||||||
function BoxSectionRenderer({ zone, scale, avgRowH }: {
|
const next = getAdjacentCell(cellId, dir)
|
||||||
zone: GridZone; scale: number; avgRowH: number
|
if (next) setSelectedCell(next)
|
||||||
}) {
|
}}
|
||||||
const boxColor = (zone as any).box_bg_hex || '#6b7280'
|
onDeleteColumn={deleteColumn}
|
||||||
if (!zone.cells || zone.cells.length === 0) return null
|
onAddColumn={addColumn}
|
||||||
|
onDeleteRow={deleteRow}
|
||||||
const left = zone.bbox_px.x * scale
|
onAddRow={addRow}
|
||||||
const top = zone.bbox_px.y * scale
|
onToggleCellSelection={toggleCellSelection}
|
||||||
const width = zone.bbox_px.w * scale
|
onSetCellColor={setCellColor}
|
||||||
const height = zone.bbox_px.h * scale
|
/>
|
||||||
const rowH = avgRowH * scale
|
) : (
|
||||||
const fontSize = Math.max(7, rowH * 0.5)
|
<div className="p-8 text-center text-gray-400">
|
||||||
|
<p>Kein Unified Grid verfügbar.</p>
|
||||||
const cellMap = new Map<string, GridEditorCell>()
|
<button onClick={buildUnified} className="mt-2 text-teal-600 text-sm">Jetzt aufbauen</button>
|
||||||
for (const cell of zone.cells) {
|
|
||||||
cellMap.set(`${cell.row_index}_${cell.col_index}`, cell)
|
|
||||||
}
|
|
||||||
|
|
||||||
const colWidths = zone.columns.map((col) => Math.max(5, ((col.x_max_px ?? 0) - (col.x_min_px ?? 0)) * scale))
|
|
||||||
const totalColW = colWidths.reduce((s, w) => s + w, 0)
|
|
||||||
const colScale = totalColW > 0 ? width / totalColW : 1
|
|
||||||
const numCols = zone.columns.length
|
|
||||||
|
|
||||||
// Distribute box height proportionally by text line count per row
|
|
||||||
const rowLineCounts = zone.rows.map((row) => {
|
|
||||||
const maxLines = Math.max(1, ...zone.cells
|
|
||||||
.filter((c) => c.row_index === row.index)
|
|
||||||
.map((c) => (c.text ?? '').split('\n').length))
|
|
||||||
return maxLines
|
|
||||||
})
|
|
||||||
const totalLines = rowLineCounts.reduce((s, n) => s + n, 0)
|
|
||||||
const lineUnitH = totalLines > 0 ? height / totalLines : height
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div
|
|
||||||
className="absolute"
|
|
||||||
style={{
|
|
||||||
left: `${left}px`,
|
|
||||||
top: `${top}px`,
|
|
||||||
width: `${width}px`,
|
|
||||||
height: `${height}px`,
|
|
||||||
border: `${Math.max(1.5, 2 * scale)}px solid ${boxColor}`,
|
|
||||||
backgroundColor: `${boxColor}0a`,
|
|
||||||
borderRadius: `${Math.max(1, 3 * scale)}px`,
|
|
||||||
fontSize: `${fontSize}px`,
|
|
||||||
lineHeight: '1.3',
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
<div style={{ display: 'grid', gridTemplateColumns: colWidths.map((w) => `${(w * colScale).toFixed(1)}px`).join(' ') }}>
|
|
||||||
{zone.rows.map((row, rowIdx) => {
|
|
||||||
const isSpanning = zone.cells.some((c) => c.row_index === row.index && c.col_type === 'spanning_header')
|
|
||||||
|
|
||||||
// Height proportional to text line count
|
|
||||||
const rowLines = rowLineCounts[rowIdx] || 1
|
|
||||||
const cellRowH = lineUnitH * rowLines
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div key={row.index} style={{ display: 'contents' }}>
|
|
||||||
{isSpanning ? (
|
|
||||||
zone.cells
|
|
||||||
.filter((c) => c.row_index === row.index && c.col_type === 'spanning_header')
|
|
||||||
.sort((a, b) => a.col_index - b.col_index)
|
|
||||||
.map((cell) => {
|
|
||||||
const colspan = cell.colspan || numCols
|
|
||||||
const color = getCellColor(cell)
|
|
||||||
return (
|
|
||||||
<div
|
|
||||||
key={cell.cell_id}
|
|
||||||
className={`px-1 overflow-hidden ${row.is_header ? 'font-bold' : ''}`}
|
|
||||||
style={{
|
|
||||||
gridColumn: `${cell.col_index + 1} / ${cell.col_index + 1 + colspan}`,
|
|
||||||
height: `${cellRowH}px`,
|
|
||||||
color: color || undefined,
|
|
||||||
whiteSpace: 'pre-wrap',
|
|
||||||
display: 'flex',
|
|
||||||
alignItems: 'center',
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
{cell.text}
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
})
|
|
||||||
) : (
|
|
||||||
zone.columns.map((col) => {
|
|
||||||
const cell = cellMap.get(`${row.index}_${col.index}`)
|
|
||||||
const color = getCellColor(cell)
|
|
||||||
const isBold = col.bold || cell?.is_bold || row.is_header
|
|
||||||
const text = cell?.text ?? ''
|
|
||||||
const isMultiLine = text.includes('\n')
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div
|
|
||||||
key={col.index}
|
|
||||||
className={`px-1 overflow-hidden ${isBold ? 'font-bold' : ''}`}
|
|
||||||
style={{
|
|
||||||
height: `${cellRowH}px`,
|
|
||||||
color: color || undefined,
|
|
||||||
whiteSpace: isMultiLine ? 'pre-wrap' : 'nowrap',
|
|
||||||
textOverflow: isMultiLine ? undefined : 'ellipsis',
|
|
||||||
display: 'flex',
|
|
||||||
alignItems: isMultiLine ? 'flex-start' : 'center',
|
|
||||||
paddingLeft: isMultiLine ? `${fontSize * 0.5}px` : undefined,
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
{text}
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
})
|
|
||||||
)}
|
|
||||||
</div>
|
</div>
|
||||||
)
|
)}
|
||||||
})}
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// Coordinate grid
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
function CoordinateGrid({ imgW, imgH, scale, spacing }: {
|
|
||||||
imgW: number; imgH: number; scale: number; spacing: number
|
|
||||||
}) {
|
|
||||||
const lines: JSX.Element[] = []
|
|
||||||
for (let x = 0; x <= imgW; x += spacing) {
|
|
||||||
const px = x * scale
|
|
||||||
lines.push(
|
|
||||||
<div key={`v${x}`} className="absolute top-0 bottom-0 pointer-events-none" style={{ left: `${px}px`, width: '1px', background: 'rgba(0,150,255,0.2)' }}>
|
|
||||||
<span className="absolute top-0 left-1 text-[8px] text-blue-400 font-mono">{x}</span>
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
for (let y = 0; y <= imgH; y += spacing) {
|
|
||||||
const px = y * scale
|
|
||||||
lines.push(
|
|
||||||
<div key={`h${y}`} className="absolute left-0 right-0 pointer-events-none" style={{ top: `${px}px`, height: '1px', background: 'rgba(0,150,255,0.2)' }}>
|
|
||||||
<span className="absolute left-1 top-0.5 text-[8px] text-blue-400 font-mono">{y}</span>
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
return <>{lines}</>
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -2350,3 +2350,62 @@ async def build_box_grids(session_id: str, request: Request):
|
|||||||
"spell_fixes": spell_fixes,
|
"spell_fixes": spell_fixes,
|
||||||
"zones": zones,
|
"zones": zones,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Unified Grid endpoint
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@router.post("/sessions/{session_id}/build-unified-grid")
async def build_unified_grid_endpoint(session_id: str):
    """Build and persist the single-zone unified grid for a session.

    Reads the session's existing ``grid_editor_result``, passes its zones,
    image size and layout metrics to ``build_unified_grid`` and stores the
    outcome under ``unified_grid_result``. The source ``grid_editor_result``
    is not written to.

    Raises:
        HTTPException: 404 if the session does not exist, 400 if it has no
            ``grid_editor_result`` yet.

    Returns:
        The dict produced by ``build_unified_grid``.
    """
    record = await get_session_db(session_id)
    if not record:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    source_grid = record.get("grid_editor_result")
    if not source_grid:
        raise HTTPException(status_code=400, detail="No grid data. Run build-grid first.")

    # Imported at call time, only once the request has passed validation.
    from unified_grid import build_unified_grid

    builder_kwargs = {
        "zones": source_grid.get("zones", []),
        "image_width": source_grid.get("image_width", 0),
        "image_height": source_grid.get("image_height", 0),
        "layout_metrics": source_grid.get("layout_metrics", {}),
    }
    unified = build_unified_grid(**builder_kwargs)

    # Stored in its own field so the multi-zone grid stays available.
    await update_session_db(session_id, unified_grid_result=unified)

    stats = unified.get("summary", {})
    logger.info(
        "build-unified-grid session %s: %d rows, %d cells",
        session_id,
        stats.get("total_rows", 0),
        stats.get("total_cells", 0),
    )

    return unified
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/sessions/{session_id}/unified-grid")
async def get_unified_grid(session_id: str):
    """Return the unified grid previously stored for a session.

    Raises:
        HTTPException: 404 if the session does not exist, and 404 as well if
            the session has no (or an empty) ``unified_grid_result``.
    """
    record = await get_session_db(session_id)
    if not record:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    stored = record.get("unified_grid_result")
    if stored:
        return stored

    raise HTTPException(
        status_code=404,
        detail="No unified grid. Run build-unified-grid first.",
    )
|
||||||
|
|||||||
425
klausur-service/backend/unified_grid.py
Normal file
425
klausur-service/backend/unified_grid.py
Normal file
@@ -0,0 +1,425 @@
|
|||||||
|
"""
|
||||||
|
Unified Grid Builder — merges multi-zone grid into a single Excel-like grid.
|
||||||
|
|
||||||
|
Takes content zone + box zones and produces one unified zone where:
|
||||||
|
- All content rows use the dominant row height
|
||||||
|
- Full-width boxes are integrated directly (box rows replace standard rows)
|
||||||
|
- Partial-width boxes: extra rows inserted if box has more lines than standard
|
||||||
|
- Box-origin cells carry metadata (bg_color, border) for visual distinction
|
||||||
|
|
||||||
|
The result is a single-zone StructuredGrid that can be:
|
||||||
|
- Rendered in an Excel-like editor
|
||||||
|
- Exported to Excel/CSV
|
||||||
|
- Edited with unified row/column numbering
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import statistics
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_dominant_row_height(content_zone: Dict) -> float:
|
||||||
|
"""Median of content row-to-row spacings, excluding box-gap jumps."""
|
||||||
|
rows = content_zone.get("rows", [])
|
||||||
|
if len(rows) < 2:
|
||||||
|
return 47.0
|
||||||
|
|
||||||
|
spacings = []
|
||||||
|
for i in range(len(rows) - 1):
|
||||||
|
y1 = rows[i].get("y_min_px", rows[i].get("y_min", 0))
|
||||||
|
y2 = rows[i + 1].get("y_min_px", rows[i + 1].get("y_min", 0))
|
||||||
|
d = y2 - y1
|
||||||
|
if 0 < d < 100: # exclude box-gap jumps
|
||||||
|
spacings.append(d)
|
||||||
|
|
||||||
|
if not spacings:
|
||||||
|
return 47.0
|
||||||
|
spacings.sort()
|
||||||
|
return spacings[len(spacings) // 2]
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_boxes(
|
||||||
|
box_zones: List[Dict],
|
||||||
|
content_width: float,
|
||||||
|
) -> List[Dict]:
|
||||||
|
"""Classify each box as full_width or partial_width."""
|
||||||
|
result = []
|
||||||
|
for bz in box_zones:
|
||||||
|
bb = bz.get("bbox_px", {})
|
||||||
|
bw = bb.get("w", 0)
|
||||||
|
bx = bb.get("x", 0)
|
||||||
|
|
||||||
|
if bw >= content_width * 0.85:
|
||||||
|
classification = "full_width"
|
||||||
|
side = "center"
|
||||||
|
else:
|
||||||
|
classification = "partial_width"
|
||||||
|
# Determine which side of the page the box is on
|
||||||
|
page_center = content_width / 2
|
||||||
|
box_center = bx + bw / 2
|
||||||
|
side = "right" if box_center > page_center else "left"
|
||||||
|
|
||||||
|
# Count total text lines in box (including \n within cells)
|
||||||
|
total_lines = sum(
|
||||||
|
(c.get("text", "").count("\n") + 1)
|
||||||
|
for c in bz.get("cells", [])
|
||||||
|
)
|
||||||
|
|
||||||
|
result.append({
|
||||||
|
"zone": bz,
|
||||||
|
"classification": classification,
|
||||||
|
"side": side,
|
||||||
|
"y_start": bb.get("y", 0),
|
||||||
|
"y_end": bb.get("y", 0) + bb.get("h", 0),
|
||||||
|
"total_lines": total_lines,
|
||||||
|
"bg_hex": bz.get("box_bg_hex", ""),
|
||||||
|
"bg_color": bz.get("box_bg_color", ""),
|
||||||
|
})
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def build_unified_grid(
    zones: List[Dict],
    image_width: int,
    image_height: int,
    layout_metrics: Dict,
) -> Dict[str, Any]:
    """Merge a content zone and its box zones into one unified zone.

    Content rows are walked from top to bottom. Boxes are processed in order
    of their ``bbox_px.y``: content rows starting above a box are emitted
    first, then the box itself (full-width or partial-width handling), and
    content rows whose top edge is not below the box's lower edge are skipped
    afterwards. Remaining content rows are appended at the end.

    If ``zones`` contains no content zone, ``{"zones": zones}`` is returned
    unchanged. If several content zones are present, the last one is used.

    Returns:
        Dict with a single-element ``zones`` list plus ``image_width``,
        ``image_height``, ``layout_metrics``, ``summary``, ``is_unified`` and
        ``dominant_row_h``.
    """
    content_zone = None
    boxes: List[Dict] = []
    for candidate in zones:
        kind = candidate.get("zone_type")
        if kind == "content":
            content_zone = candidate
        elif kind == "box":
            boxes.append(candidate)

    if not content_zone:
        logger.warning("build_unified_grid: no content zone found")
        return {"zones": zones}  # fallback: return as-is

    box_zones = sorted(boxes, key=lambda zone: zone.get("bbox_px", {}).get("y", 0))

    content_rows = content_zone.get("rows", [])
    content_cols = content_zone.get("columns", [])
    content_bbox = content_zone.get("bbox_px", {})
    content_width = content_bbox.get("w", image_width)
    content_x = content_bbox.get("x", 0)
    num_cols = len(content_cols)

    dominant_h = _compute_dominant_row_height(content_zone)
    box_infos = _classify_boxes(box_zones, content_width)

    logger.info(
        "build_unified_grid: dominant_h=%.1f, %d content rows, %d boxes (%s)",
        dominant_h, len(content_rows), len(box_infos),
        [info["classification"] for info in box_infos],
    )

    # Content cells grouped by the row they belong to.
    cells_by_row: Dict[int, List[Dict]] = {}
    for cell in content_zone.get("cells", []):
        cells_by_row.setdefault(cell.get("row_index", -1), []).append(cell)

    unified_rows: List[Dict] = []
    unified_cells: List[Dict] = []
    next_row = 0      # index the next unified row will receive
    cursor = 0        # position of the next unprocessed content row
    row_count = len(content_rows)

    def _top(row: Dict):
        return row.get("y_min_px", row.get("y_min", 0))

    for info in box_infos:
        zone = info["zone"]
        box_top = info["y_start"]
        box_bottom = info["y_end"]

        # Content rows that start above the box.
        while cursor < row_count and _top(content_rows[cursor]) < box_top:
            _add_content_row(
                unified_rows, unified_cells, next_row,
                content_rows[cursor], cells_by_row, dominant_h, image_height,
            )
            next_row += 1
            cursor += 1

        if info["classification"] == "full_width":
            # The box's own rows take the place of content rows.
            _add_full_width_box(
                unified_rows, unified_cells, next_row,
                zone, info, dominant_h, num_cols, image_height,
            )
            next_row += len(zone.get("rows", []))
        else:
            # Box rows are merged with the content rows beside the box.
            next_row = _add_partial_width_box(
                unified_rows, unified_cells, next_row,
                zone, info, content_rows, cells_by_row,
                cursor, dominant_h, num_cols, image_height,
                content_x, content_width,
            )

        # Move past every content row that does not start below the box.
        while cursor < row_count and _top(content_rows[cursor]) <= box_bottom:
            cursor += 1

    # Content rows below the last box.
    for remaining in content_rows[cursor:]:
        _add_content_row(
            unified_rows, unified_cells, next_row,
            remaining, cells_by_row, dominant_h, image_height,
        )
        next_row += 1

    unified_zone = {
        "zone_index": 0,
        "zone_type": "unified",
        "bbox_px": content_bbox,
        "bbox_pct": content_zone.get("bbox_pct", {}),
        "border": None,
        "word_count": sum(len(cell.get("word_boxes", [])) for cell in unified_cells),
        "columns": content_cols,
        "rows": unified_rows,
        "cells": unified_cells,
        "header_rows": [],
    }

    logger.info(
        "build_unified_grid: %d unified rows, %d cells (from %d content + %d box zones)",
        len(unified_rows), len(unified_cells),
        len(content_rows), len(box_zones),
    )

    summary = {
        "total_zones": 1,
        "total_columns": num_cols,
        "total_rows": len(unified_rows),
        "total_cells": len(unified_cells),
    }
    return {
        "zones": [unified_zone],
        "image_width": image_width,
        "image_height": image_height,
        "layout_metrics": layout_metrics,
        "summary": summary,
        "is_unified": True,
        "dominant_row_h": dominant_h,
    }
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _make_row(idx: int, y: float, h: float, img_h: int, is_header: bool = False) -> Dict:
|
||||||
|
return {
|
||||||
|
"index": idx,
|
||||||
|
"row_index": idx,
|
||||||
|
"y_min_px": round(y),
|
||||||
|
"y_max_px": round(y + h),
|
||||||
|
"y_min_pct": round(y / img_h * 100, 2) if img_h else 0,
|
||||||
|
"y_max_pct": round((y + h) / img_h * 100, 2) if img_h else 0,
|
||||||
|
"is_header": is_header,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _remap_cell(cell: Dict, new_row: int, new_col: int = None,
|
||||||
|
source_type: str = "content", box_region: Dict = None) -> Dict:
|
||||||
|
"""Create a new cell dict with remapped indices."""
|
||||||
|
c = dict(cell)
|
||||||
|
c["row_index"] = new_row
|
||||||
|
if new_col is not None:
|
||||||
|
c["col_index"] = new_col
|
||||||
|
c["cell_id"] = f"U_R{new_row:02d}_C{c.get('col_index', 0)}"
|
||||||
|
c["source_zone_type"] = source_type
|
||||||
|
if box_region:
|
||||||
|
c["box_region"] = box_region
|
||||||
|
return c
|
||||||
|
|
||||||
|
|
||||||
|
def _add_content_row(
|
||||||
|
unified_rows, unified_cells, row_idx,
|
||||||
|
content_row, cells_by_row, dominant_h, img_h,
|
||||||
|
):
|
||||||
|
"""Add a single content row to the unified grid."""
|
||||||
|
y = content_row.get("y_min_px", content_row.get("y_min", 0))
|
||||||
|
is_hdr = content_row.get("is_header", False)
|
||||||
|
unified_rows.append(_make_row(row_idx, y, dominant_h, img_h, is_hdr))
|
||||||
|
|
||||||
|
for cell in cells_by_row.get(content_row.get("index", -1), []):
|
||||||
|
unified_cells.append(_remap_cell(cell, row_idx, source_type="content"))
|
||||||
|
|
||||||
|
|
||||||
|
def _add_full_width_box(
|
||||||
|
unified_rows, unified_cells, start_row_idx,
|
||||||
|
box_zone, box_info, dominant_h, num_cols, img_h,
|
||||||
|
):
|
||||||
|
"""Add a full-width box's rows to the unified grid."""
|
||||||
|
box_rows = box_zone.get("rows", [])
|
||||||
|
box_cells = box_zone.get("cells", [])
|
||||||
|
box_region = {"bg_hex": box_info["bg_hex"], "bg_color": box_info["bg_color"], "border": True}
|
||||||
|
|
||||||
|
# Distribute box height evenly among its rows
|
||||||
|
box_h = box_info["y_end"] - box_info["y_start"]
|
||||||
|
row_h = box_h / len(box_rows) if box_rows else dominant_h
|
||||||
|
|
||||||
|
for i, br in enumerate(box_rows):
|
||||||
|
y = box_info["y_start"] + i * row_h
|
||||||
|
new_idx = start_row_idx + i
|
||||||
|
is_hdr = br.get("is_header", False)
|
||||||
|
unified_rows.append(_make_row(new_idx, y, row_h, img_h, is_hdr))
|
||||||
|
|
||||||
|
for cell in box_cells:
|
||||||
|
if cell.get("row_index") == br.get("index", i):
|
||||||
|
unified_cells.append(
|
||||||
|
_remap_cell(cell, new_idx, source_type="box", box_region=box_region)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_partial_width_box(
|
||||||
|
unified_rows, unified_cells, start_row_idx,
|
||||||
|
box_zone, box_info, content_rows, content_cells_by_row,
|
||||||
|
content_row_ptr, dominant_h, num_cols, img_h,
|
||||||
|
content_x, content_width,
|
||||||
|
) -> int:
|
||||||
|
"""Add a partial-width box merged with content rows.
|
||||||
|
|
||||||
|
Returns the next unified_row_idx after processing.
|
||||||
|
"""
|
||||||
|
by_start = box_info["y_start"]
|
||||||
|
by_end = box_info["y_end"]
|
||||||
|
box_h = by_end - by_start
|
||||||
|
box_region = {"bg_hex": box_info["bg_hex"], "bg_color": box_info["bg_color"], "border": True}
|
||||||
|
|
||||||
|
# Content rows in the box's Y range
|
||||||
|
overlap_content_rows = []
|
||||||
|
ptr = content_row_ptr
|
||||||
|
while ptr < len(content_rows):
|
||||||
|
cr = content_rows[ptr]
|
||||||
|
cry = cr.get("y_min_px", cr.get("y_min", 0))
|
||||||
|
if cry > by_end:
|
||||||
|
break
|
||||||
|
if cry >= by_start:
|
||||||
|
overlap_content_rows.append(cr)
|
||||||
|
ptr += 1
|
||||||
|
|
||||||
|
# How many standard rows fit in the box height
|
||||||
|
standard_rows = max(1, math.floor(box_h / dominant_h))
|
||||||
|
# How many text lines the box actually has
|
||||||
|
box_text_lines = box_info["total_lines"]
|
||||||
|
# Extra rows needed
|
||||||
|
extra_rows = max(0, box_text_lines - standard_rows)
|
||||||
|
total_rows_for_region = standard_rows + extra_rows
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"partial box: standard=%d, box_lines=%d, extra=%d, content_overlap=%d",
|
||||||
|
standard_rows, box_text_lines, extra_rows, len(overlap_content_rows),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Determine which columns the box occupies
|
||||||
|
box_bb = box_zone.get("bbox_px", {})
|
||||||
|
box_x = box_bb.get("x", 0)
|
||||||
|
box_w = box_bb.get("w", 0)
|
||||||
|
|
||||||
|
# Map box to content columns: find which content columns overlap
|
||||||
|
box_col_start = 0
|
||||||
|
box_col_end = num_cols
|
||||||
|
content_cols_list = []
|
||||||
|
for z_col_idx in range(num_cols):
|
||||||
|
# Find the column definition by checking all column entries
|
||||||
|
# Simple heuristic: if box starts past halfway, it's the right columns
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Simpler approach: box on right side → last N columns
|
||||||
|
# box on left side → first N columns
|
||||||
|
if box_info["side"] == "right":
|
||||||
|
# Box starts at x=box_x. Find first content column that overlaps
|
||||||
|
box_col_start = num_cols # default: beyond all columns
|
||||||
|
for z in (box_zone.get("columns") or [{"index": 0}]):
|
||||||
|
pass
|
||||||
|
# Use content column positions to determine overlap
|
||||||
|
content_cols_data = [
|
||||||
|
{"idx": c.get("index", i), "x_min": c.get("x_min_px", 0), "x_max": c.get("x_max_px", 0)}
|
||||||
|
for i, c in enumerate(content_rows[0:0] or []) # placeholder
|
||||||
|
]
|
||||||
|
# Simple: split columns at midpoint
|
||||||
|
box_col_start = num_cols // 2 # right half
|
||||||
|
box_col_end = num_cols
|
||||||
|
else:
|
||||||
|
box_col_start = 0
|
||||||
|
box_col_end = num_cols // 2
|
||||||
|
|
||||||
|
# Build rows for this region
|
||||||
|
box_cells = box_zone.get("cells", [])
|
||||||
|
box_rows = box_zone.get("rows", [])
|
||||||
|
row_idx = start_row_idx
|
||||||
|
|
||||||
|
# Expand box cell texts with \n into individual lines for row mapping
|
||||||
|
box_lines: List[Tuple[str, Dict]] = [] # (text_line, parent_cell)
|
||||||
|
for bc in sorted(box_cells, key=lambda c: c.get("row_index", 0)):
|
||||||
|
text = bc.get("text", "")
|
||||||
|
for line in text.split("\n"):
|
||||||
|
box_lines.append((line.strip(), bc))
|
||||||
|
|
||||||
|
for i in range(total_rows_for_region):
|
||||||
|
y = by_start + i * dominant_h
|
||||||
|
unified_rows.append(_make_row(row_idx, y, dominant_h, img_h))
|
||||||
|
|
||||||
|
# Content cells for this row (from overlapping content rows)
|
||||||
|
if i < len(overlap_content_rows):
|
||||||
|
cr = overlap_content_rows[i]
|
||||||
|
for cell in content_cells_by_row.get(cr.get("index", -1), []):
|
||||||
|
# Only include cells from columns NOT covered by the box
|
||||||
|
ci = cell.get("col_index", 0)
|
||||||
|
if ci < box_col_start or ci >= box_col_end:
|
||||||
|
unified_cells.append(_remap_cell(cell, row_idx, source_type="content"))
|
||||||
|
|
||||||
|
# Box cell for this row
|
||||||
|
if i < len(box_lines):
|
||||||
|
line_text, parent_cell = box_lines[i]
|
||||||
|
box_cell = {
|
||||||
|
"cell_id": f"U_R{row_idx:02d}_C{box_col_start}",
|
||||||
|
"row_index": row_idx,
|
||||||
|
"col_index": box_col_start,
|
||||||
|
"col_type": "spanning_header" if (box_col_end - box_col_start) > 1 else parent_cell.get("col_type", "column_1"),
|
||||||
|
"colspan": box_col_end - box_col_start,
|
||||||
|
"text": line_text,
|
||||||
|
"confidence": parent_cell.get("confidence", 0),
|
||||||
|
"bbox_px": parent_cell.get("bbox_px", {}),
|
||||||
|
"bbox_pct": parent_cell.get("bbox_pct", {}),
|
||||||
|
"word_boxes": [],
|
||||||
|
"ocr_engine": parent_cell.get("ocr_engine", ""),
|
||||||
|
"is_bold": parent_cell.get("is_bold", False),
|
||||||
|
"source_zone_type": "box",
|
||||||
|
"box_region": box_region,
|
||||||
|
}
|
||||||
|
unified_cells.append(box_cell)
|
||||||
|
|
||||||
|
row_idx += 1
|
||||||
|
|
||||||
|
return row_idx
|
||||||
Reference in New Issue
Block a user