Unified Grid: merge all zones into single Excel-like grid
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 32s
CI / test-go-edu-search (push) Successful in 45s
CI / test-python-klausur (push) Failing after 2m35s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 33s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 32s
CI / test-go-edu-search (push) Successful in 45s
CI / test-python-klausur (push) Failing after 2m35s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 33s
Backend (unified_grid.py):
- build_unified_grid(): merges content + box zones into one zone
- Dominant row height from median of content row spacings
- Full-width boxes: rows integrated directly
- Partial-width boxes: extra rows are inserted when the box contains more text
lines than fit in the standard rows it spans (e.g., 7 lines in a 5-row-high box)
- Box-origin cells tagged with source_zone_type + box_region metadata
Backend (grid_editor_api.py):
- POST /sessions/{id}/build-unified-grid → persists as unified_grid_result
- GET /sessions/{id}/unified-grid → retrieve persisted result
Frontend:
- GridEditorCell: added source_zone_type, box_region fields
- GridTable: box-origin cells get tinted background + left border
- StepAnsicht: split-view with original image (left) + editable
unified GridTable (right). Auto-builds on first load.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -488,7 +488,13 @@ export function GridTable({
|
||||
} ${isMultiSelected ? 'bg-teal-50/60 dark:bg-teal-900/20' : ''} ${
|
||||
isLowConf && !isMultiSelected ? 'bg-amber-50/50 dark:bg-amber-900/10' : ''
|
||||
} ${row.is_header && !isMultiSelected ? 'bg-blue-50/50 dark:bg-blue-900/10' : ''}`}
|
||||
style={{ height: `${rowH}px` }}
|
||||
style={{
|
||||
height: `${rowH}px`,
|
||||
...(cell?.box_region?.bg_hex ? {
|
||||
backgroundColor: `${cell.box_region.bg_hex}12`,
|
||||
borderLeft: cell.box_region.border ? `3px solid ${cell.box_region.bg_hex}60` : undefined,
|
||||
} : {}),
|
||||
}}
|
||||
onContextMenu={(e) => {
|
||||
if (onSetCellColor) {
|
||||
e.preventDefault()
|
||||
|
||||
@@ -128,6 +128,14 @@ export interface GridEditorCell {
|
||||
color_override?: string | null
|
||||
/** Number of columns this cell spans (merged cell). Default 1. */
|
||||
colspan?: number
|
||||
/** Source zone type when in unified grid. */
|
||||
source_zone_type?: 'content' | 'box'
|
||||
/** Box visual metadata for cells from box zones. */
|
||||
box_region?: {
|
||||
bg_hex?: string
|
||||
bg_color?: string
|
||||
border?: boolean
|
||||
}
|
||||
}
|
||||
|
||||
/** Layout dividers for the visual column/margin editor on the original image. */
|
||||
|
||||
@@ -1,20 +1,16 @@
|
||||
'use client'
|
||||
|
||||
/**
|
||||
* StepAnsicht — Split-view page layout comparison.
|
||||
* StepAnsicht — Unified Grid View.
|
||||
*
|
||||
* Left: Original scan with OCR word overlay
|
||||
* Right: Reconstructed layout with averaged row heights per section
|
||||
*
|
||||
* Layout principle: the page is divided into vertical sections separated
|
||||
* by boxes. Each section gets a uniform row height calculated from
|
||||
* (last_row_y - first_row_y) / (num_rows - 1). Boxes are rendered
|
||||
* inline between sections (not as floating overlays).
|
||||
* Right: Unified grid (single zone, boxes integrated) rendered via GridTable
|
||||
*/
|
||||
|
||||
import { useEffect, useMemo, useRef, useState } from 'react'
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import { useGridEditor } from '@/components/grid-editor/useGridEditor'
|
||||
import type { GridZone, GridEditorCell, GridRow } from '@/components/grid-editor/types'
|
||||
import { GridTable } from '@/components/grid-editor/GridTable'
|
||||
import type { GridZone } from '@/components/grid-editor/types'
|
||||
|
||||
const KLAUSUR_API = '/klausur-api'
|
||||
|
||||
@@ -23,491 +19,172 @@ interface StepAnsichtProps {
|
||||
onNext: () => void
|
||||
}
|
||||
|
||||
/**
 * One vertical slice of the page: either a run of content rows or a
 * single box zone.
 */
interface PageSection {
  /** Discriminator: content rows or a box zone. */
  type: 'content' | 'box'
  /** Top edge as a pixel y coordinate in the original image. */
  yStart: number
  /** Bottom edge as a pixel y coordinate in the original image. */
  yEnd: number
  /** The box zone (box sections). */
  zone?: GridZone
  /** Subset of the content zone's rows (content sections). */
  rows?: GridRow[]
  /** Averaged row height in original-image pixels. */
  avgRowH: number
}
|
||||
|
||||
/**
 * Pick the text color to render for a cell.
 *
 * Precedence: a truthy `color_override` on the cell wins; otherwise the
 * `color` of the first word box whose `color_name` is set and is not
 * `'black'`.
 *
 * @param cell - The cell to inspect; may be undefined.
 * @returns The color string, or `null` when there is no cell, no override
 *   and no non-black word box (or that word box carries no `color`).
 */
function getCellColor(cell: GridEditorCell | undefined): string | null {
  if (!cell) {
    return null
  }

  const override = cell.color_override
  if (override) {
    return override
  }

  for (const box of cell.word_boxes ?? []) {
    if (box.color_name && box.color_name !== 'black') {
      // Only the first non-black word box is consulted.
      return box.color ?? null
    }
  }
  return null
}
|
||||
|
||||
export function StepAnsicht({ sessionId, onNext }: StepAnsichtProps) {
|
||||
const { grid, loading, error, loadGrid } = useGridEditor(sessionId)
|
||||
const gridEditor = useGridEditor(sessionId)
|
||||
const {
|
||||
loading, error, selectedCell, setSelectedCell,
|
||||
updateCellText, toggleColumnBold, toggleRowHeader,
|
||||
getAdjacentCell, deleteColumn, addColumn, deleteRow, addRow,
|
||||
commitUndoPoint, selectedCells, toggleCellSelection,
|
||||
clearCellSelection, toggleSelectedBold, setCellColor,
|
||||
saveGrid, saving, dirty, undo, redo, canUndo, canRedo,
|
||||
} = gridEditor
|
||||
|
||||
const [unifiedGrid, setUnifiedGrid] = useState<any>(null)
|
||||
const [building, setBuilding] = useState(false)
|
||||
const [buildError, setBuildError] = useState<string | null>(null)
|
||||
const leftRef = useRef<HTMLDivElement>(null)
|
||||
const [panelWidth, setPanelWidth] = useState(0)
|
||||
const [showGrid, setShowGrid] = useState(true)
|
||||
const [gridSpacing, setGridSpacing] = useState(100)
|
||||
const [leftHeight, setLeftHeight] = useState(600)
|
||||
const [showGrid, setShowGrid] = useState(false)
|
||||
|
||||
// Build unified grid
|
||||
const buildUnified = useCallback(async () => {
|
||||
if (!sessionId) return
|
||||
setBuilding(true)
|
||||
setBuildError(null)
|
||||
try {
|
||||
const res = await fetch(
|
||||
`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/build-unified-grid`,
|
||||
{ method: 'POST' },
|
||||
)
|
||||
if (!res.ok) {
|
||||
const d = await res.json().catch(() => ({}))
|
||||
throw new Error(d.detail || `HTTP ${res.status}`)
|
||||
}
|
||||
const data = await res.json()
|
||||
setUnifiedGrid(data)
|
||||
} catch (e) {
|
||||
setBuildError(e instanceof Error ? e.message : String(e))
|
||||
} finally {
|
||||
setBuilding(false)
|
||||
}
|
||||
}, [sessionId])
|
||||
|
||||
// Load unified grid on mount (or build if missing)
|
||||
useEffect(() => {
|
||||
if (sessionId) loadGrid()
|
||||
if (!sessionId) return
|
||||
;(async () => {
|
||||
try {
|
||||
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/unified-grid`)
|
||||
if (res.ok) {
|
||||
setUnifiedGrid(await res.json())
|
||||
} else {
|
||||
// Not built yet — build it
|
||||
buildUnified()
|
||||
}
|
||||
} catch {
|
||||
buildUnified()
|
||||
}
|
||||
})()
|
||||
}, [sessionId]) // eslint-disable-line react-hooks/exhaustive-deps
|
||||
|
||||
// Track left panel height for sync
|
||||
useEffect(() => {
|
||||
if (!leftRef.current) return
|
||||
const ro = new ResizeObserver(([entry]) => setPanelWidth(entry.contentRect.width))
|
||||
const ro = new ResizeObserver(([e]) => setLeftHeight(e.contentRect.height))
|
||||
ro.observe(leftRef.current)
|
||||
return () => ro.disconnect()
|
||||
}, [])
|
||||
|
||||
// Build page sections: split content rows around box zones
|
||||
const sections = useMemo(() => {
|
||||
if (!grid) return []
|
||||
const contentZone = grid.zones.find((z) => z.zone_type === 'content')
|
||||
const boxZones = grid.zones.filter((z) => z.zone_type === 'box')
|
||||
.sort((a, b) => a.bbox_px.y - b.bbox_px.y)
|
||||
const unifiedZone: GridZone | null = unifiedGrid?.zones?.[0] ?? null
|
||||
|
||||
if (!contentZone) return []
|
||||
|
||||
const allRows = contentZone.rows
|
||||
const result: PageSection[] = []
|
||||
|
||||
// Box boundaries sorted by y
|
||||
const boxBounds = boxZones.map((bz) => ({
|
||||
zone: bz,
|
||||
yStart: bz.bbox_px.y,
|
||||
yEnd: bz.bbox_px.y + bz.bbox_px.h,
|
||||
}))
|
||||
|
||||
// Split content rows into sections around boxes
|
||||
let currentRows: GridRow[] = []
|
||||
let boxIdx = 0
|
||||
|
||||
for (const row of allRows) {
|
||||
const ry = row.y_min_px ?? (row as any).y_min ?? 0
|
||||
|
||||
// Check if we've passed a box boundary — insert box section
|
||||
while (boxIdx < boxBounds.length && ry >= boxBounds[boxIdx].yStart) {
|
||||
// Flush current content section
|
||||
if (currentRows.length > 0) {
|
||||
result.push(makeContentSection(currentRows))
|
||||
currentRows = []
|
||||
}
|
||||
// Insert box section
|
||||
const bb = boxBounds[boxIdx]
|
||||
const bRows = bb.zone.rows || []
|
||||
let bAvgH = 35
|
||||
if (bRows.length >= 2) {
|
||||
const bys = bRows.map((r) => r.y_min_px ?? (r as any).y_min ?? 0)
|
||||
bAvgH = (bys[bys.length - 1] - bys[0]) / (bRows.length - 1)
|
||||
}
|
||||
result.push({
|
||||
type: 'box',
|
||||
yStart: bb.yStart,
|
||||
yEnd: bb.yEnd,
|
||||
zone: bb.zone,
|
||||
avgRowH: bAvgH,
|
||||
})
|
||||
boxIdx++
|
||||
}
|
||||
|
||||
// Skip rows only if they fall FULLY inside a box (both Y and X overlap).
|
||||
// Small boxes (e.g. on the right half) don't cover left-side content rows.
|
||||
const rowCells = contentZone!.cells.filter((c) => c.row_index === row.index)
|
||||
const rowXMin = rowCells.length > 0
|
||||
? Math.min(...rowCells.map((c) => c.bbox_px?.x ?? contentZone!.bbox_px.x))
|
||||
: contentZone!.bbox_px.x
|
||||
const insideBox = boxBounds.some((bb) => {
|
||||
if (ry < bb.yStart || ry > bb.yEnd) return false
|
||||
// Check horizontal overlap: row must be mostly inside box x-range
|
||||
const boxXMin = bb.zone.bbox_px.x
|
||||
const boxXMax = boxXMin + bb.zone.bbox_px.w
|
||||
return rowXMin >= boxXMin - 20 && rowXMin <= boxXMax
|
||||
})
|
||||
if (!insideBox) {
|
||||
currentRows.push(row)
|
||||
}
|
||||
}
|
||||
|
||||
// Flush remaining content rows
|
||||
if (currentRows.length > 0) {
|
||||
result.push(makeContentSection(currentRows))
|
||||
}
|
||||
|
||||
// Insert remaining boxes (if any rows didn't trigger them)
|
||||
while (boxIdx < boxBounds.length) {
|
||||
const bb = boxBounds[boxIdx]
|
||||
const bRows = bb.zone.rows || []
|
||||
let bAvgH = 35
|
||||
if (bRows.length >= 2) {
|
||||
const bys = bRows.map((r) => r.y_min_px ?? (r as any).y_min ?? 0)
|
||||
bAvgH = (bys[bys.length - 1] - bys[0]) / (bRows.length - 1)
|
||||
}
|
||||
result.push({ type: 'box', yStart: bb.yStart, yEnd: bb.yEnd, zone: bb.zone, avgRowH: bAvgH })
|
||||
boxIdx++
|
||||
}
|
||||
|
||||
return result
|
||||
}, [grid])
|
||||
|
||||
if (loading) {
|
||||
if (loading || building) {
|
||||
return (
|
||||
<div className="flex items-center justify-center py-16">
|
||||
<div className="w-8 h-8 border-4 border-teal-500 border-t-transparent rounded-full animate-spin" />
|
||||
<span className="ml-3 text-gray-500">Lade Vorschau...</span>
|
||||
<span className="ml-3 text-gray-500">{building ? 'Baue Unified Grid...' : 'Lade...'}</span>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (error || !grid) {
|
||||
return (
|
||||
<div className="p-8 text-center">
|
||||
<p className="text-red-500 mb-4">{error || 'Keine Grid-Daten.'}</p>
|
||||
<button onClick={onNext} className="px-5 py-2 bg-teal-600 text-white rounded-lg">Weiter →</button>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const imgW = grid.image_width || 1
|
||||
const imgH = grid.image_height || 1
|
||||
const scale = panelWidth > 0 ? panelWidth / imgW : 0.5
|
||||
const panelHeight = imgH * scale
|
||||
const contentZone = grid.zones.find((z) => z.zone_type === 'content')
|
||||
|
||||
// Dominant row height: median of row-to-row spacings (excluding box-gap jumps)
|
||||
const dominantRowH = useMemo(() => {
|
||||
const cz = grid.zones.find((z) => z.zone_type === 'content')
|
||||
if (!cz || cz.rows.length < 2) return 47
|
||||
const spacings: number[] = []
|
||||
for (let i = 0; i < cz.rows.length - 1; i++) {
|
||||
const y1 = cz.rows[i].y_min_px ?? (cz.rows[i] as any).y_min ?? 0
|
||||
const y2 = cz.rows[i + 1].y_min_px ?? (cz.rows[i + 1] as any).y_min ?? 0
|
||||
const d = y2 - y1
|
||||
if (d > 0 && d < 100) spacings.push(d)
|
||||
}
|
||||
if (spacings.length === 0) return 47
|
||||
spacings.sort((a, b) => a - b)
|
||||
return spacings[Math.floor(spacings.length / 2)]
|
||||
}, [grid])
|
||||
|
||||
return (
|
||||
<div className="space-y-3">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h3 className="text-lg font-semibold text-gray-900 dark:text-white">Ansicht — Original vs. Rekonstruktion</h3>
|
||||
<h3 className="text-lg font-semibold text-gray-900 dark:text-white">Ansicht — Unified Grid</h3>
|
||||
<p className="text-sm text-gray-500 dark:text-gray-400">
|
||||
Links: Original mit OCR. Rechts: Rekonstruktion mit gemittelten Zeilenhöhen.
|
||||
Alle Inhalte in einem Grid. Boxen sind integriert (farbig markiert).
|
||||
{unifiedGrid && (
|
||||
<span className="ml-2 font-mono text-xs">
|
||||
{unifiedGrid.summary?.total_rows} Zeilen × {unifiedGrid.summary?.total_columns} Spalten
|
||||
{unifiedGrid.dominant_row_h && ` · Zeilenhöhe: ${Math.round(unifiedGrid.dominant_row_h)}px`}
|
||||
</span>
|
||||
)}
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<label className="flex items-center gap-1.5 text-xs text-gray-500">
|
||||
<input type="checkbox" checked={showGrid} onChange={(e) => setShowGrid(e.target.checked)} className="w-3.5 h-3.5 rounded" />
|
||||
Gitter
|
||||
</label>
|
||||
<select value={gridSpacing} onChange={(e) => setGridSpacing(Number(e.target.value))} className="text-xs px-1.5 py-1 rounded border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700">
|
||||
<option value={50}>50px</option>
|
||||
<option value={100}>100px</option>
|
||||
<option value={200}>200px</option>
|
||||
</select>
|
||||
<button onClick={onNext} className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 text-sm font-medium">Weiter →</button>
|
||||
<button
|
||||
onClick={buildUnified}
|
||||
disabled={building}
|
||||
className="px-3 py-1.5 bg-amber-600 text-white rounded-lg hover:bg-amber-700 text-xs font-medium disabled:opacity-50"
|
||||
>
|
||||
{building ? 'Baut...' : 'Neu aufbauen'}
|
||||
</button>
|
||||
<button onClick={onNext} className="px-5 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 text-sm font-medium">
|
||||
Weiter →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{(error || buildError) && (
|
||||
<div className="p-3 bg-red-50 dark:bg-red-900/30 border border-red-200 dark:border-red-800 rounded-lg text-red-700 dark:text-red-300 text-sm">
|
||||
{error || buildError}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Split view */}
|
||||
<div className="flex gap-2" style={{ height: `${panelHeight + 40}px` }}>
|
||||
<div className="flex gap-2">
|
||||
{/* LEFT: Original + OCR overlay */}
|
||||
<div ref={leftRef} className="flex-1 relative border border-gray-300 dark:border-gray-600 rounded-lg overflow-hidden bg-white dark:bg-gray-900">
|
||||
<div className="absolute top-0 left-0 px-2 py-0.5 bg-black/60 text-white text-[10px] font-medium rounded-br z-20">Original + OCR</div>
|
||||
<div ref={leftRef} className="w-1/3 border border-gray-300 dark:border-gray-600 rounded-lg overflow-hidden bg-white dark:bg-gray-900 flex-shrink-0">
|
||||
<div className="px-2 py-1 bg-black/60 text-white text-[10px] font-medium">Original + OCR</div>
|
||||
{sessionId && (
|
||||
<img
|
||||
src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/words-overlay`}
|
||||
alt="Original + OCR"
|
||||
className="absolute inset-0 w-full h-auto"
|
||||
style={{ height: `${panelHeight}px`, objectFit: 'contain' }}
|
||||
className="w-full h-auto"
|
||||
/>
|
||||
)}
|
||||
{showGrid && <CoordinateGrid imgW={imgW} imgH={imgH} scale={scale} spacing={gridSpacing} />}
|
||||
</div>
|
||||
|
||||
{/* RIGHT: Reconstruction */}
|
||||
<div className="flex-1 relative border border-gray-300 dark:border-gray-600 rounded-lg overflow-hidden bg-white dark:bg-gray-900">
|
||||
<div className="absolute top-0 left-0 px-2 py-0.5 bg-teal-600/80 text-white text-[10px] font-medium rounded-br z-20">Rekonstruktion</div>
|
||||
|
||||
{/* Render sections sequentially */}
|
||||
{sections.map((sec, si) => {
|
||||
if (sec.type === 'box' && sec.zone) {
|
||||
return (
|
||||
<BoxSectionRenderer
|
||||
key={`box-${si}`}
|
||||
zone={sec.zone}
|
||||
scale={scale}
|
||||
avgRowH={sec.avgRowH}
|
||||
/>
|
||||
)
|
||||
}
|
||||
if (sec.type === 'content' && sec.rows && contentZone) {
|
||||
return (
|
||||
<ContentSectionRenderer
|
||||
key={`sec-${si}`}
|
||||
zone={contentZone}
|
||||
rows={sec.rows}
|
||||
yStart={sec.yStart}
|
||||
scale={scale}
|
||||
avgRowH={dominantRowH}
|
||||
/>
|
||||
)
|
||||
}
|
||||
return null
|
||||
})}
|
||||
|
||||
{showGrid && <CoordinateGrid imgW={imgW} imgH={imgH} scale={scale} spacing={gridSpacing} />}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a content `PageSection` from a run of consecutive content rows.
 *
 * Row tops are read from `y_min_px`, falling back to a legacy `y_min`
 * field and then to 0. The section ends at the last row's `y_max_px`
 * (or legacy `y_max`); when neither is present, 30px below the last
 * row's top is used. The averaged row height is
 * `(lastTop - firstTop) / (rowCount - 1)`, or 35 when there are fewer
 * than two rows.
 *
 * @param rows - Rows belonging to the section, in page order.
 * @returns The section descriptor. An empty `rows` array yields a
 *   zero-extent section with the default row height instead of throwing.
 */
function makeContentSection(rows: GridRow[]): PageSection {
  // Without this guard `rows[rows.length - 1]` is undefined and the
  // property reads below would throw.
  if (rows.length === 0) {
    return { type: 'content', yStart: 0, yEnd: 0, rows, avgRowH: 35 }
  }

  const ys = rows.map((r) => r.y_min_px ?? (r as any).y_min ?? 0)
  const firstY = ys[0]
  const lastY = ys[ys.length - 1]
  const lastRow = rows[rows.length - 1]

  const yEnd = lastRow.y_max_px ?? (lastRow as any).y_max ?? lastY + 30
  const avgRowH = rows.length >= 2 ? (lastY - firstY) / (rows.length - 1) : 35

  return { type: 'content', yStart: firstY, yEnd, rows, avgRowH }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Content section renderer — rows from content zone at absolute positions
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function ContentSectionRenderer({ zone, rows, yStart, scale, avgRowH }: {
|
||||
zone: GridZone; rows: GridRow[]; yStart: number; scale: number; avgRowH: number
|
||||
}) {
|
||||
const cellMap = new Map<string, GridEditorCell>()
|
||||
for (const cell of zone.cells) {
|
||||
cellMap.set(`${cell.row_index}_${cell.col_index}`, cell)
|
||||
}
|
||||
|
||||
const rowH = avgRowH * scale
|
||||
const fontSize = Math.max(7, rowH * 0.55)
|
||||
|
||||
return (
|
||||
<>
|
||||
{rows.map((row, ri) => {
|
||||
const rowY = (row.y_min_px ?? (row as any).y_min ?? 0) * scale
|
||||
const isSpanning = zone.cells.some((c) => c.row_index === row.index && c.col_type === 'spanning_header')
|
||||
|
||||
// Column widths
|
||||
const colWidths = zone.columns.map((col) => Math.max(5, ((col.x_max_px ?? 0) - (col.x_min_px ?? 0)) * scale))
|
||||
const zoneLeft = zone.bbox_px.x * scale
|
||||
const zoneWidth = zone.bbox_px.w * scale
|
||||
const totalColW = colWidths.reduce((s, w) => s + w, 0)
|
||||
const colScale = totalColW > 0 ? zoneWidth / totalColW : 1
|
||||
|
||||
return (
|
||||
<div
|
||||
key={row.index}
|
||||
className="absolute"
|
||||
style={{
|
||||
left: `${zoneLeft}px`,
|
||||
top: `${rowY}px`,
|
||||
width: `${zoneWidth}px`,
|
||||
height: `${rowH}px`,
|
||||
display: 'grid',
|
||||
gridTemplateColumns: colWidths.map((w) => `${(w * colScale).toFixed(1)}px`).join(' '),
|
||||
fontSize: `${fontSize}px`,
|
||||
lineHeight: `${rowH}px`,
|
||||
}}
|
||||
>
|
||||
{isSpanning ? (
|
||||
zone.cells
|
||||
.filter((c) => c.row_index === row.index && c.col_type === 'spanning_header')
|
||||
.sort((a, b) => a.col_index - b.col_index)
|
||||
.map((cell) => {
|
||||
const colspan = cell.colspan || zone.columns.length
|
||||
const color = getCellColor(cell)
|
||||
return (
|
||||
<div
|
||||
key={cell.cell_id}
|
||||
className={`overflow-hidden ${row.is_header ? 'font-bold' : ''}`}
|
||||
style={{ gridColumn: `${cell.col_index + 1} / ${cell.col_index + 1 + colspan}`, color: color || undefined }}
|
||||
>
|
||||
{cell.text}
|
||||
</div>
|
||||
)
|
||||
})
|
||||
) : (
|
||||
zone.columns.map((col) => {
|
||||
const cell = cellMap.get(`${row.index}_${col.index}`)
|
||||
const color = getCellColor(cell)
|
||||
const isBold = col.bold || cell?.is_bold || row.is_header
|
||||
return (
|
||||
<div
|
||||
key={col.index}
|
||||
className={`overflow-hidden text-ellipsis whitespace-nowrap ${isBold ? 'font-bold' : ''}`}
|
||||
style={{ color: color || undefined }}
|
||||
>
|
||||
{cell?.text ?? ''}
|
||||
</div>
|
||||
)
|
||||
})
|
||||
{/* RIGHT: Unified Grid Table */}
|
||||
<div className="flex-1 border border-gray-300 dark:border-gray-600 rounded-lg overflow-auto bg-white dark:bg-gray-900" style={{ maxHeight: `${Math.max(600, leftHeight)}px` }}>
|
||||
<div className="px-2 py-1 bg-teal-600/80 text-white text-[10px] font-medium sticky top-0 z-20">
|
||||
Unified Grid
|
||||
{unifiedGrid?.is_unified && (
|
||||
<span className="ml-2 opacity-70">
|
||||
({unifiedGrid.summary?.total_rows}×{unifiedGrid.summary?.total_columns})
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Box section renderer — box zone at absolute position with border
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function BoxSectionRenderer({ zone, scale, avgRowH }: {
|
||||
zone: GridZone; scale: number; avgRowH: number
|
||||
}) {
|
||||
const boxColor = (zone as any).box_bg_hex || '#6b7280'
|
||||
if (!zone.cells || zone.cells.length === 0) return null
|
||||
|
||||
const left = zone.bbox_px.x * scale
|
||||
const top = zone.bbox_px.y * scale
|
||||
const width = zone.bbox_px.w * scale
|
||||
const height = zone.bbox_px.h * scale
|
||||
const rowH = avgRowH * scale
|
||||
const fontSize = Math.max(7, rowH * 0.5)
|
||||
|
||||
const cellMap = new Map<string, GridEditorCell>()
|
||||
for (const cell of zone.cells) {
|
||||
cellMap.set(`${cell.row_index}_${cell.col_index}`, cell)
|
||||
}
|
||||
|
||||
const colWidths = zone.columns.map((col) => Math.max(5, ((col.x_max_px ?? 0) - (col.x_min_px ?? 0)) * scale))
|
||||
const totalColW = colWidths.reduce((s, w) => s + w, 0)
|
||||
const colScale = totalColW > 0 ? width / totalColW : 1
|
||||
const numCols = zone.columns.length
|
||||
|
||||
// Distribute box height proportionally by text line count per row
|
||||
const rowLineCounts = zone.rows.map((row) => {
|
||||
const maxLines = Math.max(1, ...zone.cells
|
||||
.filter((c) => c.row_index === row.index)
|
||||
.map((c) => (c.text ?? '').split('\n').length))
|
||||
return maxLines
|
||||
})
|
||||
const totalLines = rowLineCounts.reduce((s, n) => s + n, 0)
|
||||
const lineUnitH = totalLines > 0 ? height / totalLines : height
|
||||
|
||||
return (
|
||||
<div
|
||||
className="absolute"
|
||||
style={{
|
||||
left: `${left}px`,
|
||||
top: `${top}px`,
|
||||
width: `${width}px`,
|
||||
height: `${height}px`,
|
||||
border: `${Math.max(1.5, 2 * scale)}px solid ${boxColor}`,
|
||||
backgroundColor: `${boxColor}0a`,
|
||||
borderRadius: `${Math.max(1, 3 * scale)}px`,
|
||||
fontSize: `${fontSize}px`,
|
||||
lineHeight: '1.3',
|
||||
}}
|
||||
>
|
||||
<div style={{ display: 'grid', gridTemplateColumns: colWidths.map((w) => `${(w * colScale).toFixed(1)}px`).join(' ') }}>
|
||||
{zone.rows.map((row, rowIdx) => {
|
||||
const isSpanning = zone.cells.some((c) => c.row_index === row.index && c.col_type === 'spanning_header')
|
||||
|
||||
// Height proportional to text line count
|
||||
const rowLines = rowLineCounts[rowIdx] || 1
|
||||
const cellRowH = lineUnitH * rowLines
|
||||
|
||||
return (
|
||||
<div key={row.index} style={{ display: 'contents' }}>
|
||||
{isSpanning ? (
|
||||
zone.cells
|
||||
.filter((c) => c.row_index === row.index && c.col_type === 'spanning_header')
|
||||
.sort((a, b) => a.col_index - b.col_index)
|
||||
.map((cell) => {
|
||||
const colspan = cell.colspan || numCols
|
||||
const color = getCellColor(cell)
|
||||
return (
|
||||
<div
|
||||
key={cell.cell_id}
|
||||
className={`px-1 overflow-hidden ${row.is_header ? 'font-bold' : ''}`}
|
||||
style={{
|
||||
gridColumn: `${cell.col_index + 1} / ${cell.col_index + 1 + colspan}`,
|
||||
height: `${cellRowH}px`,
|
||||
color: color || undefined,
|
||||
whiteSpace: 'pre-wrap',
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
}}
|
||||
>
|
||||
{cell.text}
|
||||
</div>
|
||||
)
|
||||
})
|
||||
) : (
|
||||
zone.columns.map((col) => {
|
||||
const cell = cellMap.get(`${row.index}_${col.index}`)
|
||||
const color = getCellColor(cell)
|
||||
const isBold = col.bold || cell?.is_bold || row.is_header
|
||||
const text = cell?.text ?? ''
|
||||
const isMultiLine = text.includes('\n')
|
||||
|
||||
return (
|
||||
<div
|
||||
key={col.index}
|
||||
className={`px-1 overflow-hidden ${isBold ? 'font-bold' : ''}`}
|
||||
style={{
|
||||
height: `${cellRowH}px`,
|
||||
color: color || undefined,
|
||||
whiteSpace: isMultiLine ? 'pre-wrap' : 'nowrap',
|
||||
textOverflow: isMultiLine ? undefined : 'ellipsis',
|
||||
display: 'flex',
|
||||
alignItems: isMultiLine ? 'flex-start' : 'center',
|
||||
paddingLeft: isMultiLine ? `${fontSize * 0.5}px` : undefined,
|
||||
}}
|
||||
>
|
||||
{text}
|
||||
</div>
|
||||
)
|
||||
})
|
||||
)}
|
||||
{unifiedZone ? (
|
||||
<GridTable
|
||||
zone={unifiedZone}
|
||||
selectedCell={selectedCell}
|
||||
selectedCells={selectedCells}
|
||||
onSelectCell={setSelectedCell}
|
||||
onCellTextChange={updateCellText}
|
||||
onToggleColumnBold={toggleColumnBold}
|
||||
onToggleRowHeader={toggleRowHeader}
|
||||
onNavigate={(cellId, dir) => {
|
||||
const next = getAdjacentCell(cellId, dir)
|
||||
if (next) setSelectedCell(next)
|
||||
}}
|
||||
onDeleteColumn={deleteColumn}
|
||||
onAddColumn={addColumn}
|
||||
onDeleteRow={deleteRow}
|
||||
onAddRow={addRow}
|
||||
onToggleCellSelection={toggleCellSelection}
|
||||
onSetCellColor={setCellColor}
|
||||
/>
|
||||
) : (
|
||||
<div className="p-8 text-center text-gray-400">
|
||||
<p>Kein Unified Grid verfügbar.</p>
|
||||
<button onClick={buildUnified} className="mt-2 text-teal-600 text-sm">Jetzt aufbauen</button>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Coordinate grid
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Coordinate overlay: thin vertical and horizontal guide lines placed
 * every `spacing` image pixels, each labelled with its image coordinate.
 *
 * Lines are absolutely positioned at `coordinate * scale` pixels and do
 * not capture pointer events.
 *
 * @param imgW - Image width in image pixels (upper bound for vertical lines).
 * @param imgH - Image height in image pixels (upper bound for horizontal lines).
 * @param scale - Factor converting image pixels to rendered pixels.
 * @param spacing - Distance between lines in image pixels. Values that are
 *   not greater than zero render nothing.
 */
function CoordinateGrid({ imgW, imgH, scale, spacing }: {
  imgW: number; imgH: number; scale: number; spacing: number
}) {
  // A zero or negative step never advances past the image bounds, so the
  // loops below would not terminate. `!(x > 0)` also rejects NaN.
  if (!(spacing > 0)) return <></>

  const lines: JSX.Element[] = []

  // Vertical lines, labelled with their x coordinate.
  for (let x = 0; x <= imgW; x += spacing) {
    const px = x * scale
    lines.push(
      <div key={`v${x}`} className="absolute top-0 bottom-0 pointer-events-none" style={{ left: `${px}px`, width: '1px', background: 'rgba(0,150,255,0.2)' }}>
        <span className="absolute top-0 left-1 text-[8px] text-blue-400 font-mono">{x}</span>
      </div>
    )
  }

  // Horizontal lines, labelled with their y coordinate.
  for (let y = 0; y <= imgH; y += spacing) {
    const px = y * scale
    lines.push(
      <div key={`h${y}`} className="absolute left-0 right-0 pointer-events-none" style={{ top: `${px}px`, height: '1px', background: 'rgba(0,150,255,0.2)' }}>
        <span className="absolute left-1 top-0.5 text-[8px] text-blue-400 font-mono">{y}</span>
      </div>
    )
  }

  return <>{lines}</>
}
|
||||
|
||||
@@ -2350,3 +2350,62 @@ async def build_box_grids(session_id: str, request: Request):
|
||||
"spell_fixes": spell_fixes,
|
||||
"zones": zones,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Unified Grid endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/sessions/{session_id}/build-unified-grid")
async def build_unified_grid_endpoint(session_id: str):
    """Build and persist the single-zone unified grid for a session.

    Reads the session's ``grid_editor_result``, passes its zones, image
    size and layout metrics to ``build_unified_grid`` and stores the
    outcome under ``unified_grid_result``; ``grid_editor_result`` itself
    is not written.

    Raises:
        HTTPException: 404 when the session does not exist, 400 when the
            session has no ``grid_editor_result``.

    Returns:
        The dict produced by ``build_unified_grid``.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    source_grid = session.get("grid_editor_result")
    if not source_grid:
        raise HTTPException(status_code=400, detail="No grid data. Run build-grid first.")

    # Imported at the point of use, after the request has been validated.
    from unified_grid import build_unified_grid

    builder_kwargs = {
        "zones": source_grid.get("zones", []),
        "image_width": source_grid.get("image_width", 0),
        "image_height": source_grid.get("image_height", 0),
        "layout_metrics": source_grid.get("layout_metrics", {}),
    }
    unified = build_unified_grid(**builder_kwargs)

    # Stored under its own key so the multi-zone grid stays untouched.
    await update_session_db(session_id, unified_grid_result=unified)

    summary = unified.get("summary", {})
    logger.info(
        "build-unified-grid session %s: %d rows, %d cells",
        session_id,
        summary.get("total_rows", 0),
        summary.get("total_cells", 0),
    )

    return unified
|
||||
|
||||
|
||||
@router.get("/sessions/{session_id}/unified-grid")
async def get_unified_grid(session_id: str):
    """Return the persisted unified grid of a session.

    Raises:
        HTTPException: 404 when the session does not exist, and 404 when
            the session has no (or an empty) ``unified_grid_result``.
    """
    session = await get_session_db(session_id)
    if not session:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    stored = session.get("unified_grid_result")
    if stored:
        return stored

    raise HTTPException(
        status_code=404,
        detail="No unified grid. Run build-unified-grid first.",
    )
|
||||
|
||||
425
klausur-service/backend/unified_grid.py
Normal file
425
klausur-service/backend/unified_grid.py
Normal file
@@ -0,0 +1,425 @@
|
||||
"""
|
||||
Unified Grid Builder — merges multi-zone grid into a single Excel-like grid.
|
||||
|
||||
Takes content zone + box zones and produces one unified zone where:
|
||||
- All content rows use the dominant row height
|
||||
- Full-width boxes are integrated directly (box rows replace standard rows)
|
||||
- Partial-width boxes: extra rows inserted if box has more lines than standard
|
||||
- Box-origin cells carry metadata (bg_color, border) for visual distinction
|
||||
|
||||
The result is a single-zone StructuredGrid that can be:
|
||||
- Rendered in an Excel-like editor
|
||||
- Exported to Excel/CSV
|
||||
- Edited with unified row/column numbering
|
||||
"""
|
||||
|
||||
import logging
|
||||
import math
|
||||
import statistics
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _compute_dominant_row_height(content_zone: Dict) -> float:
|
||||
"""Median of content row-to-row spacings, excluding box-gap jumps."""
|
||||
rows = content_zone.get("rows", [])
|
||||
if len(rows) < 2:
|
||||
return 47.0
|
||||
|
||||
spacings = []
|
||||
for i in range(len(rows) - 1):
|
||||
y1 = rows[i].get("y_min_px", rows[i].get("y_min", 0))
|
||||
y2 = rows[i + 1].get("y_min_px", rows[i + 1].get("y_min", 0))
|
||||
d = y2 - y1
|
||||
if 0 < d < 100: # exclude box-gap jumps
|
||||
spacings.append(d)
|
||||
|
||||
if not spacings:
|
||||
return 47.0
|
||||
spacings.sort()
|
||||
return spacings[len(spacings) // 2]
|
||||
|
||||
|
||||
def _classify_boxes(
|
||||
box_zones: List[Dict],
|
||||
content_width: float,
|
||||
) -> List[Dict]:
|
||||
"""Classify each box as full_width or partial_width."""
|
||||
result = []
|
||||
for bz in box_zones:
|
||||
bb = bz.get("bbox_px", {})
|
||||
bw = bb.get("w", 0)
|
||||
bx = bb.get("x", 0)
|
||||
|
||||
if bw >= content_width * 0.85:
|
||||
classification = "full_width"
|
||||
side = "center"
|
||||
else:
|
||||
classification = "partial_width"
|
||||
# Determine which side of the page the box is on
|
||||
page_center = content_width / 2
|
||||
box_center = bx + bw / 2
|
||||
side = "right" if box_center > page_center else "left"
|
||||
|
||||
# Count total text lines in box (including \n within cells)
|
||||
total_lines = sum(
|
||||
(c.get("text", "").count("\n") + 1)
|
||||
for c in bz.get("cells", [])
|
||||
)
|
||||
|
||||
result.append({
|
||||
"zone": bz,
|
||||
"classification": classification,
|
||||
"side": side,
|
||||
"y_start": bb.get("y", 0),
|
||||
"y_end": bb.get("y", 0) + bb.get("h", 0),
|
||||
"total_lines": total_lines,
|
||||
"bg_hex": bz.get("box_bg_hex", ""),
|
||||
"bg_color": bz.get("box_bg_color", ""),
|
||||
})
|
||||
return result
|
||||
|
||||
|
||||
def build_unified_grid(
    zones: List[Dict],
    image_width: int,
    image_height: int,
    layout_metrics: Dict,
) -> Dict[str, Any]:
    """Collapse a multi-zone grid into one zone of type ``"unified"``.

    Zones are split by ``zone_type``: ``"box"`` zones are collected and
    ordered by their top edge, and the last ``"content"`` zone encountered
    serves as the base grid. Content rows are emitted from top to bottom;
    when a box is reached its rows are spliced in (directly for full-width
    boxes, through the partial-width helper otherwise) and the content rows
    whose top lies at or above the box's bottom edge are skipped here.

    Args:
        zones: Zone dicts of the multi-zone grid.
        image_width: Used as content width when the content zone's
            ``bbox_px`` has no ``"w"``; echoed in the result.
        image_height: Forwarded to the row helpers; echoed in the result.
        layout_metrics: Echoed unchanged in the result.

    Returns:
        A grid dict with exactly one zone, a ``summary``, ``is_unified`` set
        to True and the computed ``dominant_row_h``. When no content zone
        exists, ``{"zones": zones}`` is returned as a fallback.
    """
    content_zone = None
    box_zones = []
    for zone in zones:
        kind = zone.get("zone_type")
        if kind == "content":
            content_zone = zone
        elif kind == "box":
            box_zones.append(zone)

    if not content_zone:
        logger.warning("build_unified_grid: no content zone found")
        return {"zones": zones}  # fallback: return as-is

    # Process boxes from top to bottom.
    box_zones.sort(key=lambda b: b.get("bbox_px", {}).get("y", 0))

    dominant_h = _compute_dominant_row_height(content_zone)
    content_bbox = content_zone.get("bbox_px", {})
    content_width = content_bbox.get("w", image_width)
    content_x = content_bbox.get("x", 0)
    content_cols = content_zone.get("columns", [])
    num_cols = len(content_cols)
    content_rows = content_zone.get("rows", [])

    box_infos = _classify_boxes(box_zones, content_width)

    logger.info(
        "build_unified_grid: dominant_h=%.1f, %d content rows, %d boxes (%s)",
        dominant_h, len(content_rows), len(box_infos),
        [info["classification"] for info in box_infos],
    )

    # Group content cells by the row they belong to.
    content_cells_by_row: Dict[int, List[Dict]] = {}
    for cell in content_zone.get("cells", []):
        row_key = cell.get("row_index", -1)
        if row_key not in content_cells_by_row:
            content_cells_by_row[row_key] = []
        content_cells_by_row[row_key].append(cell)

    def _top_of(row: Dict):
        return row.get("y_min_px", row.get("y_min", 0))

    unified_rows: List[Dict] = []
    unified_cells: List[Dict] = []
    next_row_idx = 0   # index the next unified row will receive
    cursor = 0         # position of the next unprocessed content row
    content_total = len(content_rows)

    for info in box_infos:
        zone = info["zone"]
        box_top = info["y_start"]
        box_bottom = info["y_end"]

        # Content rows that start above the box come first.
        while cursor < content_total:
            row = content_rows[cursor]
            if _top_of(row) >= box_top:
                break
            _add_content_row(
                unified_rows, unified_cells, next_row_idx,
                row, content_cells_by_row, dominant_h, image_height,
            )
            next_row_idx += 1
            cursor += 1

        if info["classification"] == "full_width":
            # Box rows are integrated as they are.
            _add_full_width_box(
                unified_rows, unified_cells, next_row_idx,
                zone, info, dominant_h, num_cols, image_height,
            )
            next_row_idx += len(zone.get("rows", []))
        else:
            # The helper merges the box with neighbouring content columns
            # and reports the next free row index.
            next_row_idx = _add_partial_width_box(
                unified_rows, unified_cells, next_row_idx,
                zone, info, content_rows, content_cells_by_row,
                cursor, dominant_h, num_cols, image_height,
                content_x, content_width,
            )

        # Either way, move past the content rows inside the box region.
        while cursor < content_total:
            if _top_of(content_rows[cursor]) > box_bottom:
                break
            cursor += 1

    # Whatever content is left sits below the last box.
    while cursor < content_total:
        _add_content_row(
            unified_rows, unified_cells, next_row_idx,
            content_rows[cursor], content_cells_by_row, dominant_h, image_height,
        )
        next_row_idx += 1
        cursor += 1

    unified_zone = {
        "zone_index": 0,
        "zone_type": "unified",
        "bbox_px": content_bbox,
        "bbox_pct": content_zone.get("bbox_pct", {}),
        "border": None,
        "word_count": sum(len(cell.get("word_boxes", [])) for cell in unified_cells),
        "columns": content_cols,
        "rows": unified_rows,
        "cells": unified_cells,
        "header_rows": [],
    }

    logger.info(
        "build_unified_grid: %d unified rows, %d cells (from %d content + %d box zones)",
        len(unified_rows), len(unified_cells),
        len(content_rows), len(box_zones),
    )

    summary = {
        "total_zones": 1,
        "total_columns": num_cols,
        "total_rows": len(unified_rows),
        "total_cells": len(unified_cells),
    }
    return {
        "zones": [unified_zone],
        "image_width": image_width,
        "image_height": image_height,
        "layout_metrics": layout_metrics,
        "summary": summary,
        "is_unified": True,
        "dominant_row_h": dominant_h,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_row(idx: int, y: float, h: float, img_h: int, is_header: bool = False) -> Dict:
|
||||
return {
|
||||
"index": idx,
|
||||
"row_index": idx,
|
||||
"y_min_px": round(y),
|
||||
"y_max_px": round(y + h),
|
||||
"y_min_pct": round(y / img_h * 100, 2) if img_h else 0,
|
||||
"y_max_pct": round((y + h) / img_h * 100, 2) if img_h else 0,
|
||||
"is_header": is_header,
|
||||
}
|
||||
|
||||
|
||||
def _remap_cell(cell: Dict, new_row: int, new_col: int = None,
|
||||
source_type: str = "content", box_region: Dict = None) -> Dict:
|
||||
"""Create a new cell dict with remapped indices."""
|
||||
c = dict(cell)
|
||||
c["row_index"] = new_row
|
||||
if new_col is not None:
|
||||
c["col_index"] = new_col
|
||||
c["cell_id"] = f"U_R{new_row:02d}_C{c.get('col_index', 0)}"
|
||||
c["source_zone_type"] = source_type
|
||||
if box_region:
|
||||
c["box_region"] = box_region
|
||||
return c
|
||||
|
||||
|
||||
def _add_content_row(
    unified_rows, unified_cells, row_idx,
    content_row, cells_by_row, dominant_h, img_h,
):
    """Append one content row and its cells to the unified grid.

    The new row keeps the content row's top position and header flag but
    uses ``dominant_h`` as its height. Cells are looked up in
    ``cells_by_row`` under the content row's ``index`` and re-addressed to
    ``row_idx``. Both output lists are extended in place.
    """
    top = content_row.get("y_min_px", content_row.get("y_min", 0))
    header_flag = content_row.get("is_header", False)
    unified_rows.append(_make_row(row_idx, top, dominant_h, img_h, header_flag))

    source_cells = cells_by_row.get(content_row.get("index", -1), [])
    unified_cells.extend(
        _remap_cell(source_cell, row_idx, source_type="content")
        for source_cell in source_cells
    )
|
||||
|
||||
|
||||
def _add_full_width_box(
    unified_rows, unified_cells, start_row_idx,
    box_zone, box_info, dominant_h, num_cols, img_h,
):
    """Append every row of a full-width box to the unified grid.

    The box's vertical extent is divided evenly among its rows. Row ``i`` of
    the box becomes unified row ``start_row_idx + i`` and receives the box
    cells whose ``row_index`` equals that box row's ``index`` (or ``i`` when
    the row has no ``index``). All cells are tagged as box-origin and share
    one ``box_region`` dict. Both output lists are extended in place.
    """
    rows_in_box = box_zone.get("rows", [])
    cells_in_box = box_zone.get("cells", [])
    region_meta = {
        "bg_hex": box_info["bg_hex"],
        "bg_color": box_info["bg_color"],
        "border": True,
    }

    # Every box row gets an equal share of the box height.
    box_height = box_info["y_end"] - box_info["y_start"]
    slot_h = box_height / len(rows_in_box) if rows_in_box else dominant_h

    for offset, box_row in enumerate(rows_in_box):
        target_idx = start_row_idx + offset
        slot_top = box_info["y_start"] + offset * slot_h
        header_flag = box_row.get("is_header", False)
        unified_rows.append(_make_row(target_idx, slot_top, slot_h, img_h, header_flag))

        wanted_row = box_row.get("index", offset)
        unified_cells.extend(
            _remap_cell(box_cell, target_idx, source_type="box", box_region=region_meta)
            for box_cell in cells_in_box
            if box_cell.get("row_index") == wanted_row
        )
|
||||
|
||||
|
||||
def _add_partial_width_box(
    unified_rows, unified_cells, start_row_idx,
    box_zone, box_info, content_rows, content_cells_by_row,
    content_row_ptr, dominant_h, num_cols, img_h,
    content_x, content_width,
) -> int:
    """Add a partial-width box merged with the content rows beside it.

    The box region is rendered as a run of rows of height ``dominant_h``
    starting at the box's top edge. Row ``i`` of the run receives:

    - the cells of the i-th overlapping content row, restricted to the
      columns the box does NOT occupy, and
    - the i-th text line of the box (cell texts are split on newlines) as
      one cell spanning the box's columns.

    The box occupies the right half of the columns
    (``num_cols // 2 .. num_cols``) when ``box_info["side"]`` is ``"right"``
    and the left half (``0 .. num_cols // 2``) otherwise.

    The run is long enough for the rows that fit the box height, for every
    box text line and for every overlapping content row. The caller skips
    all content rows in the box's vertical range after this call, so a
    shorter run would silently lose their cells.

    Args:
        unified_rows: Output row list, extended in place.
        unified_cells: Output cell list, extended in place.
        start_row_idx: Unified index of the first row to create.
        box_zone: The box zone dict (its ``cells`` are read).
        box_info: Classification dict for the box (``y_start``, ``y_end``,
            ``total_lines``, ``side``, ``bg_hex``, ``bg_color``).
        content_rows: All content rows, ordered top to bottom.
        content_cells_by_row: Content cells grouped by row index.
        content_row_ptr: Position of the first unprocessed content row.
        dominant_h: Standard row height.
        num_cols: Number of content columns.
        img_h: Image height, forwarded to ``_make_row``.
        content_x: Currently unused; kept for interface compatibility.
        content_width: Currently unused; kept for interface compatibility.

    Returns the next unified_row_idx after processing.
    """
    by_start = box_info["y_start"]
    by_end = box_info["y_end"]
    box_h = by_end - by_start
    box_region = {"bg_hex": box_info["bg_hex"], "bg_color": box_info["bg_color"], "border": True}

    # Content rows in the box's Y range
    overlap_content_rows = []
    for cr in content_rows[content_row_ptr:]:
        cry = cr.get("y_min_px", cr.get("y_min", 0))
        if cry > by_end:
            break
        if cry >= by_start:
            overlap_content_rows.append(cr)

    # Expand box cell texts with \n into individual lines for row mapping
    box_lines: List[Tuple[str, Dict]] = []  # (text_line, parent_cell)
    for bc in sorted(box_zone.get("cells", []), key=lambda c: c.get("row_index", 0)):
        text = bc.get("text") or ""  # tolerate a missing or None text
        for line in text.split("\n"):
            box_lines.append((line.strip(), bc))

    # How many standard rows fit in the box height
    standard_rows = max(1, math.floor(box_h / dominant_h))
    # How many text lines the box actually has
    box_text_lines = box_info["total_lines"]
    # Extra rows needed
    extra_rows = max(0, box_text_lines - standard_rows)
    # Never emit fewer rows than there are overlapping content rows or box
    # lines: anything that does not get a row here is lost for good.
    total_rows_for_region = max(
        standard_rows + extra_rows,
        len(overlap_content_rows),
        len(box_lines),
    )

    logger.info(
        "partial box: standard=%d, box_lines=%d, extra=%d, content_overlap=%d",
        standard_rows, box_text_lines, extra_rows, len(overlap_content_rows),
    )

    # Column range covered by the box: columns are split at the midpoint.
    # NOTE(review): with num_cols <= 1 a left-side box gets an empty range
    # (colspan 0) — confirm how single-column layouts should be handled.
    half = num_cols // 2
    if box_info["side"] == "right":
        box_col_start, box_col_end = half, num_cols
    else:
        box_col_start, box_col_end = 0, half
    box_colspan = box_col_end - box_col_start

    row_idx = start_row_idx
    for i in range(total_rows_for_region):
        y = by_start + i * dominant_h
        unified_rows.append(_make_row(row_idx, y, dominant_h, img_h))

        # Content cells for this row (from overlapping content rows)
        if i < len(overlap_content_rows):
            cr = overlap_content_rows[i]
            for cell in content_cells_by_row.get(cr.get("index", -1), []):
                # Only include cells from columns NOT covered by the box
                ci = cell.get("col_index", 0)
                if ci < box_col_start or ci >= box_col_end:
                    unified_cells.append(_remap_cell(cell, row_idx, source_type="content"))

        # Box cell for this row
        if i < len(box_lines):
            line_text, parent_cell = box_lines[i]
            unified_cells.append({
                "cell_id": f"U_R{row_idx:02d}_C{box_col_start}",
                "row_index": row_idx,
                "col_index": box_col_start,
                "col_type": "spanning_header" if box_colspan > 1 else parent_cell.get("col_type", "column_1"),
                "colspan": box_colspan,
                "text": line_text,
                "confidence": parent_cell.get("confidence", 0),
                "bbox_px": parent_cell.get("bbox_px", {}),
                "bbox_pct": parent_cell.get("bbox_pct", {}),
                "word_boxes": [],
                "ocr_engine": parent_cell.get("ocr_engine", ""),
                "is_bold": parent_cell.get("is_bold", False),
                "source_zone_type": "box",
                "box_region": box_region,
            })

        row_idx += 1

    return row_idx
|
||||
Reference in New Issue
Block a user