From dcb873db35947e8e9684a816c32b7753d58a33ca Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 14 Apr 2026 15:29:40 +0200 Subject: [PATCH] StepAnsicht: section-based layout with averaged row heights Major rewrite of reconstruction rendering: - Page split into vertical sections (content/box) around box boundaries - Content sections: uniform row height = (last_row - first_row) / (n-1) - Box sections: rows evenly distributed within box height - Content rows positioned absolutely at original y-coordinates - Font size derived from row height (55% of row height) - Multi-line cells (bullets) get expanded height with indentation - Boxes render at exact bbox position with colored border - Preparation for unified grid where boxes become part of main grid Co-Authored-By: Claude Opus 4.6 (1M context) --- .../components/ocr-kombi/StepAnsicht.tsx | 395 +++++++++++++----- 1 file changed, 285 insertions(+), 110 deletions(-) diff --git a/admin-lehrer/components/ocr-kombi/StepAnsicht.tsx b/admin-lehrer/components/ocr-kombi/StepAnsicht.tsx index 8d7c427..becd9d3 100644 --- a/admin-lehrer/components/ocr-kombi/StepAnsicht.tsx +++ b/admin-lehrer/components/ocr-kombi/StepAnsicht.tsx @@ -3,15 +3,18 @@ /** * StepAnsicht — Split-view page layout comparison. * - * Left: Original scan with OCR word overlay (red) + coordinate grid - * Right: Reconstructed layout with all zones + coordinate grid + * Left: Original scan with OCR word overlay + * Right: Reconstructed layout with averaged row heights per section * - * Both sides share the same coordinate system for easy visual comparison. + * Layout principle: the page is divided into vertical sections separated + * by boxes. Each section gets a uniform row height calculated from + * (last_row_y - first_row_y) / (num_rows - 1). Boxes are rendered + * inline between sections (not as floating overlays). */ -import { useEffect, useRef, useState } from 'react' +import { useEffect, useMemo, useRef, useState } from 'react' import { useGridEditor } from '@/components/grid-editor/useGridEditor' -import type { GridZone, GridEditorCell } from '@/components/grid-editor/types' +import type { GridZone, GridEditorCell, GridRow } from '@/components/grid-editor/types' const KLAUSUR_API = '/klausur-api' @@ -20,6 +23,16 @@ interface StepAnsichtProps { onNext: () => void } +/** A vertical section of the page: either content rows or a box zone. */ +interface PageSection { + type: 'content' | 'box' + yStart: number // pixel y in original image + yEnd: number // pixel y end + zone?: GridZone // for box sections + rows?: GridRow[] // for content sections — subset of content zone rows + avgRowH: number // averaged row height in original pixels +} + function getCellColor(cell: GridEditorCell | undefined): string | null { if (!cell) return null if (cell.color_override) return cell.color_override @@ -33,13 +46,12 @@ export function StepAnsicht({ sessionId, onNext }: StepAnsichtProps) { const leftRef = useRef(null) const [panelWidth, setPanelWidth] = useState(0) const [showGrid, setShowGrid] = useState(true) - const [gridSpacing, setGridSpacing] = useState(100) // px in original coordinates + const [gridSpacing, setGridSpacing] = useState(100) useEffect(() => { if (sessionId) loadGrid() }, [sessionId]) // eslint-disable-line react-hooks/exhaustive-deps - // Track panel width useEffect(() => { if (!leftRef.current) return const ro = new ResizeObserver(([entry]) => setPanelWidth(entry.contentRect.width)) @@ -47,6 +59,85 @@ export function StepAnsicht({ sessionId, onNext }: StepAnsichtProps) { return () => ro.disconnect() }, []) + // Build page sections: split content rows around box zones + const sections = useMemo(() => { + if (!grid) return [] + const contentZone = grid.zones.find((z) => z.zone_type === 'content') + const boxZones = grid.zones.filter((z) => z.zone_type === 'box') + .sort((a, b) => a.bbox_px.y - b.bbox_px.y) + + if (!contentZone) return [] + + const allRows = contentZone.rows + const result: PageSection[] = [] + + // Box boundaries sorted by y + const boxBounds = boxZones.map((bz) => ({ + zone: bz, + yStart: bz.bbox_px.y, + yEnd: bz.bbox_px.y + bz.bbox_px.h, + })) + + // Split content rows into sections around boxes + let currentRows: GridRow[] = [] + let boxIdx = 0 + + for (const row of allRows) { + const ry = row.y_min_px ?? (row as any).y_min ?? 0 + + // Check if we've passed a box boundary — insert box section + while (boxIdx < boxBounds.length && ry >= boxBounds[boxIdx].yStart) { + // Flush current content section + if (currentRows.length > 0) { + result.push(makeContentSection(currentRows)) + currentRows = [] + } + // Insert box section + const bb = boxBounds[boxIdx] + const bRows = bb.zone.rows || [] + let bAvgH = 35 + if (bRows.length >= 2) { + const bys = bRows.map((r) => r.y_min_px ?? (r as any).y_min ?? 0) + bAvgH = (bys[bys.length - 1] - bys[0]) / (bRows.length - 1) + } + result.push({ + type: 'box', + yStart: bb.yStart, + yEnd: bb.yEnd, + zone: bb.zone, + avgRowH: bAvgH, + }) + boxIdx++ + } + + // Skip rows that fall inside a box boundary + const insideBox = boxBounds.some((bb) => ry >= bb.yStart && ry <= bb.yEnd) + if (!insideBox) { + currentRows.push(row) + } + } + + // Flush remaining content rows + if (currentRows.length > 0) { + result.push(makeContentSection(currentRows)) + } + + // Insert remaining boxes (if any rows didn't trigger them) + while (boxIdx < boxBounds.length) { + const bb = boxBounds[boxIdx] + const bRows = bb.zone.rows || [] + let bAvgH = 35 + if (bRows.length >= 2) { + const bys = bRows.map((r) => r.y_min_px ?? (r as any).y_min ?? 0) + bAvgH = (bys[bys.length - 1] - bys[0]) / (bRows.length - 1) + } + result.push({ type: 'box', yStart: bb.yStart, yEnd: bb.yEnd, zone: bb.zone, avgRowH: bAvgH }) + boxIdx++ + } + + return result + }, [grid]) + if (loading) { return (
@@ -69,82 +160,79 @@ export function StepAnsicht({ sessionId, onNext }: StepAnsichtProps) { const imgH = grid.image_height || 1 const scale = panelWidth > 0 ? panelWidth / imgW : 0.5 const panelHeight = imgH * scale - - const baseFontPx = (grid as any).layout_metrics?.font_size_suggestion_px || 14 - const avgRowH = (grid as any).layout_metrics?.avg_row_height_px || 31 - const scaledFont = Math.max(7, baseFontPx * scale) + const contentZone = grid.zones.find((z) => z.zone_type === 'content') return (
{/* Header */}
-

- Ansicht — Original vs. Rekonstruktion -

+

Ansicht — Original vs. Rekonstruktion

- Links: Original mit OCR-Overlay. Rechts: Rekonstruierte Seite. Koordinatengitter zum Abgleich. + Links: Original mit OCR. Rechts: Rekonstruktion mit gemittelten Zeilenhöhen.

- setGridSpacing(Number(e.target.value))} className="text-xs px-1.5 py-1 rounded border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700"> - +
{/* Split view */} -
0 ? `${panelHeight + 40}px` : '600px' }}> +
{/* LEFT: Original + OCR overlay */}
-
- Original + OCR -
- - {/* Server-rendered OCR overlay image (scan + red snapped letters) */} +
Original + OCR
{sessionId && ( Original + OCR Overlay )} - - {/* Coordinate grid */} {showGrid && }
{/* RIGHT: Reconstruction */}
-
- Rekonstruktion -
+
Rekonstruktion
- {/* Rendered zones */} - {grid.zones.map((zone) => ( - - ))} + {/* Render sections sequentially */} + {sections.map((sec, si) => { + if (sec.type === 'box' && sec.zone) { + return ( + + ) + } + if (sec.type === 'content' && sec.rows && contentZone) { + return ( + + ) + } + return null + })} - {/* Coordinate grid */} {showGrid && }
@@ -153,70 +241,133 @@ export function StepAnsicht({ sessionId, onNext }: StepAnsichtProps) { } // --------------------------------------------------------------------------- -// Coordinate grid overlay +// Helpers // --------------------------------------------------------------------------- -function CoordinateGrid({ imgW, imgH, scale, spacing }: { - imgW: number; imgH: number; scale: number; spacing: number -}) { - const lines: JSX.Element[] = [] - - // Vertical lines - for (let x = 0; x <= imgW; x += spacing) { - const px = x * scale - lines.push( -
- {x} -
- ) +function makeContentSection(rows: GridRow[]): PageSection { + const ys = rows.map((r) => r.y_min_px ?? (r as any).y_min ?? 0) + const yEnd = rows[rows.length - 1].y_max_px ?? (rows[rows.length - 1] as any).y_max ?? ys[ys.length - 1] + 30 + let avgRowH = 35 + if (rows.length >= 2) { + avgRowH = (ys[ys.length - 1] - ys[0]) / (rows.length - 1) } - - // Horizontal lines - for (let y = 0; y <= imgH; y += spacing) { - const px = y * scale - lines.push( -
- {y} -
- ) - } - - return <>{lines} + return { type: 'content', yStart: ys[0], yEnd, rows, avgRowH } } // --------------------------------------------------------------------------- -// Zone renderer (reconstruction side) +// Content section renderer — rows from content zone at absolute positions // --------------------------------------------------------------------------- -function ZoneRenderer({ zone, scale, fontSize, avgRowH }: { - zone: GridZone; scale: number; fontSize: number; avgRowH: number +function ContentSectionRenderer({ zone, rows, yStart, scale, avgRowH }: { + zone: GridZone; rows: GridRow[]; yStart: number; scale: number; avgRowH: number }) { - const isBox = zone.zone_type === 'box' - const boxColor = (zone as any).box_bg_hex || '#6b7280' + const cellMap = new Map() + for (const cell of zone.cells) { + cellMap.set(`${cell.row_index}_${cell.col_index}`, cell) + } + const rowH = avgRowH * scale + const fontSize = Math.max(7, rowH * 0.55) + + return ( + <> + {rows.map((row, ri) => { + const rowY = (row.y_min_px ?? (row as any).y_min ?? 0) * scale + const isSpanning = zone.cells.some((c) => c.row_index === row.index && c.col_type === 'spanning_header') + + // Column widths + const colWidths = zone.columns.map((col) => Math.max(5, ((col.x_max_px ?? 0) - (col.x_min_px ?? 0)) * scale)) + const zoneLeft = zone.bbox_px.x * scale + const zoneWidth = zone.bbox_px.w * scale + const totalColW = colWidths.reduce((s, w) => s + w, 0) + const colScale = totalColW > 0 ? zoneWidth / totalColW : 1 + + return ( +
`${(w * colScale).toFixed(1)}px`).join(' '), + fontSize: `${fontSize}px`, + lineHeight: `${rowH}px`, + }} + > + {isSpanning ? ( + zone.cells + .filter((c) => c.row_index === row.index && c.col_type === 'spanning_header') + .sort((a, b) => a.col_index - b.col_index) + .map((cell) => { + const colspan = cell.colspan || zone.columns.length + const color = getCellColor(cell) + return ( +
+ {cell.text} +
+ ) + }) + ) : ( + zone.columns.map((col) => { + const cell = cellMap.get(`${row.index}_${col.index}`) + const color = getCellColor(cell) + const isBold = col.bold || cell?.is_bold || row.is_header + return ( +
+ {cell?.text ?? ''} +
+ ) + }) + )} +
+ ) + })} + + ) +} + +// --------------------------------------------------------------------------- +// Box section renderer — box zone at absolute position with border +// --------------------------------------------------------------------------- + +function BoxSectionRenderer({ zone, scale, avgRowH }: { + zone: GridZone; scale: number; avgRowH: number +}) { + const boxColor = (zone as any).box_bg_hex || '#6b7280' if (!zone.cells || zone.cells.length === 0) return null const left = zone.bbox_px.x * scale const top = zone.bbox_px.y * scale const width = zone.bbox_px.w * scale const height = zone.bbox_px.h * scale + const rowH = avgRowH * scale + const fontSize = Math.max(7, rowH * 0.5) const cellMap = new Map() for (const cell of zone.cells) { cellMap.set(`${cell.row_index}_${cell.col_index}`, cell) } - // Column widths scaled to zone - const colWidths = zone.columns.map((col) => { - const w = (col.x_max_px ?? 0) - (col.x_min_px ?? 0) - return Math.max(5, w * scale) - }) + const colWidths = zone.columns.map((col) => Math.max(5, ((col.x_max_px ?? 0) - (col.x_min_px ?? 0)) * scale)) const totalColW = colWidths.reduce((s, w) => s + w, 0) const colScale = totalColW > 0 ? width / totalColW : 1 - const scaledColWidths = colWidths.map((w) => w * colScale) - const numCols = zone.columns.length + // Evenly distribute rows within the box + const numRows = zone.rows.length + const evenRowH = numRows > 0 ? height / numRows : rowH + return (
-
`${w.toFixed(1)}px`).join(' ') }}> - {zone.rows.map((row, rowIdx) => { +
`${(w * colScale).toFixed(1)}px`).join(' ') }}> + {zone.rows.map((row) => { const isSpanning = zone.cells.some((c) => c.row_index === row.index && c.col_type === 'spanning_header') - // Row height = distance to next row's start (not text height) - // This produces correct line spacing matching the original - const nextRow = rowIdx + 1 < zone.rows.length ? zone.rows[rowIdx + 1] : null - const rowStartY = row.y_min_px ?? row.y_min ?? 0 - const nextStartY = nextRow ? (nextRow.y_min_px ?? nextRow.y_min ?? 0) : rowStartY + avgRowH - const rowSpacing = nextStartY - rowStartY - const rowH = Math.max(fontSize * 1.3, rowSpacing * scale) - - // Multi-line cells need more height + // Multi-line height const maxLines = Math.max(1, ...zone.cells .filter((c) => c.row_index === row.index) .map((c) => (c.text ?? '').split('\n').length)) - const effectiveRowH = rowH * Math.max(1, maxLines) + const cellRowH = evenRowH * (maxLines > 1 ? maxLines * 0.7 : 1) return (
@@ -262,12 +405,14 @@ function ZoneRenderer({ zone, scale, fontSize, avgRowH }: { return (
{cell.text} @@ -277,20 +422,23 @@ function ZoneRenderer({ zone, scale, fontSize, avgRowH }: { ) : ( zone.columns.map((col) => { const cell = cellMap.get(`${row.index}_${col.index}`) - if (!cell) return
const color = getCellColor(cell) - const isBold = col.bold || cell.is_bold || row.is_header - const text = cell.text ?? '' + const isBold = col.bold || cell?.is_bold || row.is_header + const text = cell?.text ?? '' + const isMultiLine = text.includes('\n') return (
{text} @@ -305,3 +453,30 @@ function ZoneRenderer({ zone, scale, fontSize, avgRowH }: {
) } + +// --------------------------------------------------------------------------- +// Coordinate grid +// --------------------------------------------------------------------------- + +function CoordinateGrid({ imgW, imgH, scale, spacing }: { + imgW: number; imgH: number; scale: number; spacing: number +}) { + const lines: JSX.Element[] = [] + for (let x = 0; x <= imgW; x += spacing) { + const px = x * scale + lines.push( +
+ {x} +
+ ) + } + for (let y = 0; y <= imgH; y += spacing) { + const px = y * scale + lines.push( +
+ {y} +
+ ) + } + return <>{lines} +}