From bcd97e7d781526a5b48eba5c103569954f493c5c Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 10 Mar 2026 16:18:47 +0100 Subject: [PATCH] feat: Overlay-Modus fuer ganzseitige Tabellenrekonstruktion mit Pixel-Positionierung - usePixelWordPositions Hook extrahiert (shared zwischen StepLlmReview und StepReconstruction) - StepReconstruction: neuer Overlay-Modus mit 50/50 Layout (Original + Rekonstruktion) - Sub-Session-Zellen werden in Parent-Koordinaten konvertiert und zusammengefuehrt - Spalten-/Zeilenlinien und Box-Zone-Markierung aus column_result/row_result - Schriftgroesse-Slider und Bold-Toggle fuer Overlay - StepLlmReview: ~140 Zeilen Pixel-Analyse durch Hook ersetzt Co-Authored-By: Claude Opus 4.6 --- .../components/ocr-pipeline/StepLlmReview.tsx | 148 +----- .../ocr-pipeline/StepReconstruction.tsx | 460 ++++++++++++++++-- .../ocr-pipeline/usePixelWordPositions.ts | 162 ++++++ 3 files changed, 588 insertions(+), 182 deletions(-) create mode 100644 admin-lehrer/components/ocr-pipeline/usePixelWordPositions.ts diff --git a/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx b/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx index 45ee16e..338a7b9 100644 --- a/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx @@ -2,6 +2,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react' import type { GridCell, GridResult, WordEntry, ColumnMeta } from '@/app/(admin)/ai/ocr-pipeline/types' +import { usePixelWordPositions } from './usePixelWordPositions' const KLAUSUR_API = '/klausur-api' @@ -92,8 +93,11 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) { const reconRef = useRef(null) const [reconWidth, setReconWidth] = useState(0) - // Pixel-analysed word positions: cell_id → [{xPct, wPct, text, fontRatio}] - const [cellWordPositions, setCellWordPositions] = useState>(new Map()) + // Pixel-analysed word positions via shared hook + const overlayImageUrl = sessionId + ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped` + : '' + const cellWordPositions = usePixelWordPositions(overlayImageUrl, cells, viewMode === 'overlay') const tableRef = useRef(null) const activeRowRef = useRef(null) @@ -109,146 +113,6 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) { return () => obs.disconnect() }, [viewMode]) - // Pixel-based word positioning: analyse dark-pixel clusters on the image - useEffect(() => { - if (viewMode !== 'overlay' || cells.length === 0 || !sessionId) return - - const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped` - const img = new Image() - img.crossOrigin = 'anonymous' - img.onload = () => { - const canvas = document.createElement('canvas') - canvas.width = img.naturalWidth - canvas.height = img.naturalHeight - const ctx = canvas.getContext('2d') - if (!ctx) return - ctx.drawImage(img, 0, 0) - - const refFontSize = 40 - const fontFam = "'Liberation Sans', Arial, sans-serif" - ctx.font = `${refFontSize}px ${fontFam}` - - const positions = new Map() - - for (const cell of cells) { - if (!cell.bbox_pct || !cell.text) continue - - // Split by 3+ whitespace into word-groups - const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean) - - // Pixel region for this cell - const imgW = img.naturalWidth - const imgH = img.naturalHeight - const cx = Math.round(cell.bbox_pct.x / 100 * imgW) - const cy = Math.round(cell.bbox_pct.y / 100 * imgH) - const cw = Math.round(cell.bbox_pct.w / 100 * imgW) - const ch = Math.round(cell.bbox_pct.h / 100 * imgH) - if (cw <= 0 || ch <= 0) continue - - const imageData = ctx.getImageData(cx, cy, cw, ch) - - // Vertical projection: count dark pixels per column - const proj = new Float32Array(cw) - for (let y = 0; y < ch; y++) { - for (let x = 0; x < cw; x++) { - const idx = (y * cw + x) * 4 - const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2] - if (lum < 128) proj[x]++ - } - } - - // Find dark-pixel clusters (word groups on the image) - const threshold = Math.max(1, ch * 0.03) - const minGap = Math.max(5, Math.round(cw * 0.02)) - const clusters: { start: number; end: number }[] = [] - let inCluster = false - let clStart = 0 - let gap = 0 - - for (let x = 0; x < cw; x++) { - if (proj[x] >= threshold) { - if (!inCluster) { clStart = x; inCluster = true } - gap = 0 - } else if (inCluster) { - gap++ - if (gap > minGap) { - clusters.push({ start: clStart, end: x - gap }) - inCluster = false - gap = 0 - } - } - } - if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap }) - - if (clusters.length === 0) continue - - const wordPos: { xPct: number; wPct: number; text: string; fontRatio: number }[] = [] - - if (groups.length <= 1) { - // Single group: position at first cluster, merge all clusters for width - const firstCl = clusters[0] - const lastCl = clusters[clusters.length - 1] - const clusterW = lastCl.end - firstCl.start + 1 - // Auto font-size: fit text width to cluster width - const measured = ctx.measureText(cell.text.trim()) - const autoFontPx = refFontSize * (clusterW / measured.width) - const fontRatio = Math.min(autoFontPx / ch, 1.0) // ratio of cell height - wordPos.push({ - xPct: cell.bbox_pct.x + (firstCl.start / cw) * cell.bbox_pct.w, - wPct: ((lastCl.end - firstCl.start + 1) / cw) * cell.bbox_pct.w, - text: cell.text.trim(), - fontRatio, - }) - } else if (clusters.length >= groups.length) { - // Multiple groups: match to clusters left-to-right - for (let i = 0; i < groups.length; i++) { - const cl = clusters[i] - const clusterW = cl.end - cl.start + 1 - const measured = ctx.measureText(groups[i]) - const autoFontPx = refFontSize * (clusterW / measured.width) - const fontRatio = Math.min(autoFontPx / ch, 1.0) - wordPos.push({ - xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w, - wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w, - text: groups[i], - fontRatio, - }) - } - } else { - continue // fewer clusters than groups — skip - } - - positions.set(cell.cell_id, wordPos) - } - - // Normalise: find the most common fontRatio (mode) and apply it to all - const allRatios: number[] = [] - for (const wps of positions.values()) { - for (const wp of wps) allRatios.push(wp.fontRatio) - } - if (allRatios.length > 0) { - // Bucket ratios to 2 decimal places, find mode - const buckets = new Map() - for (const r of allRatios) { - const key = Math.round(r * 50) / 50 // round to nearest 0.02 - buckets.set(key, (buckets.get(key) || 0) + 1) - } - let modeRatio = allRatios[0] - let modeCount = 0 - for (const [ratio, count] of buckets) { - if (count > modeCount) { modeRatio = ratio; modeCount = count } - } - // Apply mode to all word positions - for (const wps of positions.values()) { - for (const wp of wps) wp.fontRatio = modeRatio - } - } - - setCellWordPositions(positions) - } - img.src = imgUrl - }, [viewMode, cells, sessionId]) - // Load session data on mount useEffect(() => { if (!sessionId) return diff --git a/admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx b/admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx index 9b8d6d2..1a9ccb5 100644 --- a/admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepReconstruction.tsx @@ -2,7 +2,8 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react' import dynamic from 'next/dynamic' -import type { GridResult, GridCell, WordEntry } from '@/app/(admin)/ai/ocr-pipeline/types' +import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem } from '@/app/(admin)/ai/ocr-pipeline/types' +import { usePixelWordPositions } from './usePixelWordPositions' const KLAUSUR_API = '/klausur-api' @@ -12,7 +13,7 @@ const FabricReconstructionCanvas = dynamic( { ssr: false, loading: () =>
Editor wird geladen...
} ) -type EditorMode = 'simple' | 'editor' +type EditorMode = 'simple' | 'editor' | 'overlay' interface StepReconstructionProps { sessionId: string | null @@ -46,11 +47,42 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp const [undoStack, setUndoStack] = useState([]) const [redoStack, setRedoStack] = useState([]) - // (allCells removed — cells now contains all cells including empty ones) - const containerRef = useRef(null) const imageRef = useRef(null) + // Overlay mode state + const [isParentWithBoxes, setIsParentWithBoxes] = useState(false) + const [mergedGridCells, setMergedGridCells] = useState([]) + const [parentColumns, setParentColumns] = useState([]) + const [parentRows, setParentRows] = useState([]) + const [parentZones, setParentZones] = useState([]) + const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null) + const [fontScale, setFontScale] = useState(0.7) + const [globalBold, setGlobalBold] = useState(false) + const reconRef = useRef(null) + const [reconWidth, setReconWidth] = useState(0) + + // Pixel-based word positions for overlay mode + const overlayImageUrl = sessionId + ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped` + : '' + const cellWordPositions = usePixelWordPositions( + overlayImageUrl, + mergedGridCells, + editorMode === 'overlay', + ) + + // Track reconstruction container width for font size calculation + useEffect(() => { + const el = reconRef.current + if (!el) return + const obs = new ResizeObserver(entries => { + for (const entry of entries) setReconWidth(entry.contentRect.width) + }) + obs.observe(el) + return () => obs.disconnect() + }, [editorMode]) + // Load session data on mount useEffect(() => { if (!sessionId) return @@ -97,6 +129,100 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp setEditedTexts(new Map()) setUndoStack([]) setRedoStack([]) + + // Check for parent with boxes (sub-sessions + zones) + const columnResult: ColumnResult | undefined = data.column_result + const rowResult: RowResult | undefined = data.row_result + const subSessions: { id: string; box_index: number }[] = data.sub_sessions || [] + const zones: PageZone[] = columnResult?.zones || [] + const hasBoxes = subSessions.length > 0 && zones.some(z => z.zone_type === 'box') + + setIsParentWithBoxes(hasBoxes) + + if (columnResult?.columns) setParentColumns(columnResult.columns) + if (rowResult?.rows) setParentRows(rowResult.rows) + if (zones.length > 0) setParentZones(zones) + + // Store image dimensions + if (wordResult.image_width && wordResult.image_height) { + setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height }) + } + + if (hasBoxes) { + // Default to overlay mode for parent sessions with boxes + setEditorMode('overlay') + + // Load sub-sessions and merge cells + const imgW = wordResult.image_width || 1 + const imgH = wordResult.image_height || 1 + const allMergedCells: GridCell[] = [...rawGridCells] + + for (const sub of subSessions) { + try { + const subRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sub.id}`) + if (!subRes.ok) continue + const subData = await subRes.json() + const subWordResult: GridResult | undefined = subData.word_result + if (!subWordResult?.cells) continue + + // Find the box zone for this sub-session + const boxZone = zones.find(z => z.zone_type === 'box') + if (!boxZone?.box) continue + + const box = boxZone.box + // Box coordinates are in pixels, convert to pct + const boxXPct = (box.x / imgW) * 100 + const boxYPct = (box.y / imgH) * 100 + const boxWPct = (box.width / imgW) * 100 + const boxHPct = (box.height / imgH) * 100 + + // Convert sub-session cell coordinates to parent coordinates + for (const subCell of subWordResult.cells) { + if (!subCell.bbox_pct) continue + const parentCellX = boxXPct + (subCell.bbox_pct.x / 100) * boxWPct + const parentCellY = boxYPct + (subCell.bbox_pct.y / 100) * boxHPct + const parentCellW = (subCell.bbox_pct.w / 100) * boxWPct + const parentCellH = (subCell.bbox_pct.h / 100) * boxHPct + + allMergedCells.push({ + ...subCell, + cell_id: `sub_${sub.id}_${subCell.cell_id}`, + bbox_pct: { + x: parentCellX, + y: parentCellY, + w: parentCellW, + h: parentCellH, + }, + bbox_px: { + x: Math.round(parentCellX / 100 * imgW), + y: Math.round(parentCellY / 100 * imgH), + w: Math.round(parentCellW / 100 * imgW), + h: Math.round(parentCellH / 100 * imgH), + }, + }) + } + } catch { + // Skip failing sub-sessions + } + } + + setMergedGridCells(allMergedCells) + + // Also add merged cells as editable cells + const mergedEditableCells: EditableCell[] = allMergedCells.map(c => ({ + cellId: c.cell_id, + text: c.text, + originalText: c.text, + bboxPct: c.bbox_pct, + colType: c.col_type, + rowIndex: c.row_index, + colIndex: c.col_index, + })) + setCells(mergedEditableCells) + } else { + setMergedGridCells(rawGridCells) + } + setStatus('ready') } catch (e: unknown) { setError(e instanceof Error ? e.message : String(e)) @@ -347,6 +473,215 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp ) } + // Overlay rendering helper + const renderOverlayMode = () => { + const imgW = imageNaturalSize?.w || 1 + const imgH = imageNaturalSize?.h || 1 + const aspect = imgH / imgW + const containerH = reconWidth * aspect + + return ( +
+ {/* Left: Original image */} +
+
+ Originalbild +
+
+ {/* eslint-disable-next-line @next/next/no-img-element */} + Original { + const img = e.target as HTMLImageElement + setImageNaturalSize({ w: img.naturalWidth, h: img.naturalHeight }) + }} + /> +
+
+ + {/* Right: Reconstructed table overlay */} +
+
+ Rekonstruktion ({cells.length} Zellen) +
+
+
+ {/* Column lines */} + {parentColumns + .filter(c => !['header', 'footer'].includes(c.type)) + .map((col, i) => ( +
+ ))} + + {/* Row lines */} + {parentRows.map((row, i) => ( +
+ ))} + + {/* Box zone highlight */} + {parentZones + .filter(z => z.zone_type === 'box' && z.box) + .map((z, i) => { + const box = z.box! + return ( +
+ ) + })} + + {/* Pixel-positioned words / editable inputs */} + {cells.map((cell) => { + const displayText = getDisplayText(cell) + const edited = isEdited(cell) + const wordPos = cellWordPositions.get(cell.cellId) + const cellHeightPx = containerH * (cell.bboxPct.h / 100) + + // Pixel-analysed: render word-groups at detected positions as inputs + if (wordPos && wordPos.length > 0) { + return wordPos.map((wp, i) => { + const autoFontPx = cellHeightPx * wp.fontRatio * fontScale + const fs = Math.max(6, autoFontPx) + + // For multi-group cells, only the first group is the primary input + // Show as span (read-only positioned) — editing happens at cell level + if (wordPos.length > 1) { + return ( + + {wp.text} + + ) + } + + // Single group: render as editable input at pixel position + return ( +
+ handleTextChange(cell.cellId, e.target.value)} + onKeyDown={(e) => handleKeyDown(e, cell.cellId)} + className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${ + edited ? 'bg-green-50/30' : '' + }`} + style={{ + fontSize: `${fs}px`, + fontWeight: globalBold ? 'bold' : (cell.colType === 'column_en' ? 'bold' : 'normal'), + fontFamily: "'Liberation Sans', Arial, sans-serif", + lineHeight: '1', + color: '#1a1a1a', + }} + title={`${cell.cellId} (${cell.colType})`} + /> + {edited && ( + + )} +
+ ) + }) + } + + // Multi-group cell with pixel positions: already handled above + // Fallback: no pixel data — single input at cell bbox + if (!cell.text) return null + + const fontSize = Math.max(6, cellHeightPx * fontScale) + return ( +
+ handleTextChange(cell.cellId, e.target.value)} + onKeyDown={(e) => handleKeyDown(e, cell.cellId)} + className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${ + edited ? 'bg-green-50/30' : '' + }`} + style={{ + fontSize: `${fontSize}px`, + fontWeight: globalBold ? 'bold' : 'normal', + fontFamily: "'Liberation Sans', Arial, sans-serif", + lineHeight: '1', + color: '#1a1a1a', + }} + title={`${cell.cellId} (${cell.colType})`} + /> + {edited && ( + + )} +
+ ) + })} +
+
+
+
+ ) + } + return (
{/* Toolbar */} @@ -367,6 +702,18 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp > Einfach + {isParentWithBoxes && ( + + )} + {/* Overlay-specific toolbar */} + {editorMode === 'overlay' && ( + <> + + +
+ + )} -
+ {/* Non-overlay controls */} + {editorMode !== 'overlay' && ( + <> + {/* Empty field toggle */} + - {/* Zoom controls */} - - {zoom}% - - +
-
+ {/* Zoom controls */} + + {zoom}% + + + +
+ + )}
- {/* Reconstruction canvas — Simple or Editor mode */} - {editorMode === 'editor' && sessionId ? ( + {/* Reconstruction canvas */} + {editorMode === 'overlay' ? ( + renderOverlayMode() + ) : editorMode === 'editor' && sessionId ? ( . + */ +export function usePixelWordPositions( + imageUrl: string, + cells: GridCell[], + active: boolean, +): Map { + const [cellWordPositions, setCellWordPositions] = useState>(new Map()) + + useEffect(() => { + if (!active || cells.length === 0 || !imageUrl) return + + const img = new Image() + img.crossOrigin = 'anonymous' + img.onload = () => { + const canvas = document.createElement('canvas') + canvas.width = img.naturalWidth + canvas.height = img.naturalHeight + const ctx = canvas.getContext('2d') + if (!ctx) return + ctx.drawImage(img, 0, 0) + + const refFontSize = 40 + const fontFam = "'Liberation Sans', Arial, sans-serif" + ctx.font = `${refFontSize}px ${fontFam}` + + const positions = new Map() + + for (const cell of cells) { + if (!cell.bbox_pct || !cell.text) continue + + // Split by 3+ whitespace into word-groups + const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean) + + // Pixel region for this cell + const imgW = img.naturalWidth + const imgH = img.naturalHeight + const cx = Math.round(cell.bbox_pct.x / 100 * imgW) + const cy = Math.round(cell.bbox_pct.y / 100 * imgH) + const cw = Math.round(cell.bbox_pct.w / 100 * imgW) + const ch = Math.round(cell.bbox_pct.h / 100 * imgH) + if (cw <= 0 || ch <= 0) continue + + const imageData = ctx.getImageData(cx, cy, cw, ch) + + // Vertical projection: count dark pixels per column + const proj = new Float32Array(cw) + for (let y = 0; y < ch; y++) { + for (let x = 0; x < cw; x++) { + const idx = (y * cw + x) * 4 + const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2] + if (lum < 128) proj[x]++ + } + } + + // Find dark-pixel clusters (word groups on the image) + const threshold = Math.max(1, ch * 0.03) + const minGap = Math.max(5, Math.round(cw * 0.02)) + const clusters: { start: number; end: number }[] = [] + let inCluster = false + let clStart = 0 + let gap = 0 + + for (let x = 0; x < cw; x++) { + if (proj[x] >= threshold) { + if (!inCluster) { clStart = x; inCluster = true } + gap = 0 + } else if (inCluster) { + gap++ + if (gap > minGap) { + clusters.push({ start: clStart, end: x - gap }) + inCluster = false + gap = 0 + } + } + } + if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap }) + + if (clusters.length === 0) continue + + const wordPos: WordPosition[] = [] + + if (groups.length <= 1) { + // Single group: position at first cluster, merge all clusters for width + const firstCl = clusters[0] + const lastCl = clusters[clusters.length - 1] + const clusterW = lastCl.end - firstCl.start + 1 + const measured = ctx.measureText(cell.text.trim()) + const autoFontPx = refFontSize * (clusterW / measured.width) + const fontRatio = Math.min(autoFontPx / ch, 1.0) + wordPos.push({ + xPct: cell.bbox_pct.x + (firstCl.start / cw) * cell.bbox_pct.w, + wPct: ((lastCl.end - firstCl.start + 1) / cw) * cell.bbox_pct.w, + text: cell.text.trim(), + fontRatio, + }) + } else if (clusters.length >= groups.length) { + // Multiple groups: match to clusters left-to-right + for (let i = 0; i < groups.length; i++) { + const cl = clusters[i] + const clusterW = cl.end - cl.start + 1 + const measured = ctx.measureText(groups[i]) + const autoFontPx = refFontSize * (clusterW / measured.width) + const fontRatio = Math.min(autoFontPx / ch, 1.0) + wordPos.push({ + xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w, + wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w, + text: groups[i], + fontRatio, + }) + } + } else { + continue // fewer clusters than groups — skip + } + + positions.set(cell.cell_id, wordPos) + } + + // Normalise: find the most common fontRatio (mode) and apply it to all + const allRatios: number[] = [] + for (const wps of positions.values()) { + for (const wp of wps) allRatios.push(wp.fontRatio) + } + if (allRatios.length > 0) { + // Bucket ratios to 2 decimal places, find mode + const buckets = new Map() + for (const r of allRatios) { + const key = Math.round(r * 50) / 50 // round to nearest 0.02 + buckets.set(key, (buckets.get(key) || 0) + 1) + } + let modeRatio = allRatios[0] + let modeCount = 0 + for (const [ratio, count] of buckets) { + if (count > modeCount) { modeRatio = ratio; modeCount = count } + } + // Apply mode to all word positions + for (const wps of positions.values()) { + for (const wp of wps) wp.fontRatio = modeRatio + } + } + + setCellWordPositions(positions) + } + img.src = imageUrl + }, [active, cells, imageUrl]) + + return cellWordPositions +}