From ad28f9420a18b3ee7482dd2fdb0bae3084baaec4 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 10 Mar 2026 12:36:57 +0100 Subject: [PATCH] feat: Pixel-basierte Wortpositionierung im Overlay Analysiert Schwarzpixel-Verteilung auf dem Originalbild per Canvas. Findet Wort-Cluster pro Zeile und positioniert erkannte Textgruppen an den exakten Pixel-Positionen. Monospace-Font zurueck auf Sans-Serif. Co-Authored-By: Claude Opus 4.6 --- .../components/ocr-pipeline/StepLlmReview.tsx | 126 +++++++++++++++++- 1 file changed, 125 insertions(+), 1 deletion(-) diff --git a/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx b/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx index 61b75e0..eecff9b 100644 --- a/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx +++ b/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx @@ -92,6 +92,9 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) { const reconRef = useRef(null) const [reconWidth, setReconWidth] = useState(0) + // Pixel-analysed word positions: cell_id → [{xPct, wPct, text}] + const [cellWordPositions, setCellWordPositions] = useState>(new Map()) + const tableRef = useRef(null) const activeRowRef = useRef(null) @@ -106,6 +109,95 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) { return () => obs.disconnect() }, [viewMode]) + // Pixel-based word positioning: analyse dark-pixel clusters on the image + useEffect(() => { + if (viewMode !== 'overlay' || cells.length === 0 || !sessionId) return + + const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped` + const img = new Image() + img.crossOrigin = 'anonymous' + img.onload = () => { + const canvas = document.createElement('canvas') + canvas.width = img.naturalWidth + canvas.height = img.naturalHeight + const ctx = canvas.getContext('2d') + if (!ctx) return + ctx.drawImage(img, 0, 0) + + const positions = new Map() + + for (const cell of cells) { + if (!cell.bbox_pct || !cell.text) continue + + // Split by 3+ whitespace — only analyse cells with multiple word-groups + const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean) + if (groups.length <= 1) continue + + // Pixel region for this cell + const imgW = img.naturalWidth + const imgH = img.naturalHeight + const cx = Math.round(cell.bbox_pct.x / 100 * imgW) + const cy = Math.round(cell.bbox_pct.y / 100 * imgH) + const cw = Math.round(cell.bbox_pct.w / 100 * imgW) + const ch = Math.round(cell.bbox_pct.h / 100 * imgH) + if (cw <= 0 || ch <= 0) continue + + const imageData = ctx.getImageData(cx, cy, cw, ch) + + // Vertical projection: count dark pixels per column + const proj = new Float32Array(cw) + for (let y = 0; y < ch; y++) { + for (let x = 0; x < cw; x++) { + const idx = (y * cw + x) * 4 + const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2] + if (lum < 128) proj[x]++ + } + } + + // Find dark-pixel clusters (word groups on the image) + const threshold = Math.max(1, ch * 0.03) + const minGap = Math.max(5, Math.round(cw * 0.02)) + const clusters: { start: number; end: number }[] = [] + let inCluster = false + let clStart = 0 + let gap = 0 + + for (let x = 0; x < cw; x++) { + if (proj[x] >= threshold) { + if (!inCluster) { clStart = x; inCluster = true } + gap = 0 + } else if (inCluster) { + gap++ + if (gap > minGap) { + clusters.push({ start: clStart, end: x - gap }) + inCluster = false + gap = 0 + } + } + } + if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap }) + + // Need enough clusters for all word groups + if (clusters.length < groups.length) continue + + // Match word-groups to clusters left-to-right + const wordPos: { xPct: number; wPct: number; text: string }[] = [] + for (let i = 0; i < groups.length; i++) { + const cl = clusters[i] + wordPos.push({ + xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w, + wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w, + text: groups[i], + }) + } + positions.set(cell.cell_id, wordPos) + } + + setCellWordPositions(positions) + } + img.src = imgUrl + }, [viewMode, cells, sessionId]) + // Load session data on mount useEffect(() => { if (!sessionId) return @@ -701,6 +793,38 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) { const containerH = reconWidth * aspect const cellHeightPx = containerH * (cell.bbox_pct.h / 100) const fontSize = Math.max(6, cellHeightPx * fontScale) + + const wordPos = cellWordPositions.get(cell.cell_id) + + // Pixel-analysed: render each word-group at its detected position + if (wordPos && wordPos.length > 1) { + return wordPos.map((wp, i) => ( + handleCellEdit(cell.cell_id, cell.row_index, e.currentTarget.textContent)} + > + {wp.text} + + )) + } + + // Fallback: single span for entire cell return (