feat: Overlay-Modus fuer ganzseitige Tabellenrekonstruktion mit Pixel-Positionierung

- usePixelWordPositions Hook extrahiert (shared zwischen StepLlmReview und StepReconstruction)
- StepReconstruction: neuer Overlay-Modus mit 50/50 Layout (Original + Rekonstruktion)
- Sub-Session-Zellen werden in Parent-Koordinaten konvertiert und zusammengefuehrt
- Spalten-/Zeilenlinien und Box-Zone-Markierung aus column_result/row_result
- Schriftgroesse-Slider und Bold-Toggle fuer Overlay
- StepLlmReview: ~140 Zeilen Pixel-Analyse durch Hook ersetzt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 16:18:47 +01:00
parent 7f8615b8c1
commit bcd97e7d78
3 changed files with 588 additions and 182 deletions
--- a/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx
+++ b/admin-lehrer/components/ocr-pipeline/StepLlmReview.tsx
@@ -2,6 +2,7 @@

 import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import type { GridCell, GridResult, WordEntry, ColumnMeta } from '@/app/(admin)/ai/ocr-pipeline/types'
+import { usePixelWordPositions } from './usePixelWordPositions'

 const KLAUSUR_API = '/klausur-api'

@@ -92,8 +93,11 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
  const reconRef = useRef<HTMLDivElement>(null)
  const [reconWidth, setReconWidth] = useState(0)

-  // Pixel-analysed word positions: cell_id → [{xPct, wPct, text, fontRatio}]
-  const [cellWordPositions, setCellWordPositions] = useState<Map<string, { xPct: number; wPct: number; text: string; fontRatio: number }[]>>(new Map())
+  // Pixel-analysed word positions via shared hook
+  const overlayImageUrl = sessionId
+    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
+    : ''
+  const cellWordPositions = usePixelWordPositions(overlayImageUrl, cells, viewMode === 'overlay')

  const tableRef = useRef<HTMLDivElement>(null)
  const activeRowRef = useRef<HTMLTableRowElement>(null)
@@ -109,146 +113,6 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
    return () => obs.disconnect()
  }, [viewMode])

-  // Pixel-based word positioning: analyse dark-pixel clusters on the image
-  useEffect(() => {
-    if (viewMode !== 'overlay' || cells.length === 0 || !sessionId) return
-
-    const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
-    const img = new Image()
-    img.crossOrigin = 'anonymous'
-    img.onload = () => {
-      const canvas = document.createElement('canvas')
-      canvas.width = img.naturalWidth
-      canvas.height = img.naturalHeight
-      const ctx = canvas.getContext('2d')
-      if (!ctx) return
-      ctx.drawImage(img, 0, 0)
-
-      const refFontSize = 40
-      const fontFam = "'Liberation Sans', Arial, sans-serif"
-      ctx.font = `${refFontSize}px ${fontFam}`
-
-      const positions = new Map<string, { xPct: number; wPct: number; text: string; fontRatio: number }[]>()
-
-      for (const cell of cells) {
-        if (!cell.bbox_pct || !cell.text) continue
-
-        // Split by 3+ whitespace into word-groups
-        const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)
-
-        // Pixel region for this cell
-        const imgW = img.naturalWidth
-        const imgH = img.naturalHeight
-        const cx = Math.round(cell.bbox_pct.x / 100 * imgW)
-        const cy = Math.round(cell.bbox_pct.y / 100 * imgH)
-        const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
-        const ch = Math.round(cell.bbox_pct.h / 100 * imgH)
-        if (cw <= 0 || ch <= 0) continue
-
-        const imageData = ctx.getImageData(cx, cy, cw, ch)
-
-        // Vertical projection: count dark pixels per column
-        const proj = new Float32Array(cw)
-        for (let y = 0; y < ch; y++) {
-          for (let x = 0; x < cw; x++) {
-            const idx = (y * cw + x) * 4
-            const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
-            if (lum < 128) proj[x]++
-          }
-        }
-
-        // Find dark-pixel clusters (word groups on the image)
-        const threshold = Math.max(1, ch * 0.03)
-        const minGap = Math.max(5, Math.round(cw * 0.02))
-        const clusters: { start: number; end: number }[] = []
-        let inCluster = false
-        let clStart = 0
-        let gap = 0
-
-        for (let x = 0; x < cw; x++) {
-          if (proj[x] >= threshold) {
-            if (!inCluster) { clStart = x; inCluster = true }
-            gap = 0
-          } else if (inCluster) {
-            gap++
-            if (gap > minGap) {
-              clusters.push({ start: clStart, end: x - gap })
-              inCluster = false
-              gap = 0
-            }
-          }
-        }
-        if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })
-
-        if (clusters.length === 0) continue
-
-        const wordPos: { xPct: number; wPct: number; text: string; fontRatio: number }[] = []
-
-        if (groups.length <= 1) {
-          // Single group: position at first cluster, merge all clusters for width
-          const firstCl = clusters[0]
-          const lastCl = clusters[clusters.length - 1]
-          const clusterW = lastCl.end - firstCl.start + 1
-          // Auto font-size: fit text width to cluster width
-          const measured = ctx.measureText(cell.text.trim())
-          const autoFontPx = refFontSize * (clusterW / measured.width)
-          const fontRatio = Math.min(autoFontPx / ch, 1.0) // ratio of cell height
-          wordPos.push({
-            xPct: cell.bbox_pct.x + (firstCl.start / cw) * cell.bbox_pct.w,
-            wPct: ((lastCl.end - firstCl.start + 1) / cw) * cell.bbox_pct.w,
-            text: cell.text.trim(),
-            fontRatio,
-          })
-        } else if (clusters.length >= groups.length) {
-          // Multiple groups: match to clusters left-to-right
-          for (let i = 0; i < groups.length; i++) {
-            const cl = clusters[i]
-            const clusterW = cl.end - cl.start + 1
-            const measured = ctx.measureText(groups[i])
-            const autoFontPx = refFontSize * (clusterW / measured.width)
-            const fontRatio = Math.min(autoFontPx / ch, 1.0)
-            wordPos.push({
-              xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w,
-              wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w,
-              text: groups[i],
-              fontRatio,
-            })
-          }
-        } else {
-          continue // fewer clusters than groups — skip
-        }
-
-        positions.set(cell.cell_id, wordPos)
-      }
-
-      // Normalise: find the most common fontRatio (mode) and apply it to all
-      const allRatios: number[] = []
-      for (const wps of positions.values()) {
-        for (const wp of wps) allRatios.push(wp.fontRatio)
-      }
-      if (allRatios.length > 0) {
-        // Bucket ratios to 2 decimal places, find mode
-        const buckets = new Map<number, number>()
-        for (const r of allRatios) {
-          const key = Math.round(r * 50) / 50 // round to nearest 0.02
-          buckets.set(key, (buckets.get(key) || 0) + 1)
-        }
-        let modeRatio = allRatios[0]
-        let modeCount = 0
-        for (const [ratio, count] of buckets) {
-          if (count > modeCount) { modeRatio = ratio; modeCount = count }
-        }
-        // Apply mode to all word positions
-        for (const wps of positions.values()) {
-          for (const wp of wps) wp.fontRatio = modeRatio
-        }
-      }
-
-      setCellWordPositions(positions)
-    }
-    img.src = imgUrl
-  }, [viewMode, cells, sessionId])
-
  // Load session data on mount
  useEffect(() => {
    if (!sessionId) return