feat: Pixel-basierte Wortpositionierung im Overlay

Analysiert die Schwarzpixel-Verteilung auf dem Originalbild per Canvas. Findet Wort-Cluster pro Zeile und positioniert erkannte Textgruppen an den exakten Pixel-Positionen. Die Overlay-Schrift wird von Monospace zurück auf Sans-Serif umgestellt.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 12:36:57 +01:00
parent 6314e60464
commit ad28f9420a
1 changed file with 125 additions and 1 deletion
@@ -92,6 +92,9 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
  const reconRef = useRef<HTMLDivElement>(null)
  const [reconWidth, setReconWidth] = useState(0)

+  // Pixel-analysed word positions, keyed by cell_id → [{xPct, wPct, text}].
+  // xPct / wPct are percentages on the same scale as cell.bbox_pct (they are used
+  // directly as CSS `left` / `width`); text is one word-group of the cell's text.
+  // Cells without an entry are rendered by the overlay as a single span.
+  const [cellWordPositions, setCellWordPositions] = useState<Map<string, { xPct: number; wPct: number; text: string }[]>>(new Map())
+
  const tableRef = useRef<HTMLDivElement>(null)
  const activeRowRef = useRef<HTMLTableRowElement>(null)

@@ -106,6 +109,95 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
    return () => obs.disconnect()
  }, [viewMode])

+  // Pixel-based word positioning: analyse dark-pixel clusters on the image.
+  //
+  // In overlay mode, loads the cropped session image into an off-screen canvas and,
+  // for every cell whose text contains several word-groups (separated by 3+
+  // whitespace characters), builds a vertical projection of dark pixels over the
+  // cell's bounding box. Runs of dark columns become clusters; the word-groups are
+  // assigned to the clusters left-to-right and stored in cellWordPositions as
+  // percentages of the image width.
+  useEffect(() => {
+    if (viewMode !== 'overlay' || cells.length === 0 || !sessionId) return

+    // Set by the cleanup so a late image load from an outdated run (changed
+    // cells/session, or unmount) cannot overwrite newer state.
+    let cancelled = false

+    const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
+    const img = new Image()
+    // Required so the canvas stays readable for a cross-origin image
+    img.crossOrigin = 'anonymous'
+    img.onload = () => {
+      if (cancelled) return

+      const imgW = img.naturalWidth
+      const imgH = img.naturalHeight
+      if (imgW <= 0 || imgH <= 0) return

+      const canvas = document.createElement('canvas')
+      canvas.width = imgW
+      canvas.height = imgH
+      const ctx = canvas.getContext('2d')
+      if (!ctx) return
+      ctx.drawImage(img, 0, 0)

+      const positions = new Map<string, { xPct: number; wPct: number; text: string }[]>()

+      try {
+        for (const cell of cells) {
+          if (!cell.bbox_pct || !cell.text) continue

+          // Split by 3+ whitespace — only analyse cells with multiple word-groups
+          const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)
+          if (groups.length <= 1) continue

+          // Pixel region for this cell, clamped to the image. Pixels outside the
+          // canvas are returned as transparent black by getImageData and would
+          // otherwise be counted as dark.
+          const rawX = Math.round(cell.bbox_pct.x / 100 * imgW)
+          const rawY = Math.round(cell.bbox_pct.y / 100 * imgH)
+          const rawW = Math.round(cell.bbox_pct.w / 100 * imgW)
+          const rawH = Math.round(cell.bbox_pct.h / 100 * imgH)
+          const cx = Math.max(0, rawX)
+          const cy = Math.max(0, rawY)
+          const cw = Math.min(imgW, rawX + rawW) - cx
+          const ch = Math.min(imgH, rawY + rawH) - cy
+          if (cw <= 0 || ch <= 0) continue

+          const imageData = ctx.getImageData(cx, cy, cw, ch)

+          // Vertical projection: count dark pixels per column
+          const proj = new Float32Array(cw)
+          for (let y = 0; y < ch; y++) {
+            for (let x = 0; x < cw; x++) {
+              const idx = (y * cw + x) * 4
+              const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
+              if (lum < 128) proj[x]++
+            }
+          }

+          // Find dark-pixel clusters (word groups on the image).
+          // A column is "dark" with at least 3% of the cell height in dark pixels;
+          // a cluster ends after more than minGap consecutive non-dark columns.
+          const threshold = Math.max(1, ch * 0.03)
+          const minGap = Math.max(5, Math.round(cw * 0.02))
+          const clusters: { start: number; end: number }[] = []
+          let inCluster = false
+          let clStart = 0
+          let gap = 0

+          for (let x = 0; x < cw; x++) {
+            if (proj[x] >= threshold) {
+              if (!inCluster) { clStart = x; inCluster = true }
+              gap = 0
+            } else if (inCluster) {
+              gap++
+              if (gap > minGap) {
+                // x - gap is the last dark column before the gap started
+                clusters.push({ start: clStart, end: x - gap })
+                inCluster = false
+                gap = 0
+              }
+            }
+          }
+          if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })

+          // Need enough clusters for all word groups
+          if (clusters.length < groups.length) continue

+          // More clusters than word-groups: merge the two neighbours with the
+          // narrowest gap until the counts match, instead of silently dropping
+          // the right-most clusters.
+          while (clusters.length > groups.length) {
+            let mergeAt = 0
+            let narrowest = Infinity
+            for (let i = 0; i < clusters.length - 1; i++) {
+              const between = clusters[i + 1].start - clusters[i].end
+              if (between < narrowest) {
+                narrowest = between
+                mergeAt = i
+              }
+            }
+            clusters.splice(mergeAt, 2, { start: clusters[mergeAt].start, end: clusters[mergeAt + 1].end })
+          }

+          // Match word-groups to clusters left-to-right; positions are expressed
+          // as percentages of the image width, measured from the analysed region.
+          const wordPos: { xPct: number; wPct: number; text: string }[] = []
+          for (let i = 0; i < groups.length; i++) {
+            const cl = clusters[i]
+            wordPos.push({
+              xPct: ((cx + cl.start) / imgW) * 100,
+              wPct: ((cl.end - cl.start + 1) / imgW) * 100,
+              text: groups[i],
+            })
+          }
+          positions.set(cell.cell_id, wordPos)
+        }
+      } catch (err) {
+        // getImageData throws (e.g. SecurityError) when the canvas is not readable;
+        // keep the previous positions and let the overlay use its fallback.
+        console.warn('Pixel-based word positioning failed', err)
+        return
+      }

+      setCellWordPositions(positions)
+    }
+    img.src = imgUrl

+    return () => {
+      cancelled = true
+      img.onload = null
+    }
+  }, [viewMode, cells, sessionId])
+
  // Load session data on mount
  useEffect(() => {
    if (!sessionId) return
@@ -701,6 +793,38 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
                    const containerH = reconWidth * aspect
                    const cellHeightPx = containerH * (cell.bbox_pct.h / 100)
                    const fontSize = Math.max(6, cellHeightPx * fontScale)
+
+                    // Word-group positions measured on the image for this cell (if any)
+                    const wordPos = cellWordPositions.get(cell.cell_id)
+
+                    // Pixel-analysed: render each word-group at its detected position
+                    if (wordPos && wordPos.length > 1) {
+                      return wordPos.map((wp, i) => (
+                        <span
+                          key={`${cell.cell_id}_${i}`}
+                          className="absolute leading-none overflow-hidden"
+                          contentEditable
+                          suppressContentEditableWarning
+                          style={{
+                            left: `${wp.xPct}%`,
+                            top: `${cell.bbox_pct.y}%`,
+                            width: `${wp.wPct}%`,
+                            height: `${cell.bbox_pct.h}%`,
+                            fontSize: `${fontSize}px`,
+                            fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
+                            fontFamily: "'Liberation Sans', Arial, sans-serif",
+                            display: 'flex',
+                            alignItems: 'center',
+                            whiteSpace: 'nowrap',
+                            color: '#1a1a1a',
+                          }}
+                          onBlur={(e) => {
+                            const edited = e.currentTarget.textContent ?? ''
+                            // Unchanged: do nothing, so a mere focus/blur never rewrites the cell
+                            if (edited === wp.text) return
+                            // This span holds only one word-group. Rebuild the full cell
+                            // text from all groups (3 spaces between them, so it splits
+                            // into the same groups again) instead of replacing the whole
+                            // cell with this single group.
+                            const merged = wordPos
+                              .map((g, j) => (j === i ? edited : g.text))
+                              .filter(t => t.trim() !== '')
+                              .join('   ')
+                            handleCellEdit(cell.cell_id, cell.row_index, merged)
+                          }}
+                        >
+                          {wp.text}
+                        </span>
+                      ))
+                    }
+
+                    // Fallback: single span for entire cell
                    return (
                      <span
                        key={cell.cell_id}
@@ -715,7 +839,7 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
                          fontSize: `${fontSize}px`,
                          fontWeight: globalBold ? 'bold' : (cell.is_bold ? 'bold' : 'normal'),
                          paddingLeft: `${leftPaddingPct}%`,
-                          fontFamily: "'Courier New', 'Liberation Mono', monospace",
+                          fontFamily: "'Liberation Sans', Arial, sans-serif",
                          display: 'flex',
                          alignItems: 'center',
                          whiteSpace: 'pre',