feat: Overlay-Modus fuer ganzseitige Tabellenrekonstruktion mit Pixel-Positionierung

- usePixelWordPositions Hook extrahiert (shared zwischen StepLlmReview und StepReconstruction)
- StepReconstruction: neuer Overlay-Modus mit 50/50 Layout (Original + Rekonstruktion)
- Sub-Session-Zellen werden in Parent-Koordinaten konvertiert und zusammengefuehrt
- Spalten-/Zeilenlinien und Box-Zone-Markierung aus column_result/row_result
- Schriftgroesse-Slider und Bold-Toggle fuer Overlay
- StepLlmReview: ~140 Zeilen Pixel-Analyse durch Hook ersetzt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 16:18:47 +01:00
parent 7f8615b8c1
commit bcd97e7d78
3 changed files with 588 additions and 182 deletions
@@ -2,6 +2,7 @@

 import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import type { GridCell, GridResult, WordEntry, ColumnMeta } from '@/app/(admin)/ai/ocr-pipeline/types'
+import { usePixelWordPositions } from './usePixelWordPositions'

 const KLAUSUR_API = '/klausur-api'

@@ -92,8 +93,11 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
  const reconRef = useRef<HTMLDivElement>(null)
  const [reconWidth, setReconWidth] = useState(0)

-  // Pixel-analysed word positions: cell_id → [{xPct, wPct, text, fontRatio}]
-  const [cellWordPositions, setCellWordPositions] = useState<Map<string, { xPct: number; wPct: number; text: string; fontRatio: number }[]>>(new Map())
+  // Pixel-analysed word positions via shared hook
+  const overlayImageUrl = sessionId
+    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
+    : ''
+  const cellWordPositions = usePixelWordPositions(overlayImageUrl, cells, viewMode === 'overlay')

  const tableRef = useRef<HTMLDivElement>(null)
  const activeRowRef = useRef<HTMLTableRowElement>(null)
@@ -109,146 +113,6 @@ export function StepLlmReview({ sessionId, onNext }: StepLlmReviewProps) {
    return () => obs.disconnect()
  }, [viewMode])

-  // Pixel-based word positioning: analyse dark-pixel clusters on the image
-  useEffect(() => {
-    if (viewMode !== 'overlay' || cells.length === 0 || !sessionId) return
-
-    const imgUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
-    const img = new Image()
-    img.crossOrigin = 'anonymous'
-    img.onload = () => {
-      const canvas = document.createElement('canvas')
-      canvas.width = img.naturalWidth
-      canvas.height = img.naturalHeight
-      const ctx = canvas.getContext('2d')
-      if (!ctx) return
-      ctx.drawImage(img, 0, 0)
-
-      const refFontSize = 40
-      const fontFam = "'Liberation Sans', Arial, sans-serif"
-      ctx.font = `${refFontSize}px ${fontFam}`
-
-      const positions = new Map<string, { xPct: number; wPct: number; text: string; fontRatio: number }[]>()
-
-      for (const cell of cells) {
-        if (!cell.bbox_pct || !cell.text) continue
-
-        // Split by 3+ whitespace into word-groups
-        const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)
-
-        // Pixel region for this cell
-        const imgW = img.naturalWidth
-        const imgH = img.naturalHeight
-        const cx = Math.round(cell.bbox_pct.x / 100 * imgW)
-        const cy = Math.round(cell.bbox_pct.y / 100 * imgH)
-        const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
-        const ch = Math.round(cell.bbox_pct.h / 100 * imgH)
-        if (cw <= 0 || ch <= 0) continue
-
-        const imageData = ctx.getImageData(cx, cy, cw, ch)
-
-        // Vertical projection: count dark pixels per column
-        const proj = new Float32Array(cw)
-        for (let y = 0; y < ch; y++) {
-          for (let x = 0; x < cw; x++) {
-            const idx = (y * cw + x) * 4
-            const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
-            if (lum < 128) proj[x]++
-          }
-        }
-
-        // Find dark-pixel clusters (word groups on the image)
-        const threshold = Math.max(1, ch * 0.03)
-        const minGap = Math.max(5, Math.round(cw * 0.02))
-        const clusters: { start: number; end: number }[] = []
-        let inCluster = false
-        let clStart = 0
-        let gap = 0
-
-        for (let x = 0; x < cw; x++) {
-          if (proj[x] >= threshold) {
-            if (!inCluster) { clStart = x; inCluster = true }
-            gap = 0
-          } else if (inCluster) {
-            gap++
-            if (gap > minGap) {
-              clusters.push({ start: clStart, end: x - gap })
-              inCluster = false
-              gap = 0
-            }
-          }
-        }
-        if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })
-
-        if (clusters.length === 0) continue
-
-        const wordPos: { xPct: number; wPct: number; text: string; fontRatio: number }[] = []
-
-        if (groups.length <= 1) {
-          // Single group: position at first cluster, merge all clusters for width
-          const firstCl = clusters[0]
-          const lastCl = clusters[clusters.length - 1]
-          const clusterW = lastCl.end - firstCl.start + 1
-          // Auto font-size: fit text width to cluster width
-          const measured = ctx.measureText(cell.text.trim())
-          const autoFontPx = refFontSize * (clusterW / measured.width)
-          const fontRatio = Math.min(autoFontPx / ch, 1.0) // ratio of cell height
-          wordPos.push({
-            xPct: cell.bbox_pct.x + (firstCl.start / cw) * cell.bbox_pct.w,
-            wPct: ((lastCl.end - firstCl.start + 1) / cw) * cell.bbox_pct.w,
-            text: cell.text.trim(),
-            fontRatio,
-          })
-        } else if (clusters.length >= groups.length) {
-          // Multiple groups: match to clusters left-to-right
-          for (let i = 0; i < groups.length; i++) {
-            const cl = clusters[i]
-            const clusterW = cl.end - cl.start + 1
-            const measured = ctx.measureText(groups[i])
-            const autoFontPx = refFontSize * (clusterW / measured.width)
-            const fontRatio = Math.min(autoFontPx / ch, 1.0)
-            wordPos.push({
-              xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w,
-              wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w,
-              text: groups[i],
-              fontRatio,
-            })
-          }
-        } else {
-          continue // fewer clusters than groups — skip
-        }
-
-        positions.set(cell.cell_id, wordPos)
-      }
-
-      // Normalise: find the most common fontRatio (mode) and apply it to all
-      const allRatios: number[] = []
-      for (const wps of positions.values()) {
-        for (const wp of wps) allRatios.push(wp.fontRatio)
-      }
-      if (allRatios.length > 0) {
-        // Bucket ratios to 2 decimal places, find mode
-        const buckets = new Map<number, number>()
-        for (const r of allRatios) {
-          const key = Math.round(r * 50) / 50 // round to nearest 0.02
-          buckets.set(key, (buckets.get(key) || 0) + 1)
-        }
-        let modeRatio = allRatios[0]
-        let modeCount = 0
-        for (const [ratio, count] of buckets) {
-          if (count > modeCount) { modeRatio = ratio; modeCount = count }
-        }
-        // Apply mode to all word positions
-        for (const wps of positions.values()) {
-          for (const wp of wps) wp.fontRatio = modeRatio
-        }
-      }
-
-      setCellWordPositions(positions)
-    }
-    img.src = imgUrl
-  }, [viewMode, cells, sessionId])
-
  // Load session data on mount
  useEffect(() => {
    if (!sessionId) return
@@ -2,7 +2,8 @@

 import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import dynamic from 'next/dynamic'
-import type { GridResult, GridCell, WordEntry } from '@/app/(admin)/ai/ocr-pipeline/types'
+import type { GridResult, GridCell, ColumnResult, RowResult, PageZone, PageRegion, RowItem } from '@/app/(admin)/ai/ocr-pipeline/types'
+import { usePixelWordPositions } from './usePixelWordPositions'

 const KLAUSUR_API = '/klausur-api'

@@ -12,7 +13,7 @@ const FabricReconstructionCanvas = dynamic(
  { ssr: false, loading: () => <div className="py-8 text-center text-sm text-gray-400">Editor wird geladen...</div> }
 )

-type EditorMode = 'simple' | 'editor'
+type EditorMode = 'simple' | 'editor' | 'overlay'

 interface StepReconstructionProps {
  sessionId: string | null
@@ -46,11 +47,42 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
  const [undoStack, setUndoStack] = useState<UndoAction[]>([])
  const [redoStack, setRedoStack] = useState<UndoAction[]>([])

-  // (allCells removed — cells now contains all cells including empty ones)
-
  const containerRef = useRef<HTMLDivElement>(null)
  const imageRef = useRef<HTMLImageElement>(null)

+  // Overlay mode state
+  const [isParentWithBoxes, setIsParentWithBoxes] = useState(false)
+  const [mergedGridCells, setMergedGridCells] = useState<GridCell[]>([])
+  const [parentColumns, setParentColumns] = useState<PageRegion[]>([])
+  const [parentRows, setParentRows] = useState<RowItem[]>([])
+  const [parentZones, setParentZones] = useState<PageZone[]>([])
+  const [imageNaturalSize, setImageNaturalSize] = useState<{ w: number; h: number } | null>(null)
+  const [fontScale, setFontScale] = useState(0.7)
+  const [globalBold, setGlobalBold] = useState(false)
+  const reconRef = useRef<HTMLDivElement>(null)
+  const [reconWidth, setReconWidth] = useState(0)
+
+  // Pixel-based word positions for overlay mode
+  const overlayImageUrl = sessionId
+    ? `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/image/cropped`
+    : ''
+  const cellWordPositions = usePixelWordPositions(
+    overlayImageUrl,
+    mergedGridCells,
+    editorMode === 'overlay',
+  )
+
+  // Track reconstruction container width for font size calculation
+  useEffect(() => {
+    const el = reconRef.current
+    if (!el) return
+    const obs = new ResizeObserver(entries => {
+      for (const entry of entries) setReconWidth(entry.contentRect.width)
+    })
+    obs.observe(el)
+    return () => obs.disconnect()
+  }, [editorMode])
+
  // Load session data on mount
  useEffect(() => {
    if (!sessionId) return
@@ -97,6 +129,100 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
      setEditedTexts(new Map())
      setUndoStack([])
      setRedoStack([])
+
+      // Check for parent with boxes (sub-sessions + zones)
+      const columnResult: ColumnResult | undefined = data.column_result
+      const rowResult: RowResult | undefined = data.row_result
+      const subSessions: { id: string; box_index: number }[] = data.sub_sessions || []
+      const zones: PageZone[] = columnResult?.zones || []
+      const hasBoxes = subSessions.length > 0 && zones.some(z => z.zone_type === 'box')
+
+      setIsParentWithBoxes(hasBoxes)
+
+      if (columnResult?.columns) setParentColumns(columnResult.columns)
+      if (rowResult?.rows) setParentRows(rowResult.rows)
+      if (zones.length > 0) setParentZones(zones)
+
+      // Store image dimensions
+      if (wordResult.image_width && wordResult.image_height) {
+        setImageNaturalSize({ w: wordResult.image_width, h: wordResult.image_height })
+      }
+
+      if (hasBoxes) {
+        // Default to overlay mode for parent sessions with boxes
+        setEditorMode('overlay')
+
+        // Load sub-sessions and merge cells
+        const imgW = wordResult.image_width || 1
+        const imgH = wordResult.image_height || 1
+        const allMergedCells: GridCell[] = [...rawGridCells]
+
+        for (const sub of subSessions) {
+          try {
+            const subRes = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sub.id}`)
+            if (!subRes.ok) continue
+            const subData = await subRes.json()
+            const subWordResult: GridResult | undefined = subData.word_result
+            if (!subWordResult?.cells) continue
+
+            // Find the box zone for this sub-session
+            const boxZone = zones.find(z => z.zone_type === 'box')
+            if (!boxZone?.box) continue
+
+            const box = boxZone.box
+            // Box coordinates are in pixels, convert to pct
+            const boxXPct = (box.x / imgW) * 100
+            const boxYPct = (box.y / imgH) * 100
+            const boxWPct = (box.width / imgW) * 100
+            const boxHPct = (box.height / imgH) * 100
+
+            // Convert sub-session cell coordinates to parent coordinates
+            for (const subCell of subWordResult.cells) {
+              if (!subCell.bbox_pct) continue
+              const parentCellX = boxXPct + (subCell.bbox_pct.x / 100) * boxWPct
+              const parentCellY = boxYPct + (subCell.bbox_pct.y / 100) * boxHPct
+              const parentCellW = (subCell.bbox_pct.w / 100) * boxWPct
+              const parentCellH = (subCell.bbox_pct.h / 100) * boxHPct
+
+              allMergedCells.push({
+                ...subCell,
+                cell_id: `sub_${sub.id}_${subCell.cell_id}`,
+                bbox_pct: {
+                  x: parentCellX,
+                  y: parentCellY,
+                  w: parentCellW,
+                  h: parentCellH,
+                },
+                bbox_px: {
+                  x: Math.round(parentCellX / 100 * imgW),
+                  y: Math.round(parentCellY / 100 * imgH),
+                  w: Math.round(parentCellW / 100 * imgW),
+                  h: Math.round(parentCellH / 100 * imgH),
+                },
+              })
+            }
+          } catch {
+            // Skip failing sub-sessions
+          }
+        }
+
+        setMergedGridCells(allMergedCells)
+
+        // Also add merged cells as editable cells
+        const mergedEditableCells: EditableCell[] = allMergedCells.map(c => ({
+          cellId: c.cell_id,
+          text: c.text,
+          originalText: c.text,
+          bboxPct: c.bbox_pct,
+          colType: c.col_type,
+          rowIndex: c.row_index,
+          colIndex: c.col_index,
+        }))
+        setCells(mergedEditableCells)
+      } else {
+        setMergedGridCells(rawGridCells)
+      }
+
      setStatus('ready')
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : String(e))
@@ -347,6 +473,215 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
    )
  }

+  // Overlay rendering helper: original image on the left, reconstruction with absolutely positioned cell inputs on the right
+  const renderOverlayMode = () => {
+    const imgW = imageNaturalSize?.w || 1 // falls back to 1 until the image size is known (avoids division by zero)
+    const imgH = imageNaturalSize?.h || 1
+    const aspect = imgH / imgW
+    const containerH = reconWidth * aspect // reconstruction height in px, derived from the measured container width
+
+    return (
+      <div className="grid grid-cols-2 gap-4">
+        {/* Left: Original image */}
+        <div>
+          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
+            Originalbild
+          </div>
+          <div className="border rounded-lg overflow-hidden dark:border-gray-700 bg-gray-50 dark:bg-gray-900 sticky top-4">
+            {/* eslint-disable-next-line @next/next/no-img-element */}
+            <img
+              src={dewarpedUrl}
+              alt="Original"
+              className="w-full h-auto"
+              onLoad={(e) => {
+                const img = e.target as HTMLImageElement
+                setImageNaturalSize({ w: img.naturalWidth, h: img.naturalHeight })
+              }}
+            />
+          </div>
+        </div>
+
+        {/* Right: Reconstructed table overlay */}
+        <div>
+          <div className="text-xs font-medium text-gray-500 dark:text-gray-400 mb-1">
+            Rekonstruktion ({cells.length} Zellen)
+          </div>
+          <div className="border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden bg-white">
+            <div
+              ref={reconRef}
+              className="relative"
+              style={{
+                aspectRatio: `${imgW} / ${imgH}`,
+              }}
+            >
+              {/* Column lines */}
+              {parentColumns
+                .filter(c => !['header', 'footer'].includes(c.type))
+                .map((col, i) => (
+                  <div
+                    key={`col-${i}`}
+                    className="absolute top-0 bottom-0 border-l border-gray-300/50"
+                    style={{ left: `${(col.x / imgW) * 100}%` }}
+                  />
+                ))}
+
+              {/* Row lines */}
+              {parentRows.map((row, i) => (
+                <div
+                  key={`row-${i}`}
+                  className="absolute left-0 right-0 border-t border-gray-300/50"
+                  style={{ top: `${(row.y / imgH) * 100}%` }}
+                />
+              ))}
+
+              {/* Box zone highlight */}
+              {parentZones
+                .filter(z => z.zone_type === 'box' && z.box)
+                .map((z, i) => {
+                  const box = z.box!
+                  return (
+                    <div
+                      key={`box-${i}`}
+                      className="absolute border-2 border-blue-400/30 bg-blue-50/10 pointer-events-none"
+                      style={{
+                        left: `${(box.x / imgW) * 100}%`,
+                        top: `${(box.y / imgH) * 100}%`,
+                        width: `${(box.width / imgW) * 100}%`,
+                        height: `${(box.height / imgH) * 100}%`,
+                      }}
+                    />
+                  )
+                })}
+
+              {/* Pixel-positioned words / editable inputs */}
+              {cells.map((cell) => {
+                const displayText = getDisplayText(cell)
+                const edited = isEdited(cell)
+                const wordPos = cellWordPositions.get(cell.cellId)
+                const cellHeightPx = containerH * (cell.bboxPct.h / 100) // bboxPct.h is a percentage of the container height
+
+                // Pixel-analysed: render word-groups at their detected horizontal positions
+                if (wordPos && wordPos.length > 0) {
+                  return wordPos.map((wp, i) => {
+                    const autoFontPx = cellHeightPx * wp.fontRatio * fontScale // fontScale is set by the toolbar slider
+                    const fs = Math.max(6, autoFontPx) // never render below 6px
+
+                    // Multi-group cells: every group is drawn as a read-only span (pointer-events-none),
+                    // so these cells get no input element here and cannot be edited in this branch
+                    if (wordPos.length > 1) {
+                      return (
+                        <span
+                          key={`${cell.cellId}_wp_${i}`}
+                          className="absolute leading-none pointer-events-none select-none"
+                          style={{
+                            left: `${wp.xPct}%`,
+                            top: `${cell.bboxPct.y}%`,
+                            width: `${wp.wPct}%`,
+                            height: `${cell.bboxPct.h}%`,
+                            fontSize: `${fs}px`,
+                            fontWeight: globalBold ? 'bold' : (cell.colType === 'column_en' ? 'bold' : 'normal'),
+                            fontFamily: "'Liberation Sans', Arial, sans-serif",
+                            display: 'flex',
+                            alignItems: 'center',
+                            whiteSpace: 'nowrap',
+                            overflow: 'visible',
+                            color: '#1a1a1a',
+                          }}
+                        >
+                          {wp.text}
+                        </span>
+                      )
+                    }
+
+                    // Single group: render as editable input at pixel position
+                    return (
+                      <div key={`${cell.cellId}_wp_${i}`} className="absolute group" style={{
+                        left: `${wp.xPct}%`,
+                        top: `${cell.bboxPct.y}%`,
+                        width: `${wp.wPct}%`,
+                        height: `${cell.bboxPct.h}%`,
+                      }}>
+                        <input
+                          id={`cell-${cell.cellId}`}
+                          type="text"
+                          value={displayText}
+                          onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
+                          onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
+                          className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${
+                            edited ? 'bg-green-50/30' : ''
+                          }`}
+                          style={{
+                            fontSize: `${fs}px`,
+                            fontWeight: globalBold ? 'bold' : (cell.colType === 'column_en' ? 'bold' : 'normal'),
+                            fontFamily: "'Liberation Sans', Arial, sans-serif",
+                            lineHeight: '1',
+                            color: '#1a1a1a',
+                          }}
+                          title={`${cell.cellId} (${cell.colType})`}
+                        />
+                        {edited && (
+                          <button
+                            onClick={() => resetCell(cell.cellId)}
+                            className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
+                            title="Zuruecksetzen"
+                          >
+                            &times;
+                          </button>
+                        )}
+                      </div>
+                    )
+                  })
+                }
+
+                // Cells with pixel positions (single or multi-group) have already returned above
+                // Fallback: no pixel data — single input spanning the cell bbox; cells without text render nothing
+                if (!cell.text) return null
+
+                const fontSize = Math.max(6, cellHeightPx * fontScale)
+                return (
+                  <div key={cell.cellId} className="absolute group" style={{
+                    left: `${cell.bboxPct.x}%`,
+                    top: `${cell.bboxPct.y}%`,
+                    width: `${cell.bboxPct.w}%`,
+                    height: `${cell.bboxPct.h}%`,
+                  }}>
+                    <input
+                      id={`cell-${cell.cellId}`}
+                      type="text"
+                      value={displayText}
+                      onChange={(e) => handleTextChange(cell.cellId, e.target.value)}
+                      onKeyDown={(e) => handleKeyDown(e, cell.cellId)}
+                      className={`w-full h-full bg-transparent border-0 outline-none px-0 transition-colors ${
+                        edited ? 'bg-green-50/30' : ''
+                      }`}
+                      style={{
+                        fontSize: `${fontSize}px`,
+                        fontWeight: globalBold ? 'bold' : 'normal', // NOTE(review): unlike the pixel-positioned branches, column_en is not bolded here — confirm intended
+                        fontFamily: "'Liberation Sans', Arial, sans-serif",
+                        lineHeight: '1',
+                        color: '#1a1a1a',
+                      }}
+                      title={`${cell.cellId} (${cell.colType})`}
+                    />
+                    {edited && (
+                      <button
+                        onClick={() => resetCell(cell.cellId)}
+                        className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 text-white rounded-full text-[9px] leading-none opacity-0 group-hover:opacity-100 transition-opacity flex items-center justify-center"
+                        title="Zuruecksetzen"
+                      >
+                        &times;
+                      </button>
+                    )}
+                  </div>
+                )
+              })}
+            </div>
+          </div>
+        </div>
+      </div>
+    )
+  }
+
  return (
    <div className="space-y-3">
      {/* Toolbar */}
@@ -367,6 +702,18 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
            >
              Einfach
            </button>
+            {isParentWithBoxes && (
+              <button
+                onClick={() => setEditorMode('overlay')}
+                className={`px-2 py-0.5 transition-colors ${
+                  editorMode === 'overlay'
+                    ? 'bg-teal-600 text-white'
+                    : 'hover:bg-gray-50 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
+                }`}
+              >
+                Overlay
+              </button>
+            )}
            <button
              onClick={() => setEditorMode('editor')}
              className={`px-2 py-0.5 transition-colors ${
@@ -406,43 +753,74 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp

          <div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />

-          {/* Empty field toggle */}
-          <button
-            onClick={() => setShowEmptyHighlight(v => !v)}
-            className={`px-2 py-1 text-xs border rounded transition-colors ${
-              showEmptyHighlight
-                ? 'border-red-300 bg-red-50 text-red-600 dark:border-red-700 dark:bg-red-900/30 dark:text-red-400'
-                : 'border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
-            }`}
-            title="Leere Pflichtfelder markieren"
-          >
-            Leer
-          </button>
+          {/* Overlay-specific toolbar */}
+          {editorMode === 'overlay' && (
+            <>
+              <label className="flex items-center gap-1 text-xs text-gray-600 dark:text-gray-400">
+                Schrift
+                <input
+                  type="range" min={30} max={120} value={Math.round(fontScale * 100)}
+                  onChange={e => setFontScale(Number(e.target.value) / 100)}
+                  className="w-20 h-1 accent-teal-600"
+                />
+                <span className="w-8 text-right font-mono">{Math.round(fontScale * 100)}%</span>
+              </label>
+              <button
+                onClick={() => setGlobalBold(b => !b)}
+                className={`px-2 py-1 text-xs rounded border transition-colors font-bold ${
+                  globalBold
+                    ? 'bg-teal-600 text-white border-teal-600'
+                    : 'bg-white dark:bg-gray-700 text-gray-600 dark:text-gray-400 border-gray-300 dark:border-gray-600'
+                }`}
+              >
+                B
+              </button>
+              <div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
+            </>
+          )}

-          <div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
+          {/* Non-overlay controls */}
+          {editorMode !== 'overlay' && (
+            <>
+              {/* Empty field toggle */}
+              <button
+                onClick={() => setShowEmptyHighlight(v => !v)}
+                className={`px-2 py-1 text-xs border rounded transition-colors ${
+                  showEmptyHighlight
+                    ? 'border-red-300 bg-red-50 text-red-600 dark:border-red-700 dark:bg-red-900/30 dark:text-red-400'
+                    : 'border-gray-300 dark:border-gray-600 hover:bg-gray-50 dark:hover:bg-gray-700'
+                }`}
+                title="Leere Pflichtfelder markieren"
+              >
+                Leer
+              </button>

-          {/* Zoom controls */}
-          <button
-            onClick={() => setZoom(z => Math.max(50, z - 25))}
-            className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700"
-          >
-            &minus;
-          </button>
-          <span className="text-xs text-gray-500 w-10 text-center">{zoom}%</span>
-          <button
-            onClick={() => setZoom(z => Math.min(200, z + 25))}
-            className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700"
-          >
-            +
-          </button>
-          <button
-            onClick={() => setZoom(100)}
-            className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700"
-          >
-            Fit
-          </button>
+              <div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />

-          <div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
+              {/* Zoom controls */}
+              <button
+                onClick={() => setZoom(z => Math.max(50, z - 25))}
+                className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700"
+              >
+                &minus;
+              </button>
+              <span className="text-xs text-gray-500 w-10 text-center">{zoom}%</span>
+              <button
+                onClick={() => setZoom(z => Math.min(200, z + 25))}
+                className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700"
+              >
+                +
+              </button>
+              <button
+                onClick={() => setZoom(100)}
+                className="px-2 py-1 text-xs border border-gray-300 dark:border-gray-600 rounded hover:bg-gray-50 dark:hover:bg-gray-700"
+              >
+                Fit
+              </button>
+
+              <div className="w-px h-5 bg-gray-300 dark:bg-gray-600 mx-1" />
+            </>
+          )}

          <button
            onClick={saveReconstruction}
@@ -454,8 +832,10 @@ export function StepReconstruction({ sessionId, onNext }: StepReconstructionProp
        </div>
      </div>

-      {/* Reconstruction canvas — Simple or Editor mode */}
-      {editorMode === 'editor' && sessionId ? (
+      {/* Reconstruction canvas */}
+      {editorMode === 'overlay' ? (
+        renderOverlayMode()
+      ) : editorMode === 'editor' && sessionId ? (
        <FabricReconstructionCanvas
          sessionId={sessionId}
          cells={gridCells}
@@ -0,0 +1,162 @@
+import { useEffect, useState } from 'react'
+import type { GridCell } from '@/app/(admin)/ai/ocr-pipeline/types'
+
+/**
+ * Horizontal placement of one word group inside a grid cell, as produced by
+ * `usePixelWordPositions`.
+ */
+export interface WordPosition {
+  /** Left edge of the word group, in the same percent units as the cell's `bbox_pct.x`. */
+  xPct: number
+  /** Width of the word group, in the same percent units as the cell's `bbox_pct.w`. */
+  wPct: number
+  /** Text of the word group (the whole trimmed cell text when the cell has a single group). */
+  text: string
+  /**
+   * Font size relative to the cell's pixel height (capped at 1.0 when computed).
+   * The hook overwrites this with the most common ratio across all cells before returning.
+   */
+  fontRatio: number
+}
+
+/**
+ * Shared hook: analyses dark-pixel clusters on an image to determine the
+ * horizontal position and an automatic font size of the word groups in each cell.
+ *
+ * For each cell with a `bbox_pct` and text, the cell region is read from the
+ * image, dark pixels are counted per pixel column, and runs of dark columns are
+ * matched left-to-right to the cell's word groups (text split on 3+ whitespace).
+ * Cells with no clusters, or with fewer clusters than groups, get no entry.
+ *
+ * @param imageUrl URL of the image to analyse; an empty string disables the analysis.
+ * @param cells    Grid cells whose `bbox_pct` is given in percent of the image size.
+ * @param active   When false, no analysis is started (the last result is kept).
+ * @returns Map<cell_id, WordPosition[]>; empty until the first analysis completes.
+ */
+export function usePixelWordPositions(
+  imageUrl: string,
+  cells: GridCell[],
+  active: boolean,
+): Map<string, WordPosition[]> {
+  const [cellWordPositions, setCellWordPositions] = useState<Map<string, WordPosition[]>>(new Map())
+
+  useEffect(() => {
+    if (!active || cells.length === 0 || !imageUrl) return
+
+    // Set by the cleanup below so that a load finishing after the inputs changed
+    // (or after unmount) cannot overwrite newer results.
+    let cancelled = false
+
+    const img = new Image()
+    img.crossOrigin = 'anonymous'
+    img.onload = () => {
+      if (cancelled) return
+
+      const canvas = document.createElement('canvas')
+      canvas.width = img.naturalWidth
+      canvas.height = img.naturalHeight
+      const ctx = canvas.getContext('2d')
+      if (!ctx) return
+      ctx.drawImage(img, 0, 0)
+
+      // Text is measured at a fixed reference size; the real size is derived by
+      // scaling that size with (cluster width / measured width).
+      const refFontSize = 40
+      const fontFam = "'Liberation Sans', Arial, sans-serif"
+      ctx.font = `${refFontSize}px ${fontFam}`
+
+      const imgW = img.naturalWidth
+      const imgH = img.naturalHeight
+      const positions = new Map<string, WordPosition[]>()
+
+      for (const cell of cells) {
+        if (!cell.bbox_pct || !cell.text) continue
+
+        // Split by 3+ whitespace into word-groups
+        const groups = cell.text.split(/\s{3,}/).map(s => s.trim()).filter(Boolean)
+        // Whitespace-only text yields no groups; measuring '' would give width 0
+        // and a bogus fontRatio of 1.0 that skews the normalisation below.
+        if (groups.length === 0) continue
+
+        // Pixel region for this cell
+        const cx = Math.round(cell.bbox_pct.x / 100 * imgW)
+        const cy = Math.round(cell.bbox_pct.y / 100 * imgH)
+        const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
+        const ch = Math.round(cell.bbox_pct.h / 100 * imgH)
+        if (cw <= 0 || ch <= 0) continue
+
+        const imageData = ctx.getImageData(cx, cy, cw, ch)
+
+        // Vertical projection: count dark pixels per column
+        const proj = new Float32Array(cw)
+        for (let y = 0; y < ch; y++) {
+          for (let x = 0; x < cw; x++) {
+            const idx = (y * cw + x) * 4
+            // getImageData fills areas outside the canvas with transparent black;
+            // skip fully transparent pixels so they are not counted as ink.
+            if (imageData.data[idx + 3] === 0) continue
+            const lum = 0.299 * imageData.data[idx] + 0.587 * imageData.data[idx + 1] + 0.114 * imageData.data[idx + 2]
+            if (lum < 128) proj[x]++
+          }
+        }
+
+        // Find dark-pixel clusters (word groups on the image).
+        // A column counts as "ink" when at least ~3% of its pixels are dark;
+        // a cluster ends once more than `minGap` consecutive columns are empty.
+        const threshold = Math.max(1, ch * 0.03)
+        const minGap = Math.max(5, Math.round(cw * 0.02))
+        const clusters: { start: number; end: number }[] = []
+        let inCluster = false
+        let clStart = 0
+        let gap = 0
+
+        for (let x = 0; x < cw; x++) {
+          if (proj[x] >= threshold) {
+            if (!inCluster) { clStart = x; inCluster = true }
+            gap = 0
+          } else if (inCluster) {
+            gap++
+            if (gap > minGap) {
+              // `x - gap` is the last ink column before the gap started
+              clusters.push({ start: clStart, end: x - gap })
+              inCluster = false
+              gap = 0
+            }
+          }
+        }
+        // Close a cluster that runs to the right edge, dropping trailing empty columns
+        if (inCluster) clusters.push({ start: clStart, end: cw - 1 - gap })
+
+        if (clusters.length === 0) continue
+
+        const wordPos: WordPosition[] = []
+
+        if (groups.length === 1) {
+          // Single group: position at first cluster, merge all clusters for width
+          const firstCl = clusters[0]
+          const lastCl = clusters[clusters.length - 1]
+          const clusterW = lastCl.end - firstCl.start + 1
+          const text = cell.text.trim()
+          const measured = ctx.measureText(text)
+          const autoFontPx = refFontSize * (clusterW / measured.width)
+          const fontRatio = Math.min(autoFontPx / ch, 1.0)
+          wordPos.push({
+            xPct: cell.bbox_pct.x + (firstCl.start / cw) * cell.bbox_pct.w,
+            wPct: (clusterW / cw) * cell.bbox_pct.w,
+            text,
+            fontRatio,
+          })
+        } else if (clusters.length >= groups.length) {
+          // Multiple groups: match to clusters left-to-right
+          for (let i = 0; i < groups.length; i++) {
+            const cl = clusters[i]
+            const clusterW = cl.end - cl.start + 1
+            const measured = ctx.measureText(groups[i])
+            const autoFontPx = refFontSize * (clusterW / measured.width)
+            const fontRatio = Math.min(autoFontPx / ch, 1.0)
+            wordPos.push({
+              xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w,
+              wPct: (clusterW / cw) * cell.bbox_pct.w,
+              text: groups[i],
+              fontRatio,
+            })
+          }
+        } else {
+          continue // fewer clusters than groups — skip
+        }
+
+        positions.set(cell.cell_id, wordPos)
+      }
+
+      // Normalise: find the most common fontRatio (mode) and apply it to all
+      const allRatios: number[] = []
+      for (const wps of positions.values()) {
+        for (const wp of wps) allRatios.push(wp.fontRatio)
+      }
+      if (allRatios.length > 0) {
+        // Bucket ratios in steps of 0.02, find mode
+        const buckets = new Map<number, number>()
+        for (const r of allRatios) {
+          const key = Math.round(r * 50) / 50 // round to nearest 0.02
+          buckets.set(key, (buckets.get(key) || 0) + 1)
+        }
+        let modeRatio = allRatios[0]
+        let modeCount = 0
+        for (const [ratio, count] of buckets) {
+          if (count > modeCount) { modeRatio = ratio; modeCount = count }
+        }
+        // Apply mode to all word positions
+        for (const wps of positions.values()) {
+          for (const wp of wps) wp.fontRatio = modeRatio
+        }
+      }
+
+      setCellWordPositions(positions)
+    }
+    img.src = imageUrl
+
+    return () => {
+      // Inputs changed or component unmounted: ignore a pending load.
+      cancelled = true
+      img.onload = null
+    }
+  }, [active, cells, imageUrl])
+
+  return cellWordPositions
+}