From b81baa1d1671353016c475b898b6ef2566b6db3d Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBook-Pro.fritz.box>
Date: Wed, 11 Mar 2026 16:51:55 +0100
Subject: [PATCH] fix: Slide-Modus globale Schriftgroesse statt per-Token Scale

Schriftgroesse wird jetzt GLOBAL aus der medianen Zellhoehe berechnet
(65% der Zellhoehe als Ziel-Font). Alle Tokens bekommen dieselbe
konsistente Groesse. Die Slide-Logik bestimmt nur noch die x-Position.

Vorher: Scale pro Zelle aus Ink-Span/Textbreite -> inkonsistente Groessen.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../ocr-overlay/useSlideWordPositions.ts      | 162 ++++++++----------
 1 file changed, 67 insertions(+), 95 deletions(-)

diff --git a/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts b/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts
index d7f060a..f05e502 100644
--- a/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts
+++ b/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts
@@ -9,29 +9,25 @@ export interface WordPosition {
 }
 
 /**
- * Alternative positioning algorithm: "slide from left".
+ * "Slide from left" positioning algorithm.
  *
- * Instead of matching text groups to pixel clusters (which can lose words),
- * this algorithm takes ALL recognised words and slides them left-to-right
- * across the row's dark-pixel projection until each word "locks" onto its
- * ink coverage.
+ * Takes ALL recognised words per cell and slides them left-to-right across
+ * the row's dark-pixel projection until each word "locks" onto its ink.
+ *
+ * Key design: font size is determined GLOBALLY (median cell height),
+ * NOT per-token.  The slide only determines the x-position.  Token width
+ * is derived from the global font size + canvas measureText, ensuring
+ * consistent sizing across all cells.
  *
  * Algorithm per cell:
- * 1. Build horizontal dark-pixel projection (same as cluster approach).
- * 2. Split the cell text into individual tokens (words/symbols).
- * 3. Measure each token's expected pixel width (canvas measureText).
- * 4. Slide a cursor from x=0 rightward.  For each token, find the first
- *    x position where the projection has enough dark pixels under the
- *    token's width span (≥ coverageThreshold of the span is "inked").
- * 5. Lock the token at that x, advance cursor past it + a small gap.
+ * 1. Build horizontal dark-pixel projection.
+ * 2. Threshold the projection into a binary per-column ink mask.
+ * 3. Split cell text into tokens.
+ * 4. Compute a global scale: median cell height → reference font → pixel widths.
+ * 5. For each token, slide from cursor position until ink coverage is found.
+ * 6. Place token at that x with width from measureText * globalScale.
  *
- * This guarantees:
- * - ALL words appear (nothing is dropped)
- * - Original spacing is roughly preserved (words land on their ink)
- * - Box borders/lines are naturally covered by "|" / "l" tokens
- * - No complex cluster-matching or artifact-merging rules needed
- *
- * Returns Map<cell_id, WordPosition[]>.
+ * Guarantees: no words dropped, no complex matching rules needed.
  */
 export function useSlideWordPositions(
   imageUrl: string,
@@ -69,12 +65,34 @@ export function useSlideWordPositions(
       const fontFam = "'Liberation Sans', Arial, sans-serif"
       ctx.font = `${refFontSize}px ${fontFam}`
 
+      // --- Compute a GLOBAL scale from median cell height ---
+      // This ensures all tokens across all cells get the same font size.
+      const cellHeights = cells
+        .filter(c => c.bbox_pct && c.bbox_pct.h > 0)
+        .map(c => Math.round(c.bbox_pct.h / 100 * imgH))
+        .sort((a, b) => a - b)
+      const medianCh = cellHeights.length > 0
+        ? cellHeights[Math.floor(cellHeights.length / 2)]
+        : 30
+
+      // Target font size in image pixels = fraction of median cell height.
+      // Typical printed text fills ~60-70% of the row height.
+      const targetFontPx = medianCh * 0.65
+      // globalScale maps measureText pixels (at refFontSize) → image pixels
+      const globalScale = targetFontPx / refFontSize
+      // fontRatio for the renderer, which computes
+      //   fontSize = medianCellHeightPx * fontRatio * fontScale  (fontScale defaults to 0.7).
+      // Rendering at exactly targetFontPx would need fontRatio = targetFontPx / (medianCh * 0.7),
+      // but fontScale is not known here. So we set fontRatio = targetFontPx / medianCh
+      // and let the user's fontScale slider adjust the final size.
+      const globalFontRatio = Math.min(targetFontPx / medianCh, 1.0)
+
       const positions = new Map<string, WordPosition[]>()
 
       for (const cell of cells) {
         if (!cell.bbox_pct || !cell.text) continue
 
-        // --- Get cell rectangle in image pixels ---
+        // --- Cell rectangle in image pixels ---
         let cx: number, cy: number
         const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
         const ch = Math.round(cell.bbox_pct.h / 100 * imgH)
@@ -91,7 +109,7 @@ export function useSlideWordPositions(
         if (cy < 0) cy = 0
         if (cx + cw > imgW || cy + ch > imgH) continue
 
-        // --- Build dark-pixel projection ---
+        // --- Dark-pixel projection ---
         const imageData = ctx.getImageData(cx, cy, cw, ch)
         const proj = new Float32Array(cw)
         for (let y = 0; y < ch; y++) {
@@ -102,98 +120,73 @@ export function useSlideWordPositions(
           }
         }
 
-        // Dark pixel threshold per column (minimum to count as "inked")
         const threshold = Math.max(1, ch * 0.03)
 
-        // Build binary ink mask: true if column has enough dark pixels
+        // Binary ink mask
         const ink = new Uint8Array(cw)
         for (let x = 0; x < cw; x++) {
           ink[x] = proj[x] >= threshold ? 1 : 0
         }
 
-        // For 180° rotation, flip the ink mask
         if (rotation === 180) {
           ink.reverse()
         }
 
-        // --- Split text into tokens ---
-        // Use triple-space groups first (preserving OCR column separation),
-        // then split each group into individual words for fine positioning.
+        // --- Tokens ---
         const tokens = cell.text.split(/\s+/).filter(Boolean)
         if (tokens.length === 0) continue
 
-        // Measure each token's width in pixels (at reference font size)
-        const tokenWidths = tokens.map(t => ctx.measureText(t).width)
+        // Token widths in image pixels (using global scale)
+        const tokenWidthsPx = tokens.map(t => Math.round(ctx.measureText(t).width * globalScale))
+        const spaceWidthPx = Math.round(ctx.measureText(' ').width * globalScale)
 
-        // Total measured width of all tokens + inter-word spaces
-        const spaceWidth = ctx.measureText(' ').width
-        const totalTextW = tokenWidths.reduce((a, b) => a + b, 0) + (tokens.length - 1) * spaceWidth
-
-        // Scale factor: map measured text width → pixel width on image.
-        // Use the total INK SPAN (first dark pixel to last dark pixel),
-        // not the count of dark columns.  Text characters have gaps between
-        // strokes, so counting only dark pixels gives a much-too-small scale.
-        let firstInk = -1, lastInk = -1
-        for (let x = 0; x < cw; x++) {
-          if (ink[x]) {
-            if (firstInk < 0) firstInk = x
-            lastInk = x
-          }
-        }
-
-        // If almost no ink, skip
-        if (firstInk < 0 || lastInk <= firstInk) continue
-
-        const inkSpan = lastInk - firstInk + 1
-        const scale = inkSpan / totalTextW
-
-        // --- Slide each token from left to right ---
+        // --- Slide each token left-to-right ---
         const wordPos: WordPosition[] = []
-        let cursor = 0 // current search position in cell pixels
-        const minGapPx = Math.max(2, Math.round(cw * 0.005)) // minimum gap between tokens
+        let cursor = 0
 
         for (let ti = 0; ti < tokens.length; ti++) {
-          const tokenW = Math.round(tokenWidths[ti] * scale)
-          if (tokenW <= 0) continue
+          const tokenW = Math.max(1, tokenWidthsPx[ti])
 
-          // Find first position from cursor where the token has enough ink coverage.
-          // "Enough" = at least 15% of the token's width has ink underneath.
+          // Find first x from cursor where ≥15% of span has ink
           const coverageNeeded = Math.max(1, Math.round(tokenW * 0.15))
           let bestX = cursor
 
-          for (let x = cursor; x <= cw - tokenW; x++) {
+          // Don't search beyond cell width
+          const searchLimit = Math.min(cw - 1, cw - tokenW)
+
+          for (let x = cursor; x <= searchLimit; x++) {
             let inkCount = 0
-            for (let dx = 0; dx < tokenW; dx++) {
+            const end = Math.min(x + tokenW, cw)
+            for (let dx = 0; dx < end - x; dx++) {
               inkCount += ink[x + dx]
             }
             if (inkCount >= coverageNeeded) {
               bestX = x
               break
             }
-            // If we've scanned way past where ink should be, just use cursor
-            if (x > cursor + cw * 0.3 && ti > 0) {
+            // Safety (tokens after the first only): stop once the scan is >40% of cell width
+            // past cursor and fall back to cursor, so a large gap can't fling the token far right
+            if (x > cursor + cw * 0.4 && ti > 0) {
               bestX = cursor
               break
             }
           }
 
-          // Compute font size from token width vs measured width
-          const autoFontPx = refFontSize * (tokenW / tokenWidths[ti])
-          const fontRatio = Math.min(autoFontPx / ch, 1.0)
-
-          // Convert pixel position to percentage within cell, then to image %
-          const xInCellPct = bestX / cw
-          const wInCellPct = tokenW / cw
+          // Clamp to cell bounds
+          if (bestX + tokenW > cw) {
+            bestX = Math.max(0, cw - tokenW)
+          }
 
+          // Convert to percentage
           wordPos.push({
-            xPct: cell.bbox_pct.x + xInCellPct * cell.bbox_pct.w,
-            wPct: wInCellPct * cell.bbox_pct.w,
+            xPct: cell.bbox_pct.x + (bestX / cw) * cell.bbox_pct.w,
+            wPct: (tokenW / cw) * cell.bbox_pct.w,
             text: tokens[ti],
-            fontRatio,
+            fontRatio: globalFontRatio,
           })
 
-          // Advance cursor past this token + gap
-          cursor = bestX + tokenW + minGapPx
+          // Advance cursor: past this token + space
+          cursor = bestX + tokenW + spaceWidthPx
         }
 
         if (wordPos.length > 0) {
@@ -201,27 +194,6 @@ export function useSlideWordPositions(
         }
       }
 
-      // Normalise font: use mode fontRatio for all words
-      const allRatios: number[] = []
-      for (const wps of positions.values()) {
-        for (const wp of wps) allRatios.push(wp.fontRatio)
-      }
-      if (allRatios.length > 0) {
-        const buckets = new Map<number, number>()
-        for (const r of allRatios) {
-          const key = Math.round(r * 50) / 50
-          buckets.set(key, (buckets.get(key) || 0) + 1)
-        }
-        let modeRatio = allRatios[0]
-        let modeCount = 0
-        for (const [ratio, count] of buckets) {
-          if (count > modeCount) { modeRatio = ratio; modeCount = count }
-        }
-        for (const wps of positions.values()) {
-          for (const wp of wps) wp.fontRatio = modeRatio
-        }
-      }
-
       setResult(positions)
     }
     img.src = imageUrl