fix: Slide-Modus globale Schriftgroesse statt per-Token Scale

Schriftgroesse wird jetzt GLOBAL aus der medianen Zellhoehe berechnet (65% der Zellhoehe als Ziel-Font), sodass alle Tokens dieselbe konsistente Groesse bekommen. Die Slide-Logik bestimmt nur noch die x-Position. Vorher wurde der Scale pro Zelle aus Ink-Span/Textbreite berechnet, was zu inkonsistenten Groessen fuehrte.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 16:51:55 +01:00
parent 2010cab894
commit b81baa1d16
1 changed files with 67 additions and 95 deletions
@@ -9,29 +9,25 @@ export interface WordPosition {
 }
 /**
- * Alternative positioning algorithm: "slide from left".
+ * "Slide from left" positioning algorithm.
 *
- * Instead of matching text groups to pixel clusters (which can lose words),
+ * Takes ALL recognised words per cell and slides them left-to-right across
- * this algorithm takes ALL recognised words and slides them left-to-right
+ * the row's dark-pixel projection until each word "locks" onto its ink.
- * across the row's dark-pixel projection until each word "locks" onto its
+ *
- * ink coverage.
+ * Key design: font size is determined GLOBALLY (median cell height),
 * NOT per-token.  The slide only determines the x-position.  Token width
 * is derived from the global font size + canvas measureText, ensuring
 * consistent sizing across all cells.
 *
 * Algorithm per cell:
- * 1. Build horizontal dark-pixel projection (same as cluster approach).
+ * 1. Build horizontal dark-pixel projection.
- * 2. Split the cell text into individual tokens (words/symbols).
+ * 2. Find dark-pixel clusters (contiguous inked regions).
- * 3. Measure each token's expected pixel width (canvas measureText).
+ * 3. Split cell text into tokens.
- * 4. Slide a cursor from x=0 rightward.  For each token, find the first
+ * 4. Compute a global scale: median cell height → reference font → pixel widths.
- *    x position where the projection has enough dark pixels under the
+ * 5. For each token, slide from cursor position until ink coverage is found.
- *    token's width span (≥ coverageThreshold of the span is "inked").
+ * 6. Place token at that x with width from measureText * globalScale.
 * 5. Lock the token at that x, advance cursor past it + a small gap.
 *
- * This guarantees:
+ * Guarantees: no words dropped, no complex matching rules needed.
 * - ALL words appear (nothing is dropped)
 * - Original spacing is roughly preserved (words land on their ink)
 * - Box borders/lines are naturally covered by "|" / "l" tokens
 * - No complex cluster-matching or artifact-merging rules needed
 *
 * Returns Map<cell_id, WordPosition[]>.
 */
 export function useSlideWordPositions(
  imageUrl: string,
@@ -69,12 +65,34 @@ export function useSlideWordPositions(
      const fontFam = "'Liberation Sans', Arial, sans-serif"
      ctx.font = `${refFontSize}px ${fontFam}`
      // --- Compute a GLOBAL scale from median cell height ---
      // This ensures all tokens across all cells get the same font size.
      const cellHeights = cells
        .filter(c => c.bbox_pct && c.bbox_pct.h > 0)
        .map(c => Math.round(c.bbox_pct.h / 100 * imgH))
        .sort((a, b) => a - b)
      const medianCh = cellHeights.length > 0
        ? cellHeights[Math.floor(cellHeights.length / 2)]
        : 30
      // Target font size in image pixels = fraction of median cell height.
      // Typical printed text fills ~60-70% of the row height.
      const targetFontPx = medianCh * 0.65
      // globalScale maps measureText pixels (at refFontSize) → image pixels
      const globalScale = targetFontPx / refFontSize
      // fontRatio for the renderer, which computes
      //   fontSize = medianCellHeightPx * fontRatio * fontScale.
      // To render exactly at targetFontPx we would need
      //   fontRatio = targetFontPx / (medianCh * fontScale)   (fontScale defaults to 0.7),
      // but fontScale is not known here.  So we set fontRatio = targetFontPx / medianCh
      // and let the user's fontScale slider adjust the final size.
      const globalFontRatio = Math.min(targetFontPx / medianCh, 1.0)
      const positions = new Map<string, WordPosition[]>()
      for (const cell of cells) {
        if (!cell.bbox_pct || !cell.text) continue
-        // --- Get cell rectangle in image pixels ---
+        // --- Cell rectangle in image pixels ---
        let cx: number, cy: number
        const cw = Math.round(cell.bbox_pct.w / 100 * imgW)
        const ch = Math.round(cell.bbox_pct.h / 100 * imgH)
@@ -91,7 +109,7 @@ export function useSlideWordPositions(
        if (cy < 0) cy = 0
        if (cx + cw > imgW || cy + ch > imgH) continue
-        // --- Build dark-pixel projection ---
+        // --- Dark-pixel projection ---
        const imageData = ctx.getImageData(cx, cy, cw, ch)
        const proj = new Float32Array(cw)
        for (let y = 0; y < ch; y++) {
@@ -102,98 +120,73 @@ export function useSlideWordPositions(
          }
        }
        // Dark pixel threshold per column (minimum to count as "inked")
        const threshold = Math.max(1, ch * 0.03)
-        // Build binary ink mask: true if column has enough dark pixels
+        // Binary ink mask
        const ink = new Uint8Array(cw)
        for (let x = 0; x < cw; x++) {
          ink[x] = proj[x] >= threshold ? 1 : 0
        }
        // For 180° rotation, flip the ink mask
        if (rotation === 180) {
          ink.reverse()
        }
-        // --- Split text into tokens ---
+        // --- Tokens ---
        // Split on any run of whitespace into individual words; each token is
        // positioned independently.  Triple-space column groups from OCR are
        // NOT treated specially here: /\s+/ collapses them like any other gap.
        const tokens = cell.text.split(/\s+/).filter(Boolean)
        if (tokens.length === 0) continue
-        // Measure each token's width in pixels (at reference font size)
+        // Token widths in image pixels (using global scale)
-        const tokenWidths = tokens.map(t => ctx.measureText(t).width)
+        const tokenWidthsPx = tokens.map(t => Math.round(ctx.measureText(t).width * globalScale))
        const spaceWidthPx = Math.round(ctx.measureText(' ').width * globalScale)
-        // Total measured width of all tokens + inter-word spaces
+        // --- Slide each token left-to-right ---
        const spaceWidth = ctx.measureText(' ').width
        const totalTextW = tokenWidths.reduce((a, b) => a + b, 0) + (tokens.length - 1) * spaceWidth
        // Scale factor: map measured text width → pixel width on image.
        // Use the total INK SPAN (first dark pixel to last dark pixel),
        // not the count of dark columns.  Text characters have gaps between
        // strokes, so counting only dark pixels gives a much-too-small scale.
        let firstInk = -1, lastInk = -1
        for (let x = 0; x < cw; x++) {
          if (ink[x]) {
            if (firstInk < 0) firstInk = x
            lastInk = x
          }
        }
        // If almost no ink, skip
        if (firstInk < 0 || lastInk <= firstInk) continue
        const inkSpan = lastInk - firstInk + 1
        const scale = inkSpan / totalTextW
        // --- Slide each token from left to right ---
        const wordPos: WordPosition[] = []
-        let cursor = 0 // current search position in cell pixels
+        let cursor = 0
        const minGapPx = Math.max(2, Math.round(cw * 0.005)) // minimum gap between tokens
        for (let ti = 0; ti < tokens.length; ti++) {
-          const tokenW = Math.round(tokenWidths[ti] * scale)
+          const tokenW = Math.max(1, tokenWidthsPx[ti])
          if (tokenW <= 0) continue
-          // Find first position from cursor where the token has enough ink coverage.
+          // Find first x from cursor where ≥15% of span has ink
          // "Enough" = at least 15% of the token's width has ink underneath.
          const coverageNeeded = Math.max(1, Math.round(tokenW * 0.15))
          let bestX = cursor
-          for (let x = cursor; x <= cw - tokenW; x++) {
+          // Don't search beyond cell width
          const searchLimit = Math.min(cw - 1, cw - tokenW)
          for (let x = cursor; x <= searchLimit; x++) {
            let inkCount = 0
-            for (let dx = 0; dx < tokenW; dx++) {
+            const end = Math.min(x + tokenW, cw)
            for (let dx = 0; dx < end - x; dx++) {
              inkCount += ink[x + dx]
            }
            if (inkCount >= coverageNeeded) {
              bestX = x
              break
            }
-            // If we've scanned way past where ink should be, just use cursor
+            // Safety: don't scan more than 40% of cell width past cursor
-            if (x > cursor + cw * 0.3 && ti > 0) {
+            // to avoid tokens jumping far right when there's a large gap
            if (x > cursor + cw * 0.4 && ti > 0) {
              bestX = cursor
              break
            }
          }
-          // Compute font size from token width vs measured width
+          // Clamp to cell bounds
-          const autoFontPx = refFontSize * (tokenW / tokenWidths[ti])
+          if (bestX + tokenW > cw) {
-          const fontRatio = Math.min(autoFontPx / ch, 1.0)
+            bestX = Math.max(0, cw - tokenW)
-
+          }
          // Convert pixel position to percentage within cell, then to image %
          const xInCellPct = bestX / cw
          const wInCellPct = tokenW / cw
          // Convert to percentage
          wordPos.push({
-            xPct: cell.bbox_pct.x + xInCellPct * cell.bbox_pct.w,
+            xPct: cell.bbox_pct.x + (bestX / cw) * cell.bbox_pct.w,
-            wPct: wInCellPct * cell.bbox_pct.w,
+            wPct: (tokenW / cw) * cell.bbox_pct.w,
            text: tokens[ti],
-            fontRatio,
+            fontRatio: globalFontRatio,
          })
-          // Advance cursor past this token + gap
+          // Advance cursor: past this token + space
-          cursor = bestX + tokenW + minGapPx
+          cursor = bestX + tokenW + spaceWidthPx
        }
        if (wordPos.length > 0) {
@@ -201,27 +194,6 @@ export function useSlideWordPositions(
        }
      }
      // Normalise font: use mode fontRatio for all words
      const allRatios: number[] = []
      for (const wps of positions.values()) {
        for (const wp of wps) allRatios.push(wp.fontRatio)
      }
      if (allRatios.length > 0) {
        const buckets = new Map<number, number>()
        for (const r of allRatios) {
          const key = Math.round(r * 50) / 50
          buckets.set(key, (buckets.get(key) || 0) + 1)
        }
        let modeRatio = allRatios[0]
        let modeCount = 0
        for (const [ratio, count] of buckets) {
          if (count > modeCount) { modeRatio = ratio; modeCount = count }
        }
        for (const wps of positions.values()) {
          for (const wp of wps) wp.fontRatio = modeRatio
        }
      }
      setResult(positions)
    }
    img.src = imageUrl