From c3da131129b60a7b3ab62a3df6fa37eb2a68b66a Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 11 Mar 2026 17:59:31 +0100 Subject: [PATCH] fix: Slide fontRatio=1.0 und Token-Breite aus gerenderter Fontgroesse fontRatio war 0.65 (35% kleiner als Fallback-Rendering). Jetzt 1.0 wie beim Fallback. Token-Breiten berechnet aus measureText skaliert auf die tatsaechlich gerenderte Schriftgroesse (medianCh * 0.7). Co-Authored-By: Claude Opus 4.6 --- .../ocr-overlay/useSlideWordPositions.ts | 87 ++++++++++--------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts b/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts index f05e502..1ab5722 100644 --- a/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts +++ b/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts @@ -14,18 +14,15 @@ export interface WordPosition { * Takes ALL recognised words per cell and slides them left-to-right across * the row's dark-pixel projection until each word "locks" onto its ink. * - * Key design: font size is determined GLOBALLY (median cell height), - * NOT per-token. The slide only determines the x-position. Token width - * is derived from the global font size + canvas measureText, ensuring - * consistent sizing across all cells. + * Font size: fontRatio = 1.0 for all tokens. The renderer computes the + * actual font size as medianCellHeightPx * fontRatio * fontScale, which + * matches the fallback rendering exactly. The user controls size via the + * font-scale slider. * - * Algorithm per cell: - * 1. Build horizontal dark-pixel projection. - * 2. Find dark-pixel clusters (contiguous inked regions). - * 3. Split cell text into tokens. - * 4. Compute a global scale: median cell height → reference font → pixel widths. - * 5. For each token, slide from cursor position until ink coverage is found. - * 6. Place token at that x with width from measureText * globalScale. + * Position: each token's x-position is found by sliding a cursor from left + * to right and looking for dark-pixel coverage. Token width (wPct) is + * computed from canvas measureText proportional to the median cell height, + * giving visually correct character widths. * * Guarantees: no words dropped, no complex matching rules needed. */ @@ -61,12 +58,7 @@ export function useSlideWordPositions( ctx.drawImage(img, 0, 0) } - const refFontSize = 40 - const fontFam = "'Liberation Sans', Arial, sans-serif" - ctx.font = `${refFontSize}px ${fontFam}` - - // --- Compute a GLOBAL scale from median cell height --- - // This ensures all tokens across all cells get the same font size. + // --- Compute median cell height in image pixels --- const cellHeights = cells .filter(c => c.bbox_pct && c.bbox_pct.h > 0) .map(c => Math.round(c.bbox_pct.h / 100 * imgH)) @@ -75,17 +67,31 @@ export function useSlideWordPositions( ? cellHeights[Math.floor(cellHeights.length / 2)] : 30 - // Target font size in image pixels = fraction of median cell height. - // Typical printed text fills ~60-70% of the row height. - const targetFontPx = medianCh * 0.65 - // globalScale maps measureText pixels (at refFontSize) → image pixels - const globalScale = targetFontPx / refFontSize - // fontRatio for the renderer (medianCellHeightPx * fontRatio * fontScale = fontSize) - // We want autoFontPx = targetFontPx, renderer does medianCh * fontRatio * fontScale - // with fontScale=0.7 default → fontRatio = targetFontPx / (medianCh * 0.7) - // But we don't know fontScale here. So just set fontRatio = targetFontPx / medianCh - // and let the user's fontScale slider adjust. - const globalFontRatio = Math.min(targetFontPx / medianCh, 1.0) + // The renderer computes: fontSize = medianCellHeightPx * fontRatio * fontScale + // With fontRatio=1.0 and fontScale=0.7 (default), that's 70% of median cell height. + // We need to know how wide each token is at THAT rendered font size, + // expressed in image pixels. + // + // The rendered container is reconWidth px wide = imgW image pixels. + // So 1 image pixel = reconWidth/imgW display pixels. + // Rendered font size (display px) = medianCellHeightPx_display * 1.0 * fontScale + // medianCellHeightPx_display = medianCh * (reconWidth / imgW) + // So rendered font = medianCh * (reconWidth/imgW) * fontScale + // In image-pixel units: medianCh * fontScale + // + // measureText at refFontSize=40 gives pixel widths. + // Scale from refFontSize → actual image-pixel font size: + const refFontSize = 40 + const fontFam = "'Liberation Sans', Arial, sans-serif" + ctx.font = `${refFontSize}px ${fontFam}` + + // Approximate rendered font size in image pixels. + // fontScale default is 0.7 but we don't know it here. + // Use 0.7 as approximation — the slide positions will still be correct + // because we only use this for relative token widths (proportional). + const approxFontScale = 0.7 + const renderedFontImgPx = medianCh * approxFontScale + const measureScale = renderedFontImgPx / refFontSize const positions = new Map() @@ -136,28 +142,29 @@ export function useSlideWordPositions( const tokens = cell.text.split(/\s+/).filter(Boolean) if (tokens.length === 0) continue - // Token widths in image pixels (using global scale) - const tokenWidthsPx = tokens.map(t => Math.round(ctx.measureText(t).width * globalScale)) - const spaceWidthPx = Math.round(ctx.measureText(' ').width * globalScale) + // Token widths in image pixels at the approximate rendered font size + const tokenWidthsPx = tokens.map(t => + Math.max(4, Math.round(ctx.measureText(t).width * measureScale)) + ) + const spaceWidthPx = Math.max(2, Math.round(ctx.measureText(' ').width * measureScale)) // --- Slide each token left-to-right --- const wordPos: WordPosition[] = [] let cursor = 0 for (let ti = 0; ti < tokens.length; ti++) { - const tokenW = Math.max(1, tokenWidthsPx[ti]) + const tokenW = tokenWidthsPx[ti] - // Find first x from cursor where ≥15% of span has ink - const coverageNeeded = Math.max(1, Math.round(tokenW * 0.15)) + // Find first x from cursor where ≥20% of span has ink + const coverageNeeded = Math.max(1, Math.round(tokenW * 0.20)) let bestX = cursor - // Don't search beyond cell width - const searchLimit = Math.min(cw - 1, cw - tokenW) + const searchLimit = Math.max(cursor, cw - tokenW) for (let x = cursor; x <= searchLimit; x++) { let inkCount = 0 - const end = Math.min(x + tokenW, cw) - for (let dx = 0; dx < end - x; dx++) { + const spanEnd = Math.min(x + tokenW, cw) + for (let dx = 0; dx < spanEnd - x; dx++) { inkCount += ink[x + dx] } if (inkCount >= coverageNeeded) { @@ -165,7 +172,6 @@ export function useSlideWordPositions( break } // Safety: don't scan more than 40% of cell width past cursor - // to avoid tokens jumping far right when there's a large gap if (x > cursor + cw * 0.4 && ti > 0) { bestX = cursor break @@ -177,12 +183,11 @@ export function useSlideWordPositions( bestX = Math.max(0, cw - tokenW) } - // Convert to percentage wordPos.push({ xPct: cell.bbox_pct.x + (bestX / cw) * cell.bbox_pct.w, wPct: (tokenW / cw) * cell.bbox_pct.w, text: tokens[ti], - fontRatio: globalFontRatio, + fontRatio: 1.0, }) // Advance cursor: past this token + space