From 35f2706098d4fb330887ab161f51820893bd7247 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 11 Mar 2026 20:01:57 +0100 Subject: [PATCH] fix: Slide-Modus nutzt cell.text Tokens statt word_boxes Text (keine Woerter verloren) TEXT kommt aus cell.text (bereinigt, IPA-korrigiert). POSITIONEN kommen aus word_boxes (exakte OCR-Koordinaten). Tokens werden 1:1 in Leserichtung zugeordnet. Co-Authored-By: Claude Opus 4.6 --- .../ocr-overlay/useSlideWordPositions.ts | 91 +++++++++++++++---- 1 file changed, 75 insertions(+), 16 deletions(-) diff --git a/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts b/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts index c6695b9..8457b2a 100644 --- a/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts +++ b/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts @@ -11,14 +11,20 @@ export interface WordPosition { /** * "Slide from left" positioning using OCR word bounding boxes. * - * If the backend provides `word_boxes` (exact per-word coordinates from - * Tesseract/RapidOCR), we place each word directly at its OCR position. - * This gives pixel-accurate overlay without any heuristic pixel scanning. + * TEXT comes from cell.text (cleaned, IPA-corrected). + * POSITIONS come from word_boxes (exact OCR coordinates). * - * Fallback: if no word_boxes, slide tokens across dark-pixel projection - * (original slide algorithm). + * Tokens from cell.text are matched 1:1 (in order) to word_boxes + * sorted left-to-right. This guarantees: + * - ALL words from cell.text appear (no dropping) + * - Words preserve their reading order + * - Each word lands on its correct black-text position + * - No red words overlap each other * - * Font size: fontRatio = 1.0 for all (matches fallback rendering). + * If there are more tokens than boxes, the extra tokens get estimated + * positions (placed after the last box, using the average box width). + * + * Fallback: pixel-projection slide if no word_boxes available. 
*/ export function useSlideWordPositions( imageUrl: string, @@ -37,26 +43,79 @@ export function useSlideWordPositions( const imgW = img.naturalWidth const imgH = img.naturalHeight - // Check if we can use word_boxes (fast path — no canvas needed) const hasWordBoxes = cells.some(c => c.word_boxes && c.word_boxes.length > 0) if (hasWordBoxes) { - // --- FAST PATH: use OCR word bounding boxes directly --- + // --- WORD-BOX PATH: use OCR positions with cell.text tokens --- const positions = new Map() for (const cell of cells) { if (!cell.bbox_pct || !cell.text) continue - const boxes = cell.word_boxes - if (!boxes || boxes.length === 0) continue - const wordPos: WordPosition[] = boxes + // Tokens from the CLEANED cell text (reading order) + const tokens = cell.text.split(/\s+/).filter(Boolean) + if (tokens.length === 0) continue + + // Word boxes sorted left-to-right + const boxes = (cell.word_boxes || []) .filter(wb => wb.text.trim()) - .map(wb => ({ - xPct: (wb.left / imgW) * 100, - wPct: (wb.width / imgW) * 100, - text: wb.text, + .sort((a, b) => a.left - b.left) + + if (boxes.length === 0) { + // No boxes — spread tokens evenly across the cell width as fallback + const fallbackW = cell.bbox_pct.w / tokens.length + const wordPos = tokens.map((t, i) => ({ + xPct: cell.bbox_pct.x + i * fallbackW, + wPct: fallbackW, + text: t, fontRatio: 1.0, })) + positions.set(cell.cell_id, wordPos) + continue + } + + const wordPos: WordPosition[] = [] + + if (tokens.length <= boxes.length) { + // At least as many boxes as tokens: assign each token to a box in order. + // This handles the common case where box count matches or + // exceeds token count (e.g. OCR found extra fragments). 
+ for (let ti = 0; ti < tokens.length; ti++) { + const box = boxes[ti] + wordPos.push({ + xPct: (box.left / imgW) * 100, + wPct: (box.width / imgW) * 100, + text: tokens[ti], + fontRatio: 1.0, + }) + } + } else { + // More tokens than boxes: assign boxes to first N tokens, + // then spread remaining tokens after the last box. + for (let ti = 0; ti < boxes.length; ti++) { + const box = boxes[ti] + wordPos.push({ + xPct: (box.left / imgW) * 100, + wPct: (box.width / imgW) * 100, + text: tokens[ti], + fontRatio: 1.0, + }) + } + // Remaining tokens: estimate position after last box + const lastBox = boxes[boxes.length - 1] + let cursorPx = lastBox.left + lastBox.width + 5 + for (let ti = boxes.length; ti < tokens.length; ti++) { + // Estimate width from average box width + const avgW = boxes.reduce((s, b) => s + b.width, 0) / boxes.length + wordPos.push({ + xPct: (cursorPx / imgW) * 100, + wPct: (avgW / imgW) * 100, + text: tokens[ti], + fontRatio: 1.0, + }) + cursorPx += avgW + 5 + } + } if (wordPos.length > 0) { positions.set(cell.cell_id, wordPos) @@ -67,7 +126,7 @@ export function useSlideWordPositions( return } - // --- SLOW PATH: pixel-projection slide (fallback if no word_boxes) --- + // --- FALLBACK: pixel-projection slide (no word_boxes) --- const canvas = document.createElement('canvas') canvas.width = imgW canvas.height = imgH