diff --git a/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts b/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts index 6b2246d..4128920 100644 --- a/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts +++ b/admin-lehrer/components/ocr-overlay/useSlideWordPositions.ts @@ -46,21 +46,22 @@ export function useSlideWordPositions( const hasWordBoxes = cells.some(c => c.word_boxes && c.word_boxes.length > 0) if (hasWordBoxes) { - // --- WORD-BOX PATH: use OCR positions with cell.text tokens --- - // Uses fuzzy text matching to pair each token with its best box, - // handling reordering, IPA corrections, and token count mismatches. + // --- WORD-BOX PATH: use OCR positions directly --- + // Each word_box already has exact coordinates from OCR. + // Use them as-is — no fuzzy matching needed. const positions = new Map() for (const cell of cells) { if (!cell.bbox_pct || !cell.text) continue - const tokens = cell.text.split(/\s+/).filter(Boolean) - if (tokens.length === 0) continue - const boxes = (cell.word_boxes || []) .filter(wb => wb.text.trim()) + .sort((a, b) => a.left - b.left) if (boxes.length === 0) { + // No word_boxes — spread tokens evenly across cell + const tokens = cell.text.split(/\s+/).filter(Boolean) + if (tokens.length === 0) continue const fallbackW = cell.bbox_pct.w / tokens.length const wordPos = tokens.map((t, i) => ({ xPct: cell.bbox_pct.x + i * fallbackW, @@ -72,80 +73,13 @@ export function useSlideWordPositions( continue } - // Match each token to its best box by text similarity. - // Normalize: lowercase, strip brackets/punctuation for comparison. - const norm = (s: string) => s.toLowerCase().replace(/[^a-z0-9äöüß]/g, '') - - const used = new Set() - const tokenBoxIdx: (number | null)[] = [] - - for (const token of tokens) { - const tn = norm(token) - let bestIdx = -1 - let bestScore = 0 - - for (let bi = 0; bi < boxes.length; bi++) { - if (used.has(bi)) continue - const bn = norm(boxes[bi].text) - // Score: length of common prefix / max length - let common = 0 - const minLen = Math.min(tn.length, bn.length) - for (let k = 0; k < minLen; k++) { - if (tn[k] === bn[k]) common++ - else break - } - // Also check if token is a substring of box text or vice versa - const containsBonus = (bn.includes(tn) || tn.includes(bn)) ? 0.5 : 0 - const score = (minLen > 0 ? common / Math.max(tn.length, bn.length) : 0) + containsBonus - if (score > bestScore) { - bestScore = score - bestIdx = bi - } - } - - if (bestIdx >= 0 && bestScore > 0.2) { - used.add(bestIdx) - tokenBoxIdx.push(bestIdx) - } else { - tokenBoxIdx.push(null) // no match - } - } - - // Build positions: matched tokens get box positions, - // unmatched tokens get interpolated between neighbors. - const wordPos: WordPosition[] = [] - - for (let ti = 0; ti < tokens.length; ti++) { - const bi = tokenBoxIdx[ti] - if (bi !== null) { - const box = boxes[bi] - wordPos.push({ - xPct: (box.left / imgW) * 100, - wPct: (box.width / imgW) * 100, - text: tokens[ti], - fontRatio: 1.0, - }) - } else { - // Interpolate: find nearest matched neighbor before/after - let prevPx = cell.bbox_pct.x / 100 * imgW - let prevW = 0 - for (let p = ti - 1; p >= 0; p--) { - if (tokenBoxIdx[p] !== null) { - const pb = boxes[tokenBoxIdx[p]!] - prevPx = pb.left + pb.width + 5 - prevW = pb.width - break - } - } - const estW = prevW > 0 ? prevW : (cell.bbox_pct.w / 100 * imgW / tokens.length) - wordPos.push({ - xPct: (prevPx / imgW) * 100, - wPct: (estW / imgW) * 100, - text: tokens[ti], - fontRatio: 1.0, - }) - } - } + // Use each word_box directly with its OCR coordinates + const wordPos: WordPosition[] = boxes.map(box => ({ + xPct: (box.left / imgW) * 100, + wPct: (box.width / imgW) * 100, + text: box.text, + fontRatio: 1.0, + })) if (wordPos.length > 0) { positions.set(cell.cell_id, wordPos)