fix: deduplicate overlapping OCR words and use per-word Y positions in overlay
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 30s
CI / test-go-edu-search (push) Successful in 33s
CI / test-python-klausur (push) Failing after 2m9s
CI / test-python-agent-core (push) Successful in 19s
CI / test-nodejs-website (push) Successful in 24s

Backend: Add spatial overlap check (>=50% horizontal IoU) to Kombi merge
so words at the same position are deduplicated even when OCR text differs.

Frontend: Add yPct/hPct to WordPosition so each word renders at its actual
vertical position instead of all words collapsing to the cell center Y.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-13 20:27:08 +01:00
parent 703e110bab
commit d6f51e4418
5 changed files with 70 additions and 4 deletions

View File

@@ -513,9 +513,9 @@ export function OverlayReconstruction({ sessionId, onNext }: OverlayReconstructi
className="absolute leading-none pointer-events-none select-none"
style={{
left: `${wp.xPct}%`,
top: `${bboxPct.y}%`,
top: `${wp.yPct}%`,
width: `${wp.wPct}%`,
height: `${bboxPct.h}%`,
height: `${wp.hPct}%`,
fontSize: `${fs}px`,
fontWeight: globalBold ? 'bold' : 'normal',
fontFamily: "'Liberation Sans', Arial, sans-serif",
@@ -534,9 +534,9 @@ export function OverlayReconstruction({ sessionId, onNext }: OverlayReconstructi
return (
<div key={`${cell.cellId}_wp_${i}`} className="absolute group" style={{
left: `${wp.xPct}%`,
top: `${bboxPct.y}%`,
top: `${wp.yPct}%`,
width: `${wp.wPct}%`,
height: `${bboxPct.h}%`,
height: `${wp.hPct}%`,
}}>
<input
id={`cell-${cell.cellId}`}

View File

@@ -4,6 +4,8 @@ import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
/**
 * Placement and text of one word drawn on the OCR overlay.
 *
 * All four geometry fields are percentages in the same coordinate space as
 * `cell.bbox_pct`; the overlay writes them directly into CSS `left`, `top`,
 * `width` and `height` as `%` values.
 */
export interface WordPosition {
// Left edge in percent: cell.bbox_pct.x plus an offset inside the cell.
xPct: number
// Width in percent: a fraction of cell.bbox_pct.w.
wPct: number
// Top edge in percent. The producers visible in this hook copy
// cell.bbox_pct.y, so every word of a cell shares the cell's top edge.
yPct: number
// Height in percent. The producers visible in this hook copy cell.bbox_pct.h.
hPct: number
// Text rendered for this word (a token group, or the trimmed cell text).
text: string
// NOTE(review): presumably a font-size scale factor for this word; its
// computation and consumption are not visible here — confirm before relying on it.
fontRatio: number
}
@@ -192,6 +194,8 @@ export function usePixelWordPositions(
wordPos.push({
xPct: cell.bbox_pct.x + (cl.start / cw) * cell.bbox_pct.w,
wPct: ((cl.end - cl.start + 1) / cw) * cell.bbox_pct.w,
yPct: cell.bbox_pct.y,
hPct: cell.bbox_pct.h,
text: groups[gi],
fontRatio,
})
@@ -209,6 +213,8 @@ export function usePixelWordPositions(
wordPos.push({
xPct: cell.bbox_pct.x + (widest.start / cw) * cell.bbox_pct.w,
wPct: ((widest.end - widest.start + 1) / cw) * cell.bbox_pct.w,
yPct: cell.bbox_pct.y,
hPct: cell.bbox_pct.h,
text: cell.text.trim(),
fontRatio,
})

View File

@@ -4,6 +4,8 @@ import type { GridCell } from '@/app/(admin)/ai/ocr-overlay/types'
/**
 * Placement and text of one word drawn on the OCR overlay.
 *
 * All four geometry fields are percentages (0-100 scale). They are derived
 * either from a word box relative to the image size (`imgW` / `imgH`) or from
 * the enclosing cell's `bbox_pct`.
 */
export interface WordPosition {
// Left edge in percent: (box.left / imgW) * 100 when a word box is known,
// otherwise cell.bbox_pct.x plus an offset inside the cell.
xPct: number
// Width in percent: (box.width / imgW) * 100, or a share of cell.bbox_pct.w.
wPct: number
// Top edge in percent: (box.top / imgH) * 100 when a word box is known,
// otherwise the cell's own bbox_pct.y.
yPct: number
// Height in percent: (box.height / imgH) * 100, or the cell's bbox_pct.h.
hPct: number
// Text rendered for this word (the box text or a single token).
text: string
// Always 1.0 at the producers visible in this hook.
// NOTE(review): presumably a font-size scale factor — confirm against the consumer.
fontRatio: number
}
@@ -66,6 +68,8 @@ export function useSlideWordPositions(
const wordPos = tokens.map((t, i) => ({
xPct: cell.bbox_pct.x + i * fallbackW,
wPct: fallbackW,
yPct: cell.bbox_pct.y,
hPct: cell.bbox_pct.h,
text: t,
fontRatio: 1.0,
}))
@@ -77,6 +81,8 @@ export function useSlideWordPositions(
const wordPos: WordPosition[] = boxes.map(box => ({
xPct: (box.left / imgW) * 100,
wPct: (box.width / imgW) * 100,
yPct: (box.top / imgH) * 100,
hPct: (box.height / imgH) * 100,
text: box.text,
fontRatio: 1.0,
}))
@@ -202,6 +208,8 @@ export function useSlideWordPositions(
wordPos.push({
xPct: cell.bbox_pct.x + (bestX / cw) * cell.bbox_pct.w,
wPct: (tokenW / cw) * cell.bbox_pct.w,
yPct: cell.bbox_pct.y,
hPct: cell.bbox_pct.h,
text: tokens[ti],
fontRatio: 1.0,
})