fix: split PaddleOCR phrase boxes into per-word boxes for overlay slide
Some checks failed
CI / test-nodejs-website (push) Has been cancelled
CI / go-lint (push) Has been cancelled
CI / python-lint (push) Has been cancelled
CI / nodejs-lint (push) Has been cancelled
CI / test-go-school (push) Has been cancelled
CI / test-go-edu-search (push) Has been cancelled
CI / test-python-klausur (push) Has been cancelled
CI / test-python-agent-core (push) Has been cancelled
Some checks failed
CI / test-nodejs-website (push) Has been cancelled
CI / go-lint (push) Has been cancelled
CI / python-lint (push) Has been cancelled
CI / nodejs-lint (push) Has been cancelled
CI / test-go-school (push) Has been cancelled
CI / test-go-edu-search (push) Has been cancelled
CI / test-python-klausur (push) Has been cancelled
CI / test-python-agent-core (push) Has been cancelled
PaddleOCR returns phrase-level bounding boxes (e.g. "competition [kompa'tifn]" as one box) but the overlay slide mechanism expects one box per word for accurate positioning. Multi-word boxes are now split proportionally by character count, with small gaps between words.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -181,17 +181,45 @@ def _build_cells(
         confs = [w.get('conf', 0) for w in cell_words if w.get('conf', 0) > 0]
         avg_conf = sum(confs) / len(confs) if confs else 0.0

-        # Word boxes with absolute pixel coordinates (consistent with cv_cell_grid.py)
+        # Word boxes with absolute pixel coordinates (consistent with cv_cell_grid.py).
+        # PaddleOCR returns phrase-level boxes (e.g. "competition [kompa'tifn]"),
+        # but the overlay slide mechanism expects one box per word. Split multi-word
+        # boxes into individual word positions proportional to character length.
         word_boxes = []
         for w in sorted(cell_words, key=lambda ww: (ww['top'], ww['left'])):
-            word_boxes.append({
-                'text': w.get('text', ''),
-                'left': w['left'],
-                'top': w['top'],
-                'width': w['width'],
-                'height': w['height'],
-                'conf': w.get('conf', 0),
-            })
+            raw_text = w.get('text', '').strip()
+            tokens = raw_text.split()
+            if len(tokens) <= 1:
+                # Single word — keep as-is
+                word_boxes.append({
+                    'text': raw_text,
+                    'left': w['left'],
+                    'top': w['top'],
+                    'width': w['width'],
+                    'height': w['height'],
+                    'conf': w.get('conf', 0),
+                })
+            else:
+                # Multi-word phrase — split proportionally by character count
+                total_chars = sum(len(t) for t in tokens)
+                if total_chars == 0:
+                    continue
+                # Small gap between words (2% of box width per gap)
+                n_gaps = len(tokens) - 1
+                gap_px = w['width'] * 0.02
+                usable_w = w['width'] - gap_px * n_gaps
+                cursor = w['left']
+                for t in tokens:
+                    token_w = max(1, usable_w * len(t) / total_chars)
+                    word_boxes.append({
+                        'text': t,
+                        'left': round(cursor),
+                        'top': w['top'],
+                        'width': round(token_w),
+                        'height': w['height'],
+                        'conf': w.get('conf', 0),
+                    })
+                    cursor += token_w + gap_px

         cells.append({
             'cell_id': f"R{ri:02d}_C{ci}",
Reference in New Issue
Block a user