From 40ac593d28b30f8b189b456e841fb49ef293a59d Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 12 Mar 2026 16:00:06 +0100 Subject: [PATCH] fix: split PaddleOCR phrase boxes into per-word boxes for overlay slide PaddleOCR returns phrase-level bounding boxes (e.g. "competition [kompa'tifn]" as one box) but the overlay slide mechanism expects one box per word for accurate positioning. Multi-word boxes are now split proportionally by character count with small gaps between words. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/cv_words_first.py | 46 ++++++++++++++++++----- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/klausur-service/backend/cv_words_first.py b/klausur-service/backend/cv_words_first.py index f1ca0bc..2c78d45 100644 --- a/klausur-service/backend/cv_words_first.py +++ b/klausur-service/backend/cv_words_first.py @@ -181,17 +181,45 @@ def _build_cells( confs = [w.get('conf', 0) for w in cell_words if w.get('conf', 0) > 0] avg_conf = sum(confs) / len(confs) if confs else 0.0 - # Word boxes with absolute pixel coordinates (consistent with cv_cell_grid.py) + # Word boxes with absolute pixel coordinates (consistent with cv_cell_grid.py). + # PaddleOCR returns phrase-level boxes (e.g. "competition [kompa'tifn]"), + # but the overlay slide mechanism expects one box per word. Split multi-word + # boxes into individual word positions proportional to character length. word_boxes = [] for w in sorted(cell_words, key=lambda ww: (ww['top'], ww['left'])): - word_boxes.append({ - 'text': w.get('text', ''), - 'left': w['left'], - 'top': w['top'], - 'width': w['width'], - 'height': w['height'], - 'conf': w.get('conf', 0), - }) + raw_text = w.get('text', '').strip() + tokens = raw_text.split() + if len(tokens) <= 1: + # Single word — keep as-is + word_boxes.append({ + 'text': raw_text, + 'left': w['left'], + 'top': w['top'], + 'width': w['width'], + 'height': w['height'], + 'conf': w.get('conf', 0), + }) + else: + # Multi-word phrase — split proportionally by character count + total_chars = sum(len(t) for t in tokens) + if total_chars == 0: + continue + # Small gap between words (2% of box width per gap) + n_gaps = len(tokens) - 1 + gap_px = w['width'] * 0.02 + usable_w = w['width'] - gap_px * n_gaps + cursor = w['left'] + for t in tokens: + token_w = max(1, usable_w * len(t) / total_chars) + word_boxes.append({ + 'text': t, + 'left': round(cursor), + 'top': w['top'], + 'width': round(token_w), + 'height': w['height'], + 'conf': w.get('conf', 0), + }) + cursor += token_w + gap_px cells.append({ 'cell_id': f"R{ri:02d}_C{ci}",