From 7d19145edb9dd28aa569f1fda91f5c4a387d4f35 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 11 Mar 2026 20:41:29 +0100 Subject: [PATCH] fix: word_boxes auch fuer breite Spalten (Full-Page OCR) speichern word_boxes wurden nur im Cell-Crop-Pfad (narrow columns) gesetzt, aber nicht im Full-Page Word-Assignment-Pfad (broad columns). Jetzt werden die Tesseract-Wort-Koordinaten in beiden Pfaden gespeichert. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/cv_cell_grid.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/klausur-service/backend/cv_cell_grid.py b/klausur-service/backend/cv_cell_grid.py index 81343ec..db1a8f6 100644 --- a/klausur-service/backend/cv_cell_grid.py +++ b/klausur-service/backend/cv_cell_grid.py @@ -458,6 +458,20 @@ def build_cell_grid_v2( 'ocr_engine': 'word_lookup', 'is_bold': False, } + # Store word bounding boxes for pixel-accurate overlay + if words and text.strip(): + cell['word_boxes'] = [ + { + 'text': w.get('text', ''), + 'left': w['left'], + 'top': w['top'], + 'width': w['width'], + 'height': w['height'], + 'conf': w.get('conf', 0), + } + for w in words + if w.get('text', '').strip() + ] cells.append(cell) # --- Phase 2: Narrow columns via cell-crop OCR (parallel) ---