fix: run color annotation on final cell word_boxes, not pre-grid words

_build_cells() creates new word_box dicts, so color fields set before grid building were lost. Now detect_word_colors() runs after the cells are built, on the final word_boxes. Recovery of colored text still runs before grid building, so that recovered words participate in column/row detection.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-15 00:53:04 +01:00
parent 2bd63ec402
commit bcd55e12d7
1 changed file with 11 additions and 6 deletions
@@ -450,10 +450,7 @@ async def build_grid(session_id: str):
        img_bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)
        if img_bgr is not None:
-            # --- Color detection: annotate existing words ---
+            # --- Recover colored text that OCR missed (before grid building) ---
-            detect_word_colors(img_bgr, all_words)
-            # --- Recover colored text that OCR missed ---
            recovered = recover_colored_text(img_bgr, all_words)
            if recovered:
                recovered_count = len(recovered)
@@ -538,9 +535,17 @@ async def build_grid(session_id: str):
            **grid,
        })
+    # 5. Color annotation on final word_boxes in cells
+    if img_bgr is not None:
+        all_wb: List[Dict] = []
+        for z in zones_data:
+            for cell in z.get("cells", []):
+                all_wb.extend(cell.get("word_boxes", []))
+        detect_word_colors(img_bgr, all_wb)
    duration = time.time() - t0
-    # 5. Build result
+    # 6. Build result
    total_cells = sum(len(z.get("cells", [])) for z in zones_data)
    total_columns = sum(len(z.get("columns", [])) for z in zones_data)
    total_rows = sum(len(z.get("rows", [])) for z in zones_data)
@@ -575,7 +580,7 @@ async def build_grid(session_id: str):
        "duration_seconds": round(duration, 2),
    }
-    # 6. Persist to DB
+    # 7. Persist to DB
    await update_session_db(session_id, grid_editor_result=result)
    logger.info(