feat: add color detection for OCR word boxes

New cv_color_detect.py module:
- detect_word_colors(): annotates existing words with their text color (HSV analysis)
- recover_colored_text(): finds colored text regions missed by standard OCR (e.g. red "!" markers) using HSV masks + contour detection

Integrated into build-grid: words get color/color_name fields, and recovered colored regions are merged into the word list before grid building.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-15 00:50:09 +01:00
parent 39a4d8564c
commit 2bd63ec402
2 changed files with 280 additions and 1 deletions
--- a/klausur-service/backend/grid_editor_api.py
+++ b/klausur-service/backend/grid_editor_api.py
@@ -20,6 +20,7 @@ import numpy as np
 from fastapi import APIRouter, HTTPException, Request

 from cv_box_detect import detect_boxes, split_page_into_zones
+from cv_color_detect import detect_word_colors, recover_colored_text
 from cv_words_first import _cluster_rows, _build_cells
 from ocr_pipeline_session_store import (
    get_session_db,
@@ -438,15 +439,30 @@ async def build_grid(session_id: str):

    zones_data: List[Dict[str, Any]] = []
    boxes_detected = 0
+    recovered_count = 0
+    img_bgr = None

    content_x, content_y, content_w, content_h = _get_content_bounds(all_words)

    if img_png:
-        # Decode image for box detection
+        # Decode image for color detection + box detection
        arr = np.frombuffer(img_png, dtype=np.uint8)
        img_bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)

        if img_bgr is not None:
+            # --- Color detection: annotate existing words ---
+            detect_word_colors(img_bgr, all_words)
+
+            # --- Recover colored text that OCR missed ---
+            recovered = recover_colored_text(img_bgr, all_words)
+            if recovered:
+                recovered_count = len(recovered)
+                all_words.extend(recovered)
+                logger.info(
+                    "build-grid session %s: +%d recovered colored words",
+                    session_id, recovered_count,
+                )
+
            # Detect bordered boxes
            boxes = detect_boxes(
                img_bgr,
@@ -529,6 +545,14 @@ async def build_grid(session_id: str):
    total_columns = sum(len(z.get("columns", [])) for z in zones_data)
    total_rows = sum(len(z.get("rows", [])) for z in zones_data)

+    # Collect color statistics from all word_boxes in cells
+    color_stats: Dict[str, int] = {}
+    for z in zones_data:
+        for cell in z.get("cells", []):
+            for wb in cell.get("word_boxes", []):
+                cn = wb.get("color_name", "black")
+                color_stats[cn] = color_stats.get(cn, 0) + 1
+
    result = {
        "session_id": session_id,
        "image_width": img_w,
@@ -541,6 +565,8 @@ async def build_grid(session_id: str):
            "total_rows": total_rows,
            "total_cells": total_cells,
            "total_words": len(all_words),
+            "recovered_colored": recovered_count,
+            "color_stats": color_stats,
        },
        "formatting": {
            "bold_columns": [],