From c4f2e6554e5a1d36d1c169f7df85dffac60a2a3f Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBookPro.fritz.box>
Date: Sun, 1 Mar 2026 12:52:41 +0100
Subject: [PATCH] fix(ocr-pipeline): prevent grid from producing more rows than
 gap-based
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two fixes:
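1. Grid validation: reject the word-center grid if it produces MORE rows
   than gap-based detection found content rows. Extra rows mean the
   clustering split real text lines, which is worse than the gap-based
   result, so the function falls back to the gap-based rows in that case.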

2. Words overlay: draw clean grid cells (column × row intersections)
   instead of padded entry bboxes. This eliminates the confusing double
   lines. OCR text labels are now placed directly inside the grid cells.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 klausur-service/backend/cv_vocab_pipeline.py |  7 ++
 klausur-service/backend/ocr_pipeline_api.py  | 93 +++++++++++---------
 2 files changed, 57 insertions(+), 43 deletions(-)

diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py
index b2cc866..5479446 100644
--- a/klausur-service/backend/cv_vocab_pipeline.py
+++ b/klausur-service/backend/cv_vocab_pipeline.py
@@ -1829,6 +1829,13 @@ def _regularize_row_grid(
     # Remove empty grid rows (no words assigned)
     grid_rows = [gr for gr in grid_rows if gr.word_count > 0]
 
+    # The grid must not produce MORE rows than gap-based detection.
+    # More rows means the clustering split actual lines — that's worse.
+    if len(grid_rows) > len(content_rows):
+        logger.info(f"RowGrid: grid produced {len(grid_rows)} rows > "
+                    f"{len(content_rows)} gap-based → keeping gap-based rows")
+        return rows
+
     # --- Step H: Merge header/footer + re-index ---
     result = list(non_content) + grid_rows
     result.sort(key=lambda r: r.y)
diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py
index e0bcf5c..323ab5a 100644
--- a/klausur-service/backend/ocr_pipeline_api.py
+++ b/klausur-service/backend/ocr_pipeline_api.py
@@ -1256,8 +1256,8 @@ async def _get_words_overlay(session_id: str) -> Response:
 
     img_h, img_w = img.shape[:2]
 
-    # Color map for cell types (BGR)
-    cell_colors = {
+    # Color map for column types (BGR)
+    col_colors = {
         "column_en": (255, 180, 0),      # Blue
         "column_de": (0, 200, 0),         # Green
         "column_example": (0, 140, 255),  # Orange
@@ -1265,28 +1265,43 @@ async def _get_words_overlay(session_id: str) -> Response:
 
     overlay = img.copy()
 
-    # Draw column divider lines (vertical)
+    # Build grid from column_result × row_result (the actual cells)
+    columns = []
     if column_result and column_result.get("columns"):
-        for col in column_result["columns"]:
-            col_type = col.get("type", "")
-            if col_type in cell_colors:
-                cx = col["x"]
-                cv2.line(img, (cx, 0), (cx, img_h), cell_colors[col_type], 1)
-                cx_end = col["x"] + col["width"]
-                cv2.line(img, (cx_end, 0), (cx_end, img_h), cell_colors[col_type], 1)
+        columns = [c for c in column_result["columns"]
+                   if c.get("type", "").startswith("column_")]
 
-    # Draw row divider lines (horizontal) for content rows
+    content_rows_data = []
     if row_result and row_result.get("rows"):
-        for row in row_result["rows"]:
-            if row.get("row_type") == "content":
-                ry = row["y"]
-                cv2.line(img, (0, ry), (img_w, ry), (180, 180, 180), 1)
+        content_rows_data = [r for r in row_result["rows"]
+                             if r.get("row_type") == "content"]
 
-    # Draw entry cells with text labels
+    # Draw grid: column × row cells
+    for col in columns:
+        col_type = col.get("type", "")
+        color = col_colors.get(col_type, (200, 200, 200))
+        cx, cw = col["x"], col["width"]
+
+        for row in content_rows_data:
+            ry, rh = row["y"], row["height"]
+            # Cell rectangle (exact grid intersection, no padding)
+            cv2.rectangle(img, (cx, ry), (cx + cw, ry + rh), color, 1)
+            # Semi-transparent fill
+            cv2.rectangle(overlay, (cx, ry), (cx + cw, ry + rh), color, -1)
+
+    # Place OCR text labels inside grid cells
+    # Build lookup: row_index → entry for fast access
     entries = word_result["entries"]
+    entry_by_row: Dict[int, Dict] = {}
     for entry in entries:
+        entry_by_row[entry.get("row_index", -1)] = entry
+
+    for row_idx, row in enumerate(content_rows_data):
+        entry = entry_by_row.get(row_idx)
+        if not entry:
+            continue
+
         conf = entry.get("confidence", 0)
-        # Color by confidence: green > 70, yellow 50-70, red < 50
         if conf >= 70:
             text_color = (0, 180, 0)
         elif conf >= 50:
@@ -1294,35 +1309,27 @@ async def _get_words_overlay(session_id: str) -> Response:
         else:
             text_color = (0, 0, 220)
 
-        for bbox_key, field_key, col_type in [
-            ("bbox_en", "english", "column_en"),
-            ("bbox_de", "german", "column_de"),
-            ("bbox_ex", "example", "column_example"),
-        ]:
-            bbox = entry.get(bbox_key)
-            text = entry.get(field_key, "")
-            if not bbox or not text:
-                continue
+        ry, rh = row["y"], row["height"]
 
-            # Convert percent to pixels
-            bx = int(bbox["x"] / 100 * img_w)
-            by = int(bbox["y"] / 100 * img_h)
-            bw = int(bbox["w"] / 100 * img_w)
-            bh = int(bbox["h"] / 100 * img_h)
+        for col in columns:
+            col_type = col.get("type", "")
+            cx, cw = col["x"], col["width"]
 
-            color = cell_colors.get(col_type, (200, 200, 200))
+            # Pick the right text field for this column
+            if col_type == "column_en":
+                text = entry.get("english", "")
+            elif col_type == "column_de":
+                text = entry.get("german", "")
+            elif col_type == "column_example":
+                text = entry.get("example", "")
+            else:
+                text = ""
 
-            # Semi-transparent fill
-            cv2.rectangle(overlay, (bx, by), (bx + bw, by + bh), color, -1)
-
-            # Border
-            cv2.rectangle(img, (bx, by), (bx + bw, by + bh), text_color, 1)
-
-            # Text label (truncate if too long)
-            label = text[:30] if len(text) > 30 else text
-            font_scale = 0.35
-            cv2.putText(img, label, (bx + 3, by + bh - 4),
-                        cv2.FONT_HERSHEY_SIMPLEX, font_scale, text_color, 1)
+            if text:
+                label = text.replace('\n', ' ')[:30]
+                font_scale = 0.35
+                cv2.putText(img, label, (cx + 3, ry + rh - 4),
+                            cv2.FONT_HERSHEY_SIMPLEX, font_scale, text_color, 1)
 
     # Blend overlay at 10% opacity
     cv2.addWeighted(overlay, 0.1, img, 0.9, 0, img)