From c4f2e6554e5a1d36d1c169f7df85dffac60a2a3f Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sun, 1 Mar 2026 12:52:41 +0100 Subject: [PATCH] fix(ocr-pipeline): prevent grid from producing more rows than gap-based MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes: 1. Grid validation: reject word-center grid if it produces MORE rows than gap-based detection (more rows = lines were split = worse). Falls back to gap-based rows in that case. 2. Words overlay: draw clean grid cells (column × row intersections) instead of padded entry bboxes. Eliminates confusing double lines. OCR text labels are placed inside the grid cells directly. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/cv_vocab_pipeline.py | 7 ++ klausur-service/backend/ocr_pipeline_api.py | 93 +++++++++++--------- 2 files changed, 57 insertions(+), 43 deletions(-) diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index b2cc866..5479446 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -1829,6 +1829,13 @@ def _regularize_row_grid( # Remove empty grid rows (no words assigned) grid_rows = [gr for gr in grid_rows if gr.word_count > 0] + # The grid must not produce MORE rows than gap-based detection. + # More rows means the clustering split actual lines — that's worse. + if len(grid_rows) > len(content_rows): + logger.info(f"RowGrid: grid produced {len(grid_rows)} rows > " + f"{len(content_rows)} gap-based → keeping gap-based rows") + return rows + # --- Step H: Merge header/footer + re-index --- result = list(non_content) + grid_rows result.sort(key=lambda r: r.y) diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index e0bcf5c..323ab5a 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -1256,8 +1256,8 @@ async def _get_words_overlay(session_id: str) -> Response: img_h, img_w = img.shape[:2] - # Color map for cell types (BGR) - cell_colors = { + # Color map for column types (BGR) + col_colors = { "column_en": (255, 180, 0), # Blue "column_de": (0, 200, 0), # Green "column_example": (0, 140, 255), # Orange @@ -1265,28 +1265,43 @@ async def _get_words_overlay(session_id: str) -> Response: overlay = img.copy() - # Draw column divider lines (vertical) + # Build grid from column_result × row_result (the actual cells) + columns = [] if column_result and column_result.get("columns"): - for col in column_result["columns"]: - col_type = col.get("type", "") - if col_type in cell_colors: - cx = col["x"] - cv2.line(img, (cx, 0), (cx, img_h), cell_colors[col_type], 1) - cx_end = col["x"] + col["width"] - cv2.line(img, (cx_end, 0), (cx_end, img_h), cell_colors[col_type], 1) + columns = [c for c in column_result["columns"] + if c.get("type", "").startswith("column_")] - # Draw row divider lines (horizontal) for content rows + content_rows_data = [] if row_result and row_result.get("rows"): - for row in row_result["rows"]: - if row.get("row_type") == "content": - ry = row["y"] - cv2.line(img, (0, ry), (img_w, ry), (180, 180, 180), 1) + content_rows_data = [r for r in row_result["rows"] + if r.get("row_type") == "content"] - # Draw entry cells with text labels + # Draw grid: column × row cells + for col in columns: + col_type = col.get("type", "") + color = col_colors.get(col_type, (200, 200, 200)) + cx, cw = col["x"], col["width"] + + for row in content_rows_data: + ry, rh = row["y"], row["height"] + # Cell rectangle (exact grid intersection, no padding) + cv2.rectangle(img, (cx, ry), (cx + cw, ry + rh), color, 1) + # Semi-transparent fill + cv2.rectangle(overlay, (cx, ry), (cx + cw, ry + rh), color, -1) + + # Place OCR text labels inside grid cells + # Build lookup: row_index → entry for fast access entries = word_result["entries"] + entry_by_row: Dict[int, Dict] = {} for entry in entries: + entry_by_row[entry.get("row_index", -1)] = entry + + for row_idx, row in enumerate(content_rows_data): + entry = entry_by_row.get(row_idx) + if not entry: + continue + conf = entry.get("confidence", 0) - # Color by confidence: green > 70, yellow 50-70, red < 50 if conf >= 70: text_color = (0, 180, 0) elif conf >= 50: @@ -1294,35 +1309,27 @@ async def _get_words_overlay(session_id: str) -> Response: else: text_color = (0, 0, 220) - for bbox_key, field_key, col_type in [ - ("bbox_en", "english", "column_en"), - ("bbox_de", "german", "column_de"), - ("bbox_ex", "example", "column_example"), - ]: - bbox = entry.get(bbox_key) - text = entry.get(field_key, "") - if not bbox or not text: - continue + ry, rh = row["y"], row["height"] - # Convert percent to pixels - bx = int(bbox["x"] / 100 * img_w) - by = int(bbox["y"] / 100 * img_h) - bw = int(bbox["w"] / 100 * img_w) - bh = int(bbox["h"] / 100 * img_h) + for col in columns: + col_type = col.get("type", "") + cx, cw = col["x"], col["width"] - color = cell_colors.get(col_type, (200, 200, 200)) + # Pick the right text field for this column + if col_type == "column_en": + text = entry.get("english", "") + elif col_type == "column_de": + text = entry.get("german", "") + elif col_type == "column_example": + text = entry.get("example", "") + else: + text = "" - # Semi-transparent fill - cv2.rectangle(overlay, (bx, by), (bx + bw, by + bh), color, -1) - - # Border - cv2.rectangle(img, (bx, by), (bx + bw, by + bh), text_color, 1) - - # Text label (truncate if too long) - label = text[:30] if len(text) > 30 else text - font_scale = 0.35 - cv2.putText(img, label, (bx + 3, by + bh - 4), - cv2.FONT_HERSHEY_SIMPLEX, font_scale, text_color, 1) + if text: + label = text.replace('\n', ' ')[:30] + font_scale = 0.35 + cv2.putText(img, label, (cx + 3, ry + rh - 4), + cv2.FONT_HERSHEY_SIMPLEX, font_scale, text_color, 1) # Blend overlay at 10% opacity cv2.addWeighted(overlay, 0.1, img, 0.9, 0, img)