feat: generische Box-Erkennung fuer zonenbasierte Spaltenerkennung

- Neue Datei cv_box_detect.py: 2-Stufen-Algorithmus (Linien + Farbe)
- DetectedBox/PageZone Dataclasses in cv_vocab_types.py
- detect_column_geometry_zoned() in cv_layout.py
- API-Endpoints erweitert: zones/boxes_detected im column_result
- Overlay-Funktionen zeichnen Box-Grenzen als gestrichelte Rechtecke
- Fix: numpy array or-Verknuepfung an 7 Stellen in ocr_pipeline_api.py
- 12 Unit-Tests fuer Box-Erkennung und Zone-Splitting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 15:06:23 +01:00
parent e60254bc75
commit 7005b18561
6 changed files with 821 additions and 14 deletions
--- a/klausur-service/backend/ocr_pipeline_api.py
+++ b/klausur-service/backend/ocr_pipeline_api.py
@@ -57,6 +57,7 @@ from cv_vocab_pipeline import (
    deskew_image_iterative,
    deskew_two_pass,
    detect_column_geometry,
+    detect_column_geometry_zoned,
    detect_document_type,
    detect_row_geometry,
    expand_narrow_columns,
@@ -1001,7 +1002,7 @@ async def detect_type(session_id: str):
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

-    img_bgr = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
+    img_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Crop or dewarp must be completed first")

@@ -1052,7 +1053,7 @@ async def detect_columns(session_id: str):
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

-    img_bgr = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
+    img_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
    if img_bgr is None:
        raise HTTPException(status_code=400, detail="Crop or dewarp must be completed before column detection")

@@ -1062,21 +1063,26 @@ async def detect_columns(session_id: str):
    ocr_img = create_ocr_image(img_bgr)
    h, w = ocr_img.shape[:2]

-    # Phase A: Geometry detection (returns word_dicts + inv for reuse)
-    geo_result = detect_column_geometry(ocr_img, img_bgr)
+    # Phase A: Zone-aware geometry detection
+    zoned_result = detect_column_geometry_zoned(ocr_img, img_bgr)

-    if geo_result is None:
+    if zoned_result is None:
        # Fallback to projection-based layout
        layout_img = create_layout_image(img_bgr)
        regions = analyze_layout(layout_img, ocr_img)
+        zones_data = None
+        boxes_detected = 0
    else:
-        geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv = geo_result
+        geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv, zones_data, boxes = zoned_result
        content_w = right_x - left_x
+        boxes_detected = len(boxes)

        # Cache intermediates for row detection (avoids second Tesseract run)
        cached["_word_dicts"] = word_dicts
        cached["_inv"] = inv
        cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)
+        cached["_zones_data"] = zones_data
+        cached["_boxes_detected"] = boxes_detected

        # Detect header/footer early so sub-column clustering ignores them
        header_y, footer_y = _detect_header_footer_gaps(inv, w, h) if inv is not None else (None, None)
@@ -1106,8 +1112,13 @@ async def detect_columns(session_id: str):
        "columns": columns,
        "classification_methods": methods,
        "duration_seconds": round(duration, 2),
+        "boxes_detected": boxes_detected,
    }

+    # Add zone data when boxes are present
+    if zones_data and boxes_detected > 0:
+        column_result["zones"] = zones_data
+
    # Persist to DB — also invalidate downstream results (rows, words)
    await update_session_db(
        session_id,
@@ -1124,13 +1135,14 @@ async def detect_columns(session_id: str):

    col_count = len([c for c in columns if c["type"].startswith("column")])
    logger.info(f"OCR Pipeline: columns session {session_id}: "
-                f"{col_count} columns detected ({duration:.2f}s)")
+                f"{col_count} columns detected, {boxes_detected} box(es) ({duration:.2f}s)")

    img_w = img_bgr.shape[1]
    await _append_pipeline_log(session_id, "columns", {
        "total_columns": len(columns),
        "column_widths_pct": [round(c["width"] / img_w * 100, 1) for c in columns],
        "column_types": [c["type"] for c in columns],
+        "boxes_detected": boxes_detected,
    }, duration_ms=int(duration * 1000))

    return {
@@ -1266,6 +1278,27 @@ async def _get_columns_overlay(session_id: str) -> Response:
    # Blend overlay at 20% opacity
    cv2.addWeighted(overlay, 0.2, img, 0.8, 0, img)

+    # Draw detected box boundaries as dashed rectangles
+    zones = column_result.get("zones", [])
+    for zone in zones:
+        if zone.get("zone_type") == "box" and zone.get("box"):
+            box = zone["box"]
+            bx, by = box["x"], box["y"]
+            bw, bh = box["width"], box["height"]
+            box_color = (0, 200, 255)  # Yellow (BGR)
+            # Draw dashed rectangle by drawing short line segments
+            dash_len = 15
+            for edge_x in range(bx, bx + bw, dash_len * 2):
+                end_x = min(edge_x + dash_len, bx + bw)
+                cv2.line(img, (edge_x, by), (end_x, by), box_color, 2)
+                cv2.line(img, (edge_x, by + bh), (end_x, by + bh), box_color, 2)
+            for edge_y in range(by, by + bh, dash_len * 2):
+                end_y = min(edge_y + dash_len, by + bh)
+                cv2.line(img, (bx, edge_y), (bx, end_y), box_color, 2)
+                cv2.line(img, (bx + bw, edge_y), (bx + bw, end_y), box_color, 2)
+            cv2.putText(img, "BOX", (bx + 10, by + bh - 10),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, box_color, 2)
+
    success, result_png = cv2.imencode(".png", img)
    if not success:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")
@@ -1284,7 +1317,7 @@ async def detect_rows(session_id: str):
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

-    dewarped_bgr = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
+    dewarped_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
    if dewarped_bgr is None:
        raise HTTPException(status_code=400, detail="Crop or dewarp must be completed before row detection")

@@ -1315,7 +1348,7 @@ async def detect_rows(session_id: str):
    # Build serializable result (exclude words to keep payload small)
    rows_data = []
    for r in rows:
-        rows_data.append({
+        rd = {
            "index": r.index,
            "x": r.x,
            "y": r.y,
@@ -1324,7 +1357,9 @@ async def detect_rows(session_id: str):
            "word_count": r.word_count,
            "row_type": r.row_type,
            "gap_before": r.gap_before,
-        })
+            "zone_index": 0,
+        }
+        rows_data.append(rd)

    type_counts = {}
    for r in rows:
@@ -1456,7 +1491,7 @@ async def detect_words(
        await _load_session_to_cache(session_id)
    cached = _get_cached(session_id)

-    dewarped_bgr = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
+    dewarped_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
    if dewarped_bgr is None:
        logger.warning("detect_words: no cropped/dewarped image for session %s (cache keys: %s)",
                       session_id, [k for k in cached.keys() if k.endswith('_bgr')])
@@ -1560,6 +1595,10 @@ async def detect_words(
    )
    duration = time.time() - t0

+    # Add zone_index to each cell (default 0 for backward compatibility)
+    for cell in cells:
+        cell.setdefault("zone_index", 0)
+
    # Layout detection
    col_types = {c['type'] for c in columns_meta}
    is_vocab = bool(col_types & {'column_en', 'column_de'})
@@ -2749,6 +2788,22 @@ async def _get_rows_overlay(session_id: str) -> Response:
    # Blend overlay at 15% opacity
    cv2.addWeighted(overlay, 0.15, img, 0.85, 0, img)

+    # Draw zone separator lines if zones exist
+    column_result = session.get("column_result") or {}
+    zones = column_result.get("zones", [])
+    if zones:
+        img_w_px = img.shape[1]
+        zone_color = (0, 200, 255)  # Yellow (BGR)
+        dash_len = 20
+        for zone in zones:
+            if zone.get("zone_type") == "box":
+                zy = zone["y"]
+                zh = zone["height"]
+                for line_y in [zy, zy + zh]:
+                    for sx in range(0, img_w_px, dash_len * 2):
+                        ex = min(sx + dash_len, img_w_px)
+                        cv2.line(img, (sx, line_y), (ex, line_y), zone_color, 2)
+
    success, result_png = cv2.imencode(".png", img)
    if not success:
        raise HTTPException(status_code=500, detail="Failed to encode overlay image")
@@ -3182,7 +3237,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
            yield await _auto_sse_event("columns", "start", {})
            try:
                t0 = time.time()
-                col_img = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
+                col_img = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
                if col_img is None:
                    raise ValueError("Cropped/dewarped image not available")

@@ -3243,7 +3298,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
            yield await _auto_sse_event("rows", "start", {})
            try:
                t0 = time.time()
-                row_img = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
+                row_img = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
                session = await get_session_db(session_id)
                column_result = session.get("column_result") or cached.get("column_result")
                if not column_result or not column_result.get("columns"):
@@ -3321,7 +3376,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
            yield await _auto_sse_event("words", "start", {"engine": req.ocr_engine})
            try:
                t0 = time.time()
-                word_img = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
+                word_img = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
                session = await get_session_db(session_id)

                column_result = session.get("column_result") or cached.get("column_result")