feat: generische Box-Erkennung fuer zonenbasierte Spaltenerkennung

- Neue Datei cv_box_detect.py: 2-Stufen-Algorithmus (Linien + Farbe)
- DetectedBox/PageZone Dataclasses in cv_vocab_types.py
- detect_column_geometry_zoned() in cv_layout.py
- API-Endpoints erweitert: zones/boxes_detected im column_result
- Overlay-Funktionen zeichnen Box-Grenzen als gestrichelte Rechtecke
- Fix: numpy array or-Verknuepfung an 7 Stellen in ocr_pipeline_api.py
- 12 Unit-Tests fuer Box-Erkennung und Zone-Splitting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 15:06:23 +01:00
parent e60254bc75
commit 7005b18561
6 changed files with 821 additions and 14 deletions
--- a/klausur-service/backend/cv_layout.py
+++ b/klausur-service/backend/cv_layout.py
@@ -13,10 +13,12 @@ import numpy as np

 from cv_vocab_types import (
    ColumnGeometry,
+    DetectedBox,
    DocumentTypeResult,
    ENGLISH_FUNCTION_WORDS,
    GERMAN_FUNCTION_WORDS,
    PageRegion,
+    PageZone,
    RowGeometry,
 )

@@ -3034,3 +3036,133 @@ def analyze_layout_by_words(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Li
                f"{[(r.type, r.x, r.width, r.classification_confidence) for r in regions if r.type not in ('header','footer','margin_top','margin_bottom')]}")

    return regions
+
+
+# ---------------------------------------------------------------------------
+# Zone-aware column geometry detection
+# ---------------------------------------------------------------------------
+
+def detect_column_geometry_zoned(
+    ocr_img: np.ndarray,
+    dewarped_bgr: np.ndarray,
+) -> Optional[Tuple[
+    List[ColumnGeometry],  # flat column list (all zones)
+    int, int, int, int,    # left_x, right_x, top_y, bottom_y
+    List[Dict],            # word_dicts
+    np.ndarray,            # inv
+    List[Dict],            # zones (serializable)
+    List[DetectedBox],     # detected boxes
+]]:
+    """Detect column geometry separately for each page zone.
+
+    Runs detect_column_geometry() on the whole page for content bounds,
+    word_dicts and inv, then detect_boxes() within those bounds. Without
+    boxes the whole-page result is returned with one "content" zone. With
+    boxes, each "content" zone of height >= 40 from split_page_into_zones()
+    is re-detected on its slice and column y values are shifted by zone.y.
+
+    Args:
+        ocr_img: Image handed to detect_column_geometry(), whole and per zone.
+        dewarped_bgr: Passed to detect_boxes() and detect_column_geometry().
+
+    Returns:
+        (geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv,
+        zones_data, boxes), or None if the whole-page detection fails.
+        Bounds, word_dicts and inv always come from the whole-page pass.
+    """
+    from cv_box_detect import detect_boxes, split_page_into_zones
+
+    # Whole-page pass: provides content bounds and word data for both paths
+    geo_result = detect_column_geometry(ocr_img, dewarped_bgr)
+    if geo_result is None:
+        return None
+
+    geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv = geo_result
+    content_w = right_x - left_x
+    content_h = bottom_y - top_y
+
+    # Detect boxes in the image
+    boxes = detect_boxes(
+        dewarped_bgr, left_x, content_w, top_y, content_h,
+    )
+
+    if not boxes:
+        # No boxes — single zone, backward compatible
+        zone_data = [{
+            "index": 0,
+            "zone_type": "content",
+            "y": top_y,
+            "height": content_h,
+            "x": left_x,
+            "width": content_w,
+            "columns": [],  # filled later by caller
+        }]
+        return (geometries, left_x, right_x, top_y, bottom_y,
+                word_dicts, inv, zone_data, boxes)
+
+    # Split into zones
+    zones = split_page_into_zones(left_x, top_y, content_w, content_h, boxes)
+
+    # Run column detection per content zone
+    all_geometries: List[ColumnGeometry] = []
+    zones_data: List[Dict] = []
+
+    for zone in zones:
+        zone_dict: Dict = {
+            "index": zone.index,
+            "zone_type": zone.zone_type,
+            "y": zone.y,
+            "height": zone.height,
+            "x": zone.x,
+            "width": zone.width,
+            "columns": [],
+        }
+
+        if zone.box is not None:
+            zone_dict["box"] = {
+                "x": zone.box.x,
+                "y": zone.box.y,
+                "width": zone.box.width,
+                "height": zone.box.height,
+                "confidence": zone.box.confidence,
+                "border_thickness": zone.box.border_thickness,
+            }
+
+        if zone.zone_type == 'content' and zone.height >= 40:
+            # Slice along the first axis only; the second axis is kept whole
+            zone_y_end = zone.y + zone.height
+            sub_ocr = ocr_img[zone.y:zone_y_end, :]
+            sub_bgr = dewarped_bgr[zone.y:zone_y_end, :]
+
+            sub_result = detect_column_geometry(sub_ocr, sub_bgr)
+            if sub_result is not None:
+                # Zone-local bounds, words and inv are not needed here
+                sub_geoms, _, _, _, _, _, _ = sub_result
+                for g in sub_geoms:
+                    # Offset y back to absolute page coordinates
+                    g.y += zone.y
+                    zone_dict["columns"].append({
+                        "index": g.index,
+                        "x": g.x,
+                        "y": g.y,
+                        "width": g.width,
+                        "height": g.height,
+                        "word_count": g.word_count,
+                        "width_ratio": g.width_ratio,
+                        "zone_index": zone.index,
+                    })
+                all_geometries.extend(sub_geoms)
+            else:
+                logger.debug(f"ZonedColumns: zone {zone.index} column detection returned None")
+
+        zones_data.append(zone_dict)
+
+    # If per-zone detection produced no columns, fall back to the original
+    if not all_geometries:
+        all_geometries = geometries
+
+    logger.info(f"ZonedColumns: {len(boxes)} box(es), {len(zones)} zone(s), "
+                f"{len(all_geometries)} total columns")
+
+    return (all_geometries, left_x, right_x, top_y, bottom_y,
+            word_dicts, inv, zones_data, boxes)