feat: generische Box-Erkennung fuer zonenbasierte Spaltenerkennung
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 19s

- Neue Datei cv_box_detect.py: 2-Stufen-Algorithmus (Linien + Farbe)
- DetectedBox/PageZone Dataclasses in cv_vocab_types.py
- detect_column_geometry_zoned() in cv_layout.py
- API-Endpoints erweitert: zones/boxes_detected im column_result
- Overlay-Funktionen zeichnen Box-Grenzen als gestrichelte Rechtecke
- Fix: numpy array or-Verknuepfung an 7 Stellen in ocr_pipeline_api.py
- 12 Unit-Tests fuer Box-Erkennung und Zone-Splitting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-09 15:06:23 +01:00
parent e60254bc75
commit 7005b18561
6 changed files with 821 additions and 14 deletions

View File

@@ -13,10 +13,12 @@ import numpy as np
from cv_vocab_types import (
ColumnGeometry,
DetectedBox,
DocumentTypeResult,
ENGLISH_FUNCTION_WORDS,
GERMAN_FUNCTION_WORDS,
PageRegion,
PageZone,
RowGeometry,
)
@@ -3034,3 +3036,133 @@ def analyze_layout_by_words(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Li
f"{[(r.type, r.x, r.width, r.classification_confidence) for r in regions if r.type not in ('header','footer','margin_top','margin_bottom')]}")
return regions
# ---------------------------------------------------------------------------
# Zone-aware column geometry detection
# ---------------------------------------------------------------------------
def detect_column_geometry_zoned(
    ocr_img: np.ndarray,
    dewarped_bgr: np.ndarray,
) -> Optional[Tuple[
    List[ColumnGeometry],   # flat column list (all zones)
    int, int, int, int,     # left_x, right_x, top_y, bottom_y
    List[Dict],             # word_dicts
    np.ndarray,             # inv
    List[Dict],             # zones (serializable)
    List[DetectedBox],      # detected boxes
]]:
    """Detect column geometry separately for each zone of a page.

    The page is first analysed as a whole with detect_column_geometry() to
    obtain the content bounds, the word dicts and ``inv``.  Box detection is
    then run inside those bounds:

    * No boxes: the whole-page columns are returned unchanged together with
      a single synthetic "content" zone that spans the content area
      (backward-compatible behaviour).
    * Boxes found: the page is split into zones and
      detect_column_geometry() is re-run on the row slice of every
      "content" zone that is at least 40 rows high.  The resulting columns
      are shifted back to absolute page y-coordinates and collected into one
      flat list.  If no zone yields any column, the whole-page columns are
      used as a fallback.

    Args:
        ocr_img: Image handed to detect_column_geometry() as its first
            argument; sliced by rows for the per-zone passes.
        dewarped_bgr: Image handed to detect_boxes() and, sliced by rows,
            to detect_column_geometry() as its second argument.

    Returns:
        ``(geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv,
        zones_data, boxes)`` where the bounds, ``word_dicts`` and ``inv``
        always come from the whole-page pass, ``zones_data`` is a list of
        plain dicts (one per zone) and ``boxes`` is the detect_boxes()
        result.  Returns ``None`` when the whole-page pass returns ``None``.
    """
    from cv_box_detect import detect_boxes, split_page_into_zones

    # Whole-page pass: provides content bounds and word data for everything
    # that follows.
    page_result = detect_column_geometry(ocr_img, dewarped_bgr)
    if page_result is None:
        return None
    geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv = page_result

    content_w = right_x - left_x
    content_h = bottom_y - top_y

    boxes = detect_boxes(dewarped_bgr, left_x, content_w, top_y, content_h)

    if not boxes:
        # Nothing to split on: describe the content area as one zone.
        whole_page_zone = {
            "index": 0,
            "zone_type": "content",
            "y": top_y,
            "height": content_h,
            "x": left_x,
            "width": content_w,
            "columns": [],  # filled later by caller
        }
        return (geometries, left_x, right_x, top_y, bottom_y,
                word_dicts, inv, [whole_page_zone], boxes)

    zones = split_page_into_zones(left_x, top_y, content_w, content_h, boxes)

    all_geometries: List[ColumnGeometry] = []
    zones_data: List[Dict] = []

    for zone in zones:
        zone_entry: Dict = {
            "index": zone.index,
            "zone_type": zone.zone_type,
            "y": zone.y,
            "height": zone.height,
            "x": zone.x,
            "width": zone.width,
            "columns": [],
        }
        box = zone.box
        if box is not None:
            zone_entry["box"] = {
                "x": box.x,
                "y": box.y,
                "width": box.width,
                "height": box.height,
                "confidence": box.confidence,
                "border_thickness": box.border_thickness,
            }
        # Registered up front; the entry is the same object that gets its
        # "columns" filled in below.
        zones_data.append(zone_entry)

        # Only content zones of at least 40 rows get their own column pass.
        wants_column_pass = zone.zone_type == 'content' and zone.height >= 40
        if not wants_column_pass:
            continue

        # Full-width horizontal strip covering exactly this zone's rows.
        zone_rows = slice(zone.y, zone.y + zone.height)
        zone_result = detect_column_geometry(
            ocr_img[zone_rows, :], dewarped_bgr[zone_rows, :],
        )
        if zone_result is None:
            logger.debug(f"ZonedColumns: zone {zone.index} column detection returned None")
            continue

        zone_geoms, _lx, _rx, _ty, _by, _words, _inv = zone_result

        # The strip starts at row zone.y of the page, so shift the columns
        # back to absolute page coordinates.
        for geom in zone_geoms:
            geom.y += zone.y

        # NOTE(review): `index` is whatever the per-zone pass assigned;
        # presumably it restarts for every zone, so the flat list may hold
        # duplicate indices — confirm that callers rely on "zone_index".
        zone_entry["columns"] = [
            {
                "index": geom.index,
                "x": geom.x,
                "y": geom.y,
                "width": geom.width,
                "height": geom.height,
                "word_count": geom.word_count,
                "width_ratio": geom.width_ratio,
                "zone_index": zone.index,
            }
            for geom in zone_geoms
        ]
        all_geometries.extend(zone_geoms)

    # No zone produced columns: fall back to the whole-page result.
    if not all_geometries:
        all_geometries = geometries

    logger.info(f"ZonedColumns: {len(boxes)} box(es), {len(zones)} zone(s), "
                f"{len(all_geometries)} total columns")

    return (all_geometries, left_x, right_x, top_y, bottom_y,
            word_dicts, inv, zones_data, boxes)