feat: generische Box-Erkennung fuer zonenbasierte Spaltenerkennung
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 19s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 19s
- Neue Datei cv_box_detect.py: 2-Stufen-Algorithmus (Linien + Farbe)
- DetectedBox/PageZone Dataclasses in cv_vocab_types.py
- detect_column_geometry_zoned() in cv_layout.py
- API-Endpoints erweitert: zones/boxes_detected im column_result
- Overlay-Funktionen zeichnen Box-Grenzen als gestrichelte Rechtecke
- Fix: numpy array or-Verknuepfung an 7 Stellen in ocr_pipeline_api.py
- 12 Unit-Tests fuer Box-Erkennung und Zone-Splitting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -13,10 +13,12 @@ import numpy as np
|
||||
|
||||
from cv_vocab_types import (
|
||||
ColumnGeometry,
|
||||
DetectedBox,
|
||||
DocumentTypeResult,
|
||||
ENGLISH_FUNCTION_WORDS,
|
||||
GERMAN_FUNCTION_WORDS,
|
||||
PageRegion,
|
||||
PageZone,
|
||||
RowGeometry,
|
||||
)
|
||||
|
||||
@@ -3034,3 +3036,133 @@ def analyze_layout_by_words(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Li
|
||||
f"{[(r.type, r.x, r.width, r.classification_confidence) for r in regions if r.type not in ('header','footer','margin_top','margin_bottom')]}")
|
||||
|
||||
return regions
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Zone-aware column geometry detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def detect_column_geometry_zoned(
    ocr_img: np.ndarray,
    dewarped_bgr: np.ndarray,
) -> Optional[Tuple[
    List[ColumnGeometry],   # columns of every zone, as one flat list
    int, int, int, int,     # left_x, right_x, top_y, bottom_y
    List[Dict],             # word_dicts
    np.ndarray,             # inv
    List[Dict],             # zones as plain dicts
    List[DetectedBox],      # detected boxes
]]:
    """Detect column geometry, re-running detection per zone when boxes exist.

    Steps:
      1. ``detect_column_geometry()`` is run on the full images; its result
         supplies the content bounds, ``word_dicts`` and ``inv``.
      2. ``detect_boxes()`` is called with the content bounds.
      3. Without boxes, the full-page columns are returned together with one
         synthetic "content" zone spanning the content bounds.
      4. With boxes, ``split_page_into_zones()`` partitions the content area.
         Each zone of type ``'content'`` that is at least 40 rows high is cut
         out of both images (full width, rows ``zone.y`` to
         ``zone.y + zone.height``) and passed to ``detect_column_geometry()``.
         The ``y`` of every resulting column is shifted by ``zone.y``; no
         other column attribute is adjusted here.
      5. If no zone produced any column, the full-page columns are used.

    Args:
        ocr_img: Image handed to ``detect_column_geometry()`` as first argument.
        dewarped_bgr: Image handed to ``detect_boxes()`` and, as second
            argument, to ``detect_column_geometry()``.

    Returns:
        ``(geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv,
        zones_data, boxes)``, or ``None`` when the full-page detection
        returns ``None``. ``left_x`` .. ``inv`` always come from the
        full-page run, also when the columns come from per-zone runs.
    """
    from cv_box_detect import detect_boxes, split_page_into_zones

    # Content zones shorter than this are recorded but not analysed.
    min_zone_height = 40

    # The full-page pass delivers content bounds and word data.
    page_result = detect_column_geometry(ocr_img, dewarped_bgr)
    if page_result is None:
        return None
    geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv = page_result

    content_w = right_x - left_x
    content_h = bottom_y - top_y

    boxes = detect_boxes(dewarped_bgr, left_x, content_w, top_y, content_h)

    if not boxes:
        # Backward-compatible path: one zone covering the content bounds.
        whole_page_zone = {
            "index": 0,
            "zone_type": "content",
            "y": top_y,
            "height": content_h,
            "x": left_x,
            "width": content_w,
            "columns": [],  # left empty here; meant to be filled by the caller
        }
        return (geometries, left_x, right_x, top_y, bottom_y,
                word_dicts, inv, [whole_page_zone], boxes)

    zones = split_page_into_zones(left_x, top_y, content_w, content_h, boxes)

    all_geometries: List[ColumnGeometry] = []
    zones_data: List[Dict] = []

    for zone in zones:
        entry: Dict = {
            "index": zone.index,
            "zone_type": zone.zone_type,
            "y": zone.y,
            "height": zone.height,
            "x": zone.x,
            "width": zone.width,
            "columns": [],
        }

        box = zone.box
        if box is not None:
            entry["box"] = {
                "x": box.x,
                "y": box.y,
                "width": box.width,
                "height": box.height,
                "confidence": box.confidence,
                "border_thickness": box.border_thickness,
            }

        # Register the zone right away; "columns" is filled in below through
        # the same dict object when detection succeeds.
        zones_data.append(entry)

        if zone.zone_type != 'content' or zone.height < min_zone_height:
            continue

        # Cut the zone's rows out of both images, keeping the full width.
        rows = slice(zone.y, zone.y + zone.height)
        sub_result = detect_column_geometry(ocr_img[rows, :], dewarped_bgr[rows, :])
        if sub_result is None:
            logger.debug(f"ZonedColumns: zone {zone.index} column detection returned None")
            continue

        # Only the columns of the sub-result are used.
        zone_columns, _lx, _rx, _ty, _by, _words, _inv = sub_result

        # Shift y from sub-image coordinates back to page coordinates.
        for col in zone_columns:
            col.y += zone.y

        entry["columns"] = [
            {
                "index": col.index,
                "x": col.x,
                "y": col.y,
                "width": col.width,
                "height": col.height,
                "word_count": col.word_count,
                "width_ratio": col.width_ratio,
                "zone_index": zone.index,
            }
            for col in zone_columns
        ]
        all_geometries.extend(zone_columns)

    # No zone yielded columns: fall back to the full-page result.
    if not all_geometries:
        all_geometries = geometries

    logger.info(f"ZonedColumns: {len(boxes)} box(es), {len(zones)} zone(s), "
                f"{len(all_geometries)} total columns")

    return (all_geometries, left_x, right_x, top_y, bottom_y,
            word_dicts, inv, zones_data, boxes)
|
||||
|
||||
Reference in New Issue
Block a user