feat: generische Box-Erkennung fuer zonenbasierte Spaltenerkennung
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 19s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 19s
- Neue Datei cv_box_detect.py: 2-Stufen-Algorithmus (Linien + Farbe) - DetectedBox/PageZone Dataclasses in cv_vocab_types.py - detect_column_geometry_zoned() in cv_layout.py - API-Endpoints erweitert: zones/boxes_detected im column_result - Overlay-Funktionen zeichnen Box-Grenzen als gestrichelte Rechtecke - Fix: numpy array or-Verknuepfung an 7 Stellen in ocr_pipeline_api.py - 12 Unit-Tests fuer Box-Erkennung und Zone-Splitting Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -57,6 +57,7 @@ from cv_vocab_pipeline import (
|
||||
deskew_image_iterative,
|
||||
deskew_two_pass,
|
||||
detect_column_geometry,
|
||||
detect_column_geometry_zoned,
|
||||
detect_document_type,
|
||||
detect_row_geometry,
|
||||
expand_narrow_columns,
|
||||
@@ -1001,7 +1002,7 @@ async def detect_type(session_id: str):
|
||||
await _load_session_to_cache(session_id)
|
||||
cached = _get_cached(session_id)
|
||||
|
||||
img_bgr = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
|
||||
img_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
|
||||
if img_bgr is None:
|
||||
raise HTTPException(status_code=400, detail="Crop or dewarp must be completed first")
|
||||
|
||||
@@ -1052,7 +1053,7 @@ async def detect_columns(session_id: str):
|
||||
await _load_session_to_cache(session_id)
|
||||
cached = _get_cached(session_id)
|
||||
|
||||
img_bgr = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
|
||||
img_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
|
||||
if img_bgr is None:
|
||||
raise HTTPException(status_code=400, detail="Crop or dewarp must be completed before column detection")
|
||||
|
||||
@@ -1062,21 +1063,26 @@ async def detect_columns(session_id: str):
|
||||
ocr_img = create_ocr_image(img_bgr)
|
||||
h, w = ocr_img.shape[:2]
|
||||
|
||||
# Phase A: Geometry detection (returns word_dicts + inv for reuse)
|
||||
geo_result = detect_column_geometry(ocr_img, img_bgr)
|
||||
# Phase A: Zone-aware geometry detection
|
||||
zoned_result = detect_column_geometry_zoned(ocr_img, img_bgr)
|
||||
|
||||
if geo_result is None:
|
||||
if zoned_result is None:
|
||||
# Fallback to projection-based layout
|
||||
layout_img = create_layout_image(img_bgr)
|
||||
regions = analyze_layout(layout_img, ocr_img)
|
||||
zones_data = None
|
||||
boxes_detected = 0
|
||||
else:
|
||||
geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv = geo_result
|
||||
geometries, left_x, right_x, top_y, bottom_y, word_dicts, inv, zones_data, boxes = zoned_result
|
||||
content_w = right_x - left_x
|
||||
boxes_detected = len(boxes)
|
||||
|
||||
# Cache intermediates for row detection (avoids second Tesseract run)
|
||||
cached["_word_dicts"] = word_dicts
|
||||
cached["_inv"] = inv
|
||||
cached["_content_bounds"] = (left_x, right_x, top_y, bottom_y)
|
||||
cached["_zones_data"] = zones_data
|
||||
cached["_boxes_detected"] = boxes_detected
|
||||
|
||||
# Detect header/footer early so sub-column clustering ignores them
|
||||
header_y, footer_y = _detect_header_footer_gaps(inv, w, h) if inv is not None else (None, None)
|
||||
@@ -1106,8 +1112,13 @@ async def detect_columns(session_id: str):
|
||||
"columns": columns,
|
||||
"classification_methods": methods,
|
||||
"duration_seconds": round(duration, 2),
|
||||
"boxes_detected": boxes_detected,
|
||||
}
|
||||
|
||||
# Add zone data when boxes are present
|
||||
if zones_data and boxes_detected > 0:
|
||||
column_result["zones"] = zones_data
|
||||
|
||||
# Persist to DB — also invalidate downstream results (rows, words)
|
||||
await update_session_db(
|
||||
session_id,
|
||||
@@ -1124,13 +1135,14 @@ async def detect_columns(session_id: str):
|
||||
|
||||
col_count = len([c for c in columns if c["type"].startswith("column")])
|
||||
logger.info(f"OCR Pipeline: columns session {session_id}: "
|
||||
f"{col_count} columns detected ({duration:.2f}s)")
|
||||
f"{col_count} columns detected, {boxes_detected} box(es) ({duration:.2f}s)")
|
||||
|
||||
img_w = img_bgr.shape[1]
|
||||
await _append_pipeline_log(session_id, "columns", {
|
||||
"total_columns": len(columns),
|
||||
"column_widths_pct": [round(c["width"] / img_w * 100, 1) for c in columns],
|
||||
"column_types": [c["type"] for c in columns],
|
||||
"boxes_detected": boxes_detected,
|
||||
}, duration_ms=int(duration * 1000))
|
||||
|
||||
return {
|
||||
@@ -1266,6 +1278,27 @@ async def _get_columns_overlay(session_id: str) -> Response:
|
||||
# Blend overlay at 20% opacity
|
||||
cv2.addWeighted(overlay, 0.2, img, 0.8, 0, img)
|
||||
|
||||
# Draw detected box boundaries as dashed rectangles
|
||||
zones = column_result.get("zones", [])
|
||||
for zone in zones:
|
||||
if zone.get("zone_type") == "box" and zone.get("box"):
|
||||
box = zone["box"]
|
||||
bx, by = box["x"], box["y"]
|
||||
bw, bh = box["width"], box["height"]
|
||||
box_color = (0, 200, 255) # Yellow (BGR)
|
||||
# Draw dashed rectangle by drawing short line segments
|
||||
dash_len = 15
|
||||
for edge_x in range(bx, bx + bw, dash_len * 2):
|
||||
end_x = min(edge_x + dash_len, bx + bw)
|
||||
cv2.line(img, (edge_x, by), (end_x, by), box_color, 2)
|
||||
cv2.line(img, (edge_x, by + bh), (end_x, by + bh), box_color, 2)
|
||||
for edge_y in range(by, by + bh, dash_len * 2):
|
||||
end_y = min(edge_y + dash_len, by + bh)
|
||||
cv2.line(img, (bx, edge_y), (bx, end_y), box_color, 2)
|
||||
cv2.line(img, (bx + bw, edge_y), (bx + bw, end_y), box_color, 2)
|
||||
cv2.putText(img, "BOX", (bx + 10, by + bh - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.7, box_color, 2)
|
||||
|
||||
success, result_png = cv2.imencode(".png", img)
|
||||
if not success:
|
||||
raise HTTPException(status_code=500, detail="Failed to encode overlay image")
|
||||
@@ -1284,7 +1317,7 @@ async def detect_rows(session_id: str):
|
||||
await _load_session_to_cache(session_id)
|
||||
cached = _get_cached(session_id)
|
||||
|
||||
dewarped_bgr = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
|
||||
dewarped_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
|
||||
if dewarped_bgr is None:
|
||||
raise HTTPException(status_code=400, detail="Crop or dewarp must be completed before row detection")
|
||||
|
||||
@@ -1315,7 +1348,7 @@ async def detect_rows(session_id: str):
|
||||
# Build serializable result (exclude words to keep payload small)
|
||||
rows_data = []
|
||||
for r in rows:
|
||||
rows_data.append({
|
||||
rd = {
|
||||
"index": r.index,
|
||||
"x": r.x,
|
||||
"y": r.y,
|
||||
@@ -1324,7 +1357,9 @@ async def detect_rows(session_id: str):
|
||||
"word_count": r.word_count,
|
||||
"row_type": r.row_type,
|
||||
"gap_before": r.gap_before,
|
||||
})
|
||||
"zone_index": 0,
|
||||
}
|
||||
rows_data.append(rd)
|
||||
|
||||
type_counts = {}
|
||||
for r in rows:
|
||||
@@ -1456,7 +1491,7 @@ async def detect_words(
|
||||
await _load_session_to_cache(session_id)
|
||||
cached = _get_cached(session_id)
|
||||
|
||||
dewarped_bgr = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
|
||||
dewarped_bgr = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
|
||||
if dewarped_bgr is None:
|
||||
logger.warning("detect_words: no cropped/dewarped image for session %s (cache keys: %s)",
|
||||
session_id, [k for k in cached.keys() if k.endswith('_bgr')])
|
||||
@@ -1560,6 +1595,10 @@ async def detect_words(
|
||||
)
|
||||
duration = time.time() - t0
|
||||
|
||||
# Add zone_index to each cell (default 0 for backward compatibility)
|
||||
for cell in cells:
|
||||
cell.setdefault("zone_index", 0)
|
||||
|
||||
# Layout detection
|
||||
col_types = {c['type'] for c in columns_meta}
|
||||
is_vocab = bool(col_types & {'column_en', 'column_de'})
|
||||
@@ -2749,6 +2788,22 @@ async def _get_rows_overlay(session_id: str) -> Response:
|
||||
# Blend overlay at 15% opacity
|
||||
cv2.addWeighted(overlay, 0.15, img, 0.85, 0, img)
|
||||
|
||||
# Draw zone separator lines if zones exist
|
||||
column_result = session.get("column_result") or {}
|
||||
zones = column_result.get("zones", [])
|
||||
if zones:
|
||||
img_w_px = img.shape[1]
|
||||
zone_color = (0, 200, 255) # Yellow (BGR)
|
||||
dash_len = 20
|
||||
for zone in zones:
|
||||
if zone.get("zone_type") == "box":
|
||||
zy = zone["y"]
|
||||
zh = zone["height"]
|
||||
for line_y in [zy, zy + zh]:
|
||||
for sx in range(0, img_w_px, dash_len * 2):
|
||||
ex = min(sx + dash_len, img_w_px)
|
||||
cv2.line(img, (sx, line_y), (ex, line_y), zone_color, 2)
|
||||
|
||||
success, result_png = cv2.imencode(".png", img)
|
||||
if not success:
|
||||
raise HTTPException(status_code=500, detail="Failed to encode overlay image")
|
||||
@@ -3182,7 +3237,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
|
||||
yield await _auto_sse_event("columns", "start", {})
|
||||
try:
|
||||
t0 = time.time()
|
||||
col_img = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
|
||||
col_img = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
|
||||
if col_img is None:
|
||||
raise ValueError("Cropped/dewarped image not available")
|
||||
|
||||
@@ -3243,7 +3298,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
|
||||
yield await _auto_sse_event("rows", "start", {})
|
||||
try:
|
||||
t0 = time.time()
|
||||
row_img = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
|
||||
row_img = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
|
||||
session = await get_session_db(session_id)
|
||||
column_result = session.get("column_result") or cached.get("column_result")
|
||||
if not column_result or not column_result.get("columns"):
|
||||
@@ -3321,7 +3376,7 @@ async def run_auto(session_id: str, req: RunAutoRequest, request: Request):
|
||||
yield await _auto_sse_event("words", "start", {"engine": req.ocr_engine})
|
||||
try:
|
||||
t0 = time.time()
|
||||
word_img = cached.get("cropped_bgr") or cached.get("dewarped_bgr")
|
||||
word_img = cached.get("cropped_bgr") if cached.get("cropped_bgr") is not None else cached.get("dewarped_bgr")
|
||||
session = await get_session_db(session_id)
|
||||
|
||||
column_result = session.get("column_result") or cached.get("column_result")
|
||||
|
||||
Reference in New Issue
Block a user