feat: generische Box-Erkennung fuer zonenbasierte Spaltenerkennung
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 19s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 19s
- Neue Datei cv_box_detect.py: 2-Stufen-Algorithmus (Linien + Farbe)
- DetectedBox/PageZone Dataclasses in cv_vocab_types.py
- detect_column_geometry_zoned() in cv_layout.py
- API-Endpoints erweitert: zones/boxes_detected im column_result
- Overlay-Funktionen zeichnen Box-Grenzen als gestrichelte Rechtecke
- Fix: numpy array or-Verknuepfung an 7 Stellen in ocr_pipeline_api.py
- 12 Unit-Tests fuer Box-Erkennung und Zone-Splitting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
369
klausur-service/backend/cv_box_detect.py
Normal file
369
klausur-service/backend/cv_box_detect.py
Normal file
@@ -0,0 +1,369 @@
|
||||
"""
|
||||
Embedded box detection and page zone splitting for the CV vocabulary pipeline.
|
||||
|
||||
Detects boxes (grammar tips, exercises, etc.) that span the page width and
|
||||
interrupt the normal column layout. Splits the page into vertical zones so
|
||||
that column detection can run independently per zone.
|
||||
|
||||
Two-stage algorithm:
|
||||
1. Morphological line detection — finds bordered boxes via horizontal lines.
|
||||
2. Color/saturation fallback — finds shaded boxes without visible borders.
|
||||
|
||||
Lizenz: Apache 2.0 (kommerziell nutzbar)
|
||||
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from cv_vocab_types import DetectedBox, PageZone
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Public API of this module.
__all__ = ["detect_boxes", "split_page_into_zones"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stage 1: Morphological line detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _detect_boxes_by_lines(
    gray: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Detect bordered boxes from pairs of long horizontal rules.

    The image is binarized (pixels at or below 180 become foreground), opened
    with a 1-pixel-high kernel that is ``max(50, content_w // 2)`` wide, and
    every image row whose surviving foreground inside the horizontal content
    span reaches 30% of ``content_w`` is treated as part of a rule.
    Consecutive rule rows form one rule segment. Each unused segment is then
    paired with the first later unused segment that yields a box height
    between 30 px and 70% of ``content_h``.

    Args:
        gray: Grayscale image (full page).
        content_x, content_w: Horizontal content bounds. Every returned box
            uses exactly this x and width.
        content_y, content_h: Vertical content bounds. Only ``content_h`` is
            used (for the maximum box height); ``content_y`` is accepted but
            not read, so all image rows are scanned.

    Returns:
        List of DetectedBox (confidence 0.8) in top-to-bottom order of their
        top rule; empty if fewer than two rule segments are found.
    """
    # Dark ink becomes foreground (255) so that morphology acts on strokes.
    _, ink = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY_INV)

    # Opening with a wide, flat kernel keeps only long horizontal runs.
    span = max(50, content_w // 2)
    flat_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (span, 1))
    rules = cv2.morphologyEx(ink, cv2.MORPH_OPEN, flat_kernel)

    # Per-row count of rule pixels inside the horizontal content span.
    per_row = np.count_nonzero(
        rules[:, content_x:content_x + content_w], axis=1
    )
    is_rule_row = per_row >= content_w * 0.30

    # Locate runs of consecutive rule rows via rising/falling edges.
    # Padding with zeros makes runs touching the first/last row close properly;
    # each run is reported as (first_row, one_past_last_row).
    flags = np.concatenate(([0], is_rule_row.astype(np.int8), [0]))
    steps = np.diff(flags)
    run_starts = np.flatnonzero(steps == 1).tolist()
    run_stops = np.flatnonzero(steps == -1).tolist()
    segments: List[Tuple[int, int]] = list(zip(run_starts, run_stops))

    total = len(segments)
    if total < 2:
        return []

    shortest = 30
    tallest = int(content_h * 0.70)

    found: List[DetectedBox] = []
    taken = set()
    for top_idx, (top_a, top_b) in enumerate(segments):
        if top_idx in taken:
            continue

        # First later, still-free segment that gives an acceptable height.
        partner: Optional[int] = next(
            (
                k
                for k in range(top_idx + 1, total)
                if k not in taken
                and shortest <= segments[k][1] - top_a <= tallest
            ),
            None,
        )
        if partner is None:
            continue

        bot_a, bot_b = segments[partner]
        found.append(DetectedBox(
            x=content_x,
            y=top_a,
            width=content_w,
            height=bot_b - top_a,
            confidence=0.8,
            # Thickness is estimated as the taller of the two rule segments.
            border_thickness=max(top_b - top_a, bot_b - bot_a),
        ))
        taken.update((top_idx, partner))

    return found
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stage 2: Color / saturation fallback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _detect_boxes_by_color(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
) -> List[DetectedBox]:
    """Detect shaded/colored boxes that have no visible border lines.

    A mask of pixels that are either saturated (HSV S > 25) or darker than
    220 in grayscale is closed with a 15x15 kernel; external contours of that
    mask are then filtered by area, vertex count of their polygon
    approximation, width and height.

    Args:
        img_bgr: BGR color image (full page).
        content_x, content_w: Horizontal content bounds. Only ``content_w``
            is used (area and width thresholds).
        content_y, content_h: Vertical content bounds. Only ``content_h``
            is used (area and height thresholds).

    Returns:
        List of DetectedBox (confidence 0.6, border_thickness 0) using the
        bounding rectangle of each accepted contour.
    """
    saturation = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)[:, :, 1]
    luminance = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # Foreground = saturated OR noticeably darker than paper white.
    tinted = (saturation > 25) | (luminance < 220)
    mask = tinted.astype(np.uint8) * 255

    # Closing bridges small gaps in the mask.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, closing_kernel)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    area_floor = content_w * content_h * 0.05
    width_floor = content_w * 0.60
    height_floor = 30
    height_ceiling = int(content_h * 0.70)

    shaded: List[DetectedBox] = []
    for contour in contours:
        if cv2.contourArea(contour) < area_floor:
            continue

        # Roughly rectangular shapes approximate to 4..8 vertices.
        perimeter = cv2.arcLength(contour, True)
        vertex_count = len(cv2.approxPolyDP(contour, 0.04 * perimeter, True))
        if not 4 <= vertex_count <= 8:
            continue

        left, top, wide, tall = cv2.boundingRect(contour)

        # Must span most of the content width and have a plausible height.
        if wide < width_floor:
            continue
        if not height_floor <= tall <= height_ceiling:
            continue

        shaded.append(DetectedBox(
            x=left,
            y=top,
            width=wide,
            height=tall,
            confidence=0.6,
            border_thickness=0,
        ))

    return shaded
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _validate_box(
|
||||
box: DetectedBox,
|
||||
gray: np.ndarray,
|
||||
content_w: int,
|
||||
content_h: int,
|
||||
median_row_gap: int,
|
||||
) -> bool:
|
||||
"""Validate that a detected box is genuine (not a table-row separator etc.)."""
|
||||
# Must span > 60% of content width
|
||||
if box.width < content_w * 0.60:
|
||||
return False
|
||||
|
||||
# Height constraints
|
||||
if box.height < 30 or box.height > content_h * 0.70:
|
||||
return False
|
||||
|
||||
# Must not be confused with a table-row separator:
|
||||
# real boxes are at least 3x the median row gap
|
||||
if median_row_gap > 0 and box.height < median_row_gap * 3:
|
||||
return False
|
||||
|
||||
# Must contain some text (ink density check)
|
||||
roi = gray[box.y:box.y + box.height, box.x:box.x + box.width]
|
||||
if roi.size == 0:
|
||||
return False
|
||||
ink_ratio = np.sum(roi < 128) / roi.size
|
||||
if ink_ratio < 0.002: # nearly empty → not a real content box
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API: detect_boxes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def detect_boxes(
    img_bgr: np.ndarray,
    content_x: int,
    content_w: int,
    content_y: int,
    content_h: int,
    median_row_gap: int = 0,
) -> List[DetectedBox]:
    """Detect embedded boxes on a page image.

    Line-based detection runs first. The color-based detector is used only
    when the line-based stage returns no candidates at all. Whatever
    candidates result are validated and returned sorted top to bottom.

    Args:
        img_bgr: BGR color image (full page or cropped).
        content_x, content_w: Horizontal content bounds.
        content_y, content_h: Vertical content bounds.
        median_row_gap: Median row gap height, forwarded to validation to
            filter out table separators.

    Returns:
        List of validated DetectedBox instances, sorted by y position.
    """
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    bounds = (content_x, content_w, content_y, content_h)

    # Stage 1: bordered boxes. Stage 2 is a fallback keyed on the raw
    # stage-1 candidate list, i.e. it is decided before validation.
    candidates = _detect_boxes_by_lines(gray, *bounds)
    if not candidates:
        candidates = _detect_boxes_by_color(img_bgr, *bounds)

    # Keep only candidates that pass validation, ordered top to bottom.
    accepted = sorted(
        (
            cand
            for cand in candidates
            if _validate_box(cand, gray, content_w, content_h, median_row_gap)
        ),
        key=lambda cand: cand.y,
    )

    if not accepted:
        logger.debug("BoxDetect: no boxes detected")
    else:
        logger.info(f"BoxDetect: {len(accepted)} box(es) detected "
                    f"(from {len(candidates)} candidates)")

    return accepted
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Zone Splitting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def split_page_into_zones(
    content_x: int,
    content_y: int,
    content_w: int,
    content_h: int,
    boxes: List[DetectedBox],
    min_zone_height: int = 40,
) -> List[PageZone]:
    """Split a page into vertical zones based on detected boxes.

    Regions above, between, and below boxes become 'content' zones;
    box regions become 'box' zones. Zone indices are consecutive, starting
    at 0, in the order the zones are emitted.

    Args:
        content_x, content_y, content_w, content_h: Content area bounds.
        boxes: Detected boxes. They are processed in ascending ``y`` order;
            input that is not already sorted is sorted here (stable, the
            caller's list is not modified).
        min_zone_height: Minimum height for a content zone to be kept.
            Shorter gaps produce no content zone.

    Returns:
        List of PageZone, ordered top to bottom. With no boxes, a single
        'content' zone covering the whole content area.
    """
    if not boxes:
        # Single zone: entire content area
        return [PageZone(
            index=0,
            zone_type='content',
            y=content_y,
            height=content_h,
            x=content_x,
            width=content_w,
        )]

    # Do not rely on the caller having sorted the boxes.
    ordered = sorted(boxes, key=lambda b: b.y)

    zones: List[PageZone] = []
    cursor_y = content_y
    content_bottom = content_y + content_h

    def _content_zone(top: int, height: int) -> PageZone:
        # index == number of zones emitted so far keeps indices consecutive.
        return PageZone(
            index=len(zones),
            zone_type='content',
            y=top,
            height=height,
            x=content_x,
            width=content_w,
        )

    for box in ordered:
        # Content zone above this box
        gap_above = box.y - cursor_y
        if gap_above >= min_zone_height:
            zones.append(_content_zone(cursor_y, gap_above))

        # Box zone
        zones.append(PageZone(
            index=len(zones),
            zone_type='box',
            y=box.y,
            height=box.height,
            x=box.x,
            width=box.width,
            box=box,
        ))

        # Never move the cursor upwards: with overlapping or nested boxes a
        # later box may end above an earlier one, and rewinding would make
        # the next content zone overlap an already emitted box zone.
        cursor_y = max(cursor_y, box.y + box.height)

    # Content zone below last box
    remaining = content_bottom - cursor_y
    if remaining >= min_zone_height:
        zones.append(_content_zone(cursor_y, remaining))

    logger.info(f"ZoneSplit: {len(zones)} zones from {len(boxes)} box(es): "
                f"{[z.zone_type for z in zones]}")

    return zones
|
||||
Reference in New Issue
Block a user