Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 19s
- Neue Datei cv_box_detect.py: 2-Stufen-Algorithmus (Linien + Farbe) - DetectedBox/PageZone Dataclasses in cv_vocab_types.py - detect_column_geometry_zoned() in cv_layout.py - API-Endpoints erweitert: zones/boxes_detected im column_result - Overlay-Funktionen zeichnen Box-Grenzen als gestrichelte Rechtecke - Fix: numpy array or-Verknuepfung an 7 Stellen in ocr_pipeline_api.py - 12 Unit-Tests fuer Box-Erkennung und Zone-Splitting Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
227 lines
8.4 KiB
Python
227 lines
8.4 KiB
Python
"""
|
|
Tests for cv_box_detect.py — box detection and page zone splitting.
|
|
|
|
Lizenz: Apache 2.0
|
|
"""
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
import cv2
|
|
|
|
from cv_box_detect import detect_boxes, split_page_into_zones
|
|
from cv_vocab_types import DetectedBox, PageZone
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _white_image(width: int = 1200, height: int = 1800) -> np.ndarray:
    """Create a plain white BGR image.

    Args:
        width: Image width in pixels (number of columns).
        height: Image height in pixels (number of rows).

    Returns:
        A ``(height, width, 3)`` array of dtype ``uint8`` in which every
        element is 255.
    """
    # np.full writes the constant directly with an explicit dtype, instead of
    # allocating an array of ones and multiplying it by 255 afterwards.
    return np.full((height, width, 3), 255, dtype=np.uint8)
|
|
|
|
|
|
def _draw_bordered_box(img: np.ndarray, x: int, y: int, w: int, h: int,
                       thickness: int = 3, fill_text: bool = True) -> np.ndarray:
    """Outline a black rectangle on ``img`` and optionally label its interior.

    Args:
        img: Image handed to the OpenCV drawing calls.
        x: Left edge of the rectangle.
        y: Top edge of the rectangle.
        w: Rectangle width; the right edge is drawn at ``x + w``.
        h: Rectangle height; the bottom edge is drawn at ``y + h``.
        thickness: Line thickness passed to ``cv2.rectangle``.
        fill_text: When true, a line of black text is rendered inside the box.

    Returns:
        The ``img`` argument, after the drawing calls have been applied to it.
    """
    black = (0, 0, 0)
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(img, top_left, bottom_right, black, thickness)

    if not fill_text:
        return img

    # Dark text inside the frame so the box passes ink-density validation.
    text_origin = (x + 20, y + h // 2)
    cv2.putText(img, "Grammar Tip: Use the present perfect.", text_origin,
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, black, 1)
    return img
|
|
|
|
|
|
def _draw_colored_box(img: np.ndarray, x: int, y: int, w: int, h: int,
                      color: tuple = (200, 230, 255)) -> np.ndarray:
    """Paint a filled, borderless rectangle on ``img`` and label its interior.

    Args:
        img: Image handed to the OpenCV drawing calls.
        x: Left edge of the rectangle.
        y: Top edge of the rectangle.
        w: Rectangle width; the right edge is at ``x + w``.
        h: Rectangle height; the bottom edge is at ``y + h``.
        color: Fill colour passed straight to ``cv2.rectangle``.

    Returns:
        The ``img`` argument, after the drawing calls have been applied to it.
    """
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    # A thickness of -1 asks OpenCV for a filled shape rather than an outline.
    cv2.rectangle(img, top_left, bottom_right, color, -1)

    text_origin = (x + 20, y + h // 2)
    text_color = (0, 0, 0)
    cv2.putText(img, "Exercise: Fill in the blanks.", text_origin,
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 1)
    return img
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# detect_boxes tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDetectBoxes:
    """Tests for the detect_boxes() function.

    Every test draws on a fresh white page from ``_white_image()`` and runs
    detection over the same content area (x=50, y=50, 1100 x 1700).
    """

    @staticmethod
    def _detect(img):
        """Run detect_boxes() on ``img`` with the content area used by all tests."""
        return detect_boxes(img, content_x=50, content_w=1100,
                            content_y=50, content_h=1700)

    def test_no_boxes_returns_empty(self):
        """A plain white image should produce no detected boxes."""
        img = _white_image()

        boxes = self._detect(img)

        assert boxes == []

    def test_single_border_box(self):
        """A single bordered rectangle should be detected."""
        img = _white_image()
        _draw_bordered_box(img, x=60, y=500, w=1080, h=200, thickness=3)

        boxes = self._detect(img)

        assert len(boxes) >= 1
        box = boxes[0]
        assert isinstance(box, DetectedBox)
        assert box.confidence > 0
        # Box should roughly be in the right area
        assert 400 <= box.y <= 600
        assert box.height >= 100

    def test_colored_box_fallback(self):
        """A colored box without border lines should be detected by color fallback."""
        img = _white_image()
        _draw_colored_box(img, x=60, y=600, w=1080, h=180, color=(140, 200, 240))

        boxes = self._detect(img)

        assert len(boxes) >= 1
        box = boxes[0]
        assert isinstance(box, DetectedBox)
        # Color-detected boxes are expected to score lower than bordered ones,
        # so only a positive confidence is required here.
        assert box.confidence > 0

    def test_box_too_small_filtered(self):
        """A box shorter than 30px should be filtered out."""
        img = _white_image()
        # Draw a thin horizontal band (20px high) — should not count as a box
        _draw_bordered_box(img, x=60, y=500, w=1080, h=20, thickness=1)

        boxes = self._detect(img)

        assert len(boxes) == 0

    def test_box_too_narrow_filtered(self):
        """A box narrower than 60% of content width should be filtered out."""
        img = _white_image()
        # Draw a narrow box (only 400px wide on a 1100px content area = 36%)
        _draw_bordered_box(img, x=60, y=500, w=400, h=200, thickness=3)

        boxes = self._detect(img)

        assert len(boxes) == 0

    def test_boxes_sorted_by_y(self):
        """Multiple boxes should be returned sorted top to bottom."""
        img = _white_image()
        # Draw the lower box first so the result order cannot simply mirror
        # the drawing order.
        _draw_bordered_box(img, x=60, y=1000, w=1080, h=150, thickness=3)
        _draw_bordered_box(img, x=60, y=400, w=1080, h=150, thickness=3)

        boxes = self._detect(img)

        # With fewer than two boxes there is nothing to order; report a skip
        # instead of letting the test pass without checking anything.
        if len(boxes) < 2:
            pytest.skip("fewer than two boxes detected; ordering cannot be checked")

        # Check the whole sequence, not only the first pair.
        tops = [box.y for box in boxes]
        assert tops == sorted(tops)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# split_page_into_zones tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSplitPageIntoZones:
    """Tests for the split_page_into_zones() function."""

    # Content area shared by every test in this class.
    _AREA = {
        "content_x": 50,
        "content_y": 100,
        "content_w": 1100,
        "content_h": 1600,
    }

    @staticmethod
    def _full_width_box(y, height):
        """Build a DetectedBox spanning the full content width at ``y``."""
        return DetectedBox(x=50, y=y, width=1100, height=height,
                           confidence=0.8, border_thickness=3)

    def test_split_zones_no_boxes(self):
        """With an empty box list the page is a single content zone."""
        zones = split_page_into_zones(boxes=[], **self._AREA)

        assert len(zones) == 1
        only = zones[0]
        assert only.zone_type == 'content'
        assert only.y == 100
        assert only.height == 1600

    def test_split_zones_one_box(self):
        """One mid-page box yields three zones: content, box, content."""
        zones = split_page_into_zones(
            boxes=[self._full_width_box(y=500, height=200)],
            **self._AREA,
        )

        assert len(zones) == 3
        above, boxed, below = zones

        # Content above the box: from the content top (100) down to the box top (500).
        assert above.zone_type == 'content'
        assert above.y == 100
        assert above.height == 400

        # The box zone itself mirrors the box geometry and carries the box.
        assert boxed.zone_type == 'box'
        assert boxed.y == 500
        assert boxed.height == 200
        assert boxed.box is not None

        # Content below: from the box bottom (700) to the content bottom (1700).
        assert below.zone_type == 'content'
        assert below.y == 700
        assert below.height == 1000

    def test_split_zones_two_boxes(self):
        """Two separated boxes yield five zones alternating content and box."""
        upper = self._full_width_box(y=400, height=150)
        lower = self._full_width_box(y=900, height=150)

        zones = split_page_into_zones(boxes=[upper, lower], **self._AREA)

        assert len(zones) == 5
        kinds = [zone.zone_type for zone in zones]
        assert kinds == ['content', 'box', 'content', 'box', 'content']

    def test_split_zones_min_height(self):
        """A content gap shorter than min_zone_height is not emitted as a zone."""
        # The box starts 10px below the content top, so the gap above it is
        # smaller than the 40px minimum requested below.
        near_top = self._full_width_box(y=110, height=200)

        zones = split_page_into_zones(
            boxes=[near_top],
            min_zone_height=40,
            **self._AREA,
        )

        # The first zone must therefore be the box, not a sliver of content.
        assert zones[0].zone_type == 'box'
        # Some content zone (the area below the box) must still be present.
        assert any(zone.zone_type == 'content' for zone in zones)

    def test_zone_indices_sequential(self):
        """Zone indices count up from 0 without gaps."""
        zones = split_page_into_zones(
            boxes=[self._full_width_box(y=500, height=200)],
            **self._AREA,
        )

        expected = list(range(len(zones)))
        actual = [zone.index for zone in zones]
        assert actual == expected

    def test_backward_compat_no_boxes(self):
        """Without boxes the single zone covers exactly the content area."""
        zones = split_page_into_zones(boxes=[], **self._AREA)

        assert len(zones) == 1
        z = zones[0]
        assert z.zone_type == 'content'
        assert z.x == 50
        assert z.y == 100
        assert z.width == 1100
        assert z.height == 1600
        assert z.box is None
|