Files
breakpilot-lehrer/klausur-service/backend/tests/test_cv_box_detect.py
Benjamin Admin 7005b18561
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 29s
CI / test-go-edu-search (push) Successful in 30s
CI / test-python-klausur (push) Failing after 1m59s
CI / test-python-agent-core (push) Successful in 17s
CI / test-nodejs-website (push) Successful in 19s
feat: generische Box-Erkennung fuer zonenbasierte Spaltenerkennung
- Neue Datei cv_box_detect.py: 2-Stufen-Algorithmus (Linien + Farbe)
- DetectedBox/PageZone Dataclasses in cv_vocab_types.py
- detect_column_geometry_zoned() in cv_layout.py
- API-Endpoints erweitert: zones/boxes_detected im column_result
- Overlay-Funktionen zeichnen Box-Grenzen als gestrichelte Rechtecke
- Fix: numpy array or-Verknuepfung an 7 Stellen in ocr_pipeline_api.py
- 12 Unit-Tests fuer Box-Erkennung und Zone-Splitting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 15:06:23 +01:00

227 lines
8.4 KiB
Python

"""
Tests for cv_box_detect.py — box detection and page zone splitting.
License: Apache 2.0
"""
import numpy as np
import pytest
import cv2
from cv_box_detect import detect_boxes, split_page_into_zones
from cv_vocab_types import DetectedBox, PageZone
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _white_image(width: int = 1200, height: int = 1800) -> np.ndarray:
    """Return a blank page: a (height, width, 3) uint8 array filled with 255."""
    page_shape = (height, width, 3)
    return np.full(page_shape, 255, dtype=np.uint8)
def _draw_bordered_box(img: np.ndarray, x: int, y: int, w: int, h: int,
                       thickness: int = 3, fill_text: bool = True) -> np.ndarray:
    """Outline a black rectangle on *img* and optionally write text inside it.

    The rectangle spans (x, y) to (x + w, y + h) and is drawn in place with
    the given line *thickness*. The same image object is returned.
    """
    black = (0, 0, 0)
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(img, top_left, bottom_right, black, thickness)

    if not fill_text:
        return img

    # Dark text inside the frame so the box passes ink-density validation.
    text_origin = (x + 20, y + h // 2)
    cv2.putText(img, "Grammar Tip: Use the present perfect.",
                text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.6, black, 1)
    return img
def _draw_colored_box(img: np.ndarray, x: int, y: int, w: int, h: int,
                      color: tuple = (200, 230, 255)) -> np.ndarray:
    """Paint a borderless, solid-colored rectangle on *img* with text inside.

    The rectangle spans (x, y) to (x + w, y + h) and is drawn in place.
    The same image object is returned.
    """
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    # Thickness -1 makes OpenCV fill the rectangle instead of outlining it.
    cv2.rectangle(img, top_left, bottom_right, color, -1)

    text_origin = (x + 20, y + h // 2)
    text_color = (0, 0, 0)
    cv2.putText(img, "Exercise: Fill in the blanks.",
                text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 1)
    return img
# ---------------------------------------------------------------------------
# detect_boxes tests
# ---------------------------------------------------------------------------
class TestDetectBoxes:
    """Tests for the detect_boxes() function.

    Each test draws onto a fresh white page and runs detection over the same
    content area (x=50, y=50, width=1100, height=1700).
    """

    def test_no_boxes_returns_empty(self):
        """A plain white image should produce no detected boxes."""
        img = _white_image()
        boxes = detect_boxes(img, content_x=50, content_w=1100,
                             content_y=50, content_h=1700)
        assert boxes == []

    def test_single_border_box(self):
        """A single bordered rectangle should be detected."""
        img = _white_image()
        _draw_bordered_box(img, x=60, y=500, w=1080, h=200, thickness=3)
        boxes = detect_boxes(img, content_x=50, content_w=1100,
                             content_y=50, content_h=1700)
        assert len(boxes) >= 1
        box = boxes[0]
        assert isinstance(box, DetectedBox)
        assert box.confidence > 0
        # Box should roughly be in the right area
        assert 400 <= box.y <= 600
        assert box.height >= 100

    def test_colored_box_fallback(self):
        """A colored box without border lines should be detected by color fallback."""
        img = _white_image()
        _draw_colored_box(img, x=60, y=600, w=1080, h=180, color=(140, 200, 240))
        boxes = detect_boxes(img, content_x=50, content_w=1100,
                             content_y=50, content_h=1700)
        assert len(boxes) >= 1
        box = boxes[0]
        assert isinstance(box, DetectedBox)
        # Only a positive confidence is required here; the exact value is
        # not pinned by this test.
        assert box.confidence > 0

    def test_box_too_small_filtered(self):
        """A box shorter than 30px should be filtered out."""
        img = _white_image()
        # Draw a thin horizontal band (20px high) — should not count as a box
        _draw_bordered_box(img, x=60, y=500, w=1080, h=20, thickness=1)
        boxes = detect_boxes(img, content_x=50, content_w=1100,
                             content_y=50, content_h=1700)
        assert len(boxes) == 0

    def test_box_too_narrow_filtered(self):
        """A box narrower than 60% of content width should be filtered out."""
        img = _white_image()
        # Draw a narrow box (only 400px wide on a 1100px content area = 36%)
        _draw_bordered_box(img, x=60, y=500, w=400, h=200, thickness=3)
        boxes = detect_boxes(img, content_x=50, content_w=1100,
                             content_y=50, content_h=1700)
        assert len(boxes) == 0

    def test_boxes_sorted_by_y(self):
        """Multiple boxes should be returned sorted top to bottom."""
        img = _white_image()
        # Draw the lower box first so the result order cannot simply mirror
        # the drawing order.
        _draw_bordered_box(img, x=60, y=1000, w=1080, h=150, thickness=3)
        _draw_bordered_box(img, x=60, y=400, w=1080, h=150, thickness=3)
        boxes = detect_boxes(img, content_x=50, content_w=1100,
                             content_y=50, content_h=1700)
        # Check the whole list rather than only the first two entries, so an
        # out-of-order box further down is caught too. With fewer than two
        # detections the assertion is trivially true, as it was before.
        y_positions = [box.y for box in boxes]
        assert y_positions == sorted(y_positions)
# ---------------------------------------------------------------------------
# split_page_into_zones tests
# ---------------------------------------------------------------------------
class TestSplitPageIntoZones:
    """Exercise split_page_into_zones() with zero, one and two boxes."""

    # Content area passed to split_page_into_zones() by every test below.
    _AREA = dict(content_x=50, content_y=100, content_w=1100, content_h=1600)

    @staticmethod
    def _box(y, height):
        """Build a full-width DetectedBox starting at vertical offset *y*."""
        return DetectedBox(x=50, y=y, width=1100, height=height,
                           confidence=0.8, border_thickness=3)

    def test_split_zones_no_boxes(self):
        """An empty box list yields exactly one content zone."""
        zones = split_page_into_zones(boxes=[], **self._AREA)
        assert len(zones) == 1
        only = zones[0]
        assert only.zone_type == 'content'
        assert only.y == 100
        assert only.height == 1600

    def test_split_zones_one_box(self):
        """A box in the middle of the page yields content / box / content."""
        zones = split_page_into_zones(boxes=[self._box(500, 200)],
                                      **self._AREA)
        assert len(zones) == 3
        above, boxed, below = zones

        # Content above: from the content top (100) down to the box top (500).
        assert above.zone_type == 'content'
        assert above.y == 100
        assert above.height == 400

        # The box zone itself carries a reference to its box.
        assert boxed.zone_type == 'box'
        assert boxed.y == 500
        assert boxed.height == 200
        assert boxed.box is not None

        # Content below: from the box bottom (700) to the content bottom (1700).
        assert below.zone_type == 'content'
        assert below.y == 700
        assert below.height == 1000

    def test_split_zones_two_boxes(self):
        """Two separated boxes yield five alternating zones."""
        upper = self._box(400, 150)
        lower = self._box(900, 150)
        zones = split_page_into_zones(boxes=[upper, lower], **self._AREA)
        assert len(zones) == 5
        kinds = [zone.zone_type for zone in zones]
        assert kinds == ['content', 'box', 'content', 'box', 'content']

    def test_split_zones_min_height(self):
        """A content gap below min_zone_height is not emitted as a zone."""
        # The box starts 10px below the content top, leaving a 10px gap
        # that is smaller than the 40px minimum.
        near_top = self._box(110, 200)
        zones = split_page_into_zones(boxes=[near_top], min_zone_height=40,
                                      **self._AREA)
        # With the gap dropped, the box is the first zone.
        assert zones[0].zone_type == 'box'
        # Content below the box must still be present.
        kinds = [zone.zone_type for zone in zones]
        assert 'content' in kinds

    def test_zone_indices_sequential(self):
        """Zone indices count up from 0 without gaps."""
        zones = split_page_into_zones(boxes=[self._box(500, 200)],
                                      **self._AREA)
        for expected_index, zone in enumerate(zones):
            assert zone.index == expected_index

    def test_backward_compat_no_boxes(self):
        """Without boxes, the single zone covers the full content area."""
        zones = split_page_into_zones(boxes=[], **self._AREA)
        assert len(zones) == 1
        (zone,) = zones
        assert zone.zone_type == 'content'
        assert zone.x == 50
        assert zone.y == 100
        assert zone.width == 1100
        assert zone.height == 1600
        assert zone.box is None