"""
Tests for cv_box_detect.py — box detection and page zone splitting.

License: Apache 2.0
"""

import numpy as np
import pytest
import cv2

from cv_box_detect import detect_boxes, split_page_into_zones
from cv_vocab_types import DetectedBox, PageZone


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Content rectangle shared by every detect_boxes() call in this module.
_CONTENT_AREA = dict(content_x=50, content_w=1100, content_y=50, content_h=1700)


def _white_image(width: int = 1200, height: int = 1800) -> np.ndarray:
    """Create a plain white BGR image of shape (height, width, 3), dtype uint8."""
    return np.ones((height, width, 3), dtype=np.uint8) * 255


def _draw_bordered_box(
    img: np.ndarray,
    x: int,
    y: int,
    w: int,
    h: int,
    thickness: int = 3,
    fill_text: bool = True,
) -> np.ndarray:
    """Draw a black bordered rectangle on ``img`` in place and return ``img``.

    Args:
        img: Image to draw on (modified in place).
        x, y: Top-left corner of the rectangle.
        w, h: Width and height of the rectangle.
        thickness: Border line thickness in pixels.
        fill_text: When true, also render a line of text inside the box.
    """
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 0), thickness)
    if fill_text:
        # Add some dark text inside so the box passes ink-density validation
        cv2.putText(
            img,
            "Grammar Tip: Use the present perfect.",
            (x + 20, y + h // 2),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 0, 0),
            1,
        )
    return img


def _draw_colored_box(
    img: np.ndarray,
    x: int,
    y: int,
    w: int,
    h: int,
    color: tuple = (200, 230, 255),
) -> np.ndarray:
    """Draw a filled (borderless) rectangle with inner text and return ``img``.

    Args:
        img: Image to draw on (modified in place).
        x, y: Top-left corner of the rectangle.
        w, h: Width and height of the rectangle.
        color: Fill colour handed to ``cv2.rectangle``.
    """
    # Thickness -1 asks OpenCV to fill the rectangle instead of outlining it.
    cv2.rectangle(img, (x, y), (x + w, y + h), color, -1)
    cv2.putText(
        img,
        "Exercise: Fill in the blanks.",
        (x + 20, y + h // 2),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        (0, 0, 0),
        1,
    )
    return img


# ---------------------------------------------------------------------------
# detect_boxes tests
# ---------------------------------------------------------------------------

class TestDetectBoxes:
    """Tests for the detect_boxes() function."""

    def test_no_boxes_returns_empty(self):
        """A plain white image should produce no detected boxes."""
        img = _white_image()

        boxes = detect_boxes(img, **_CONTENT_AREA)

        assert boxes == []

    def test_single_border_box(self):
        """A single bordered rectangle should be detected."""
        img = _white_image()
        _draw_bordered_box(img, x=60, y=500, w=1080, h=200, thickness=3)

        boxes = detect_boxes(img, **_CONTENT_AREA)

        assert len(boxes) >= 1
        box = boxes[0]
        assert isinstance(box, DetectedBox)
        assert box.confidence > 0
        # Box should roughly be in the right area
        assert 400 <= box.y <= 600
        assert box.height >= 100

    def test_colored_box_fallback(self):
        """A colored box without border lines should be detected by color fallback."""
        img = _white_image()
        _draw_colored_box(img, x=60, y=600, w=1080, h=180, color=(140, 200, 240))

        boxes = detect_boxes(img, **_CONTENT_AREA)

        assert len(boxes) >= 1
        box = boxes[0]
        assert isinstance(box, DetectedBox)
        # Only a positive confidence is required here; the exact value for
        # colour-detected boxes is not pinned by this test.
        assert box.confidence > 0

    def test_box_too_small_filtered(self):
        """A box shorter than 30px should be filtered out."""
        img = _white_image()
        # Draw a thin horizontal band (20px high) — should not count as a box
        _draw_bordered_box(img, x=60, y=500, w=1080, h=20, thickness=1)

        boxes = detect_boxes(img, **_CONTENT_AREA)

        assert len(boxes) == 0

    def test_box_too_narrow_filtered(self):
        """A box narrower than 60% of content width should be filtered out."""
        img = _white_image()
        # Draw a narrow box (only 400px wide on a 1100px content area = 36%)
        _draw_bordered_box(img, x=60, y=500, w=400, h=200, thickness=3)

        boxes = detect_boxes(img, **_CONTENT_AREA)

        assert len(boxes) == 0

    def test_boxes_sorted_by_y(self):
        """Multiple boxes should be returned sorted top to bottom.

        The boxes are drawn bottom-first so that a detector which merely
        preserves drawing order would fail the check.
        """
        img = _white_image()
        _draw_bordered_box(img, x=60, y=1000, w=1080, h=150, thickness=3)
        _draw_bordered_box(img, x=60, y=400, w=1080, h=150, thickness=3)

        boxes = detect_boxes(img, **_CONTENT_AREA)

        if len(boxes) < 2:
            # Make the "nothing to compare" case visible in the test report
            # instead of letting the test pass without asserting anything.
            pytest.skip("fewer than two boxes detected; ordering cannot be checked")

        # Check the whole sequence, not just the first pair.
        tops = [box.y for box in boxes]
        assert tops == sorted(tops)
# ---------------------------------------------------------------------------
# split_page_into_zones tests
# ---------------------------------------------------------------------------

class TestSplitPageIntoZones:
    """Exercise split_page_into_zones() on a fixed content rectangle."""

    def test_split_zones_no_boxes(self):
        """An empty box list yields exactly one content zone."""
        result = split_page_into_zones(
            content_x=50,
            content_y=100,
            content_w=1100,
            content_h=1600,
            boxes=[],
        )

        assert len(result) == 1
        only_zone = result[0]
        assert only_zone.zone_type == 'content'
        assert only_zone.y == 100
        assert only_zone.height == 1600

    def test_split_zones_one_box(self):
        """A single box in the middle splits the page into above / box / below."""
        middle_box = DetectedBox(
            x=50, y=500, width=1100, height=200,
            confidence=0.8, border_thickness=3,
        )

        result = split_page_into_zones(
            content_x=50,
            content_y=100,
            content_w=1100,
            content_h=1600,
            boxes=[middle_box],
        )

        # Expect exactly three zones: content above, the box, content below.
        assert len(result) == 3
        above, boxed, below = result

        assert above.zone_type == 'content'
        assert above.y == 100
        assert above.height == 400  # 500 - 100

        assert boxed.zone_type == 'box'
        assert boxed.y == 500
        assert boxed.height == 200
        assert boxed.box is not None

        assert below.zone_type == 'content'
        assert below.y == 700  # 500 + 200
        assert below.height == 1000  # (100 + 1600) - 700

    def test_split_zones_two_boxes(self):
        """Two separated boxes produce five alternating zones."""
        two_boxes = [
            DetectedBox(
                x=50, y=top, width=1100, height=150,
                confidence=0.8, border_thickness=3,
            )
            for top in (400, 900)
        ]

        result = split_page_into_zones(
            content_x=50,
            content_y=100,
            content_w=1100,
            content_h=1600,
            boxes=two_boxes,
        )

        assert len(result) == 5
        kinds = [zone.zone_type for zone in result]
        assert kinds == ['content', 'box', 'content', 'box', 'content']

    def test_split_zones_min_height(self):
        """A content gap below min_zone_height is not emitted as a zone."""
        # The box starts 10px under the content top, leaving a 10px gap above.
        near_top_box = DetectedBox(
            x=50, y=110, width=1100, height=200,
            confidence=0.8, border_thickness=3,
        )

        result = split_page_into_zones(
            content_x=50,
            content_y=100,
            content_w=1100,
            content_h=1600,
            boxes=[near_top_box],
            min_zone_height=40,
        )

        # 10px gap < 40px minimum, so the first zone must be the box itself.
        assert result[0].zone_type == 'box'
        # There must still be a content zone somewhere after it.
        assert 'content' in [zone.zone_type for zone in result]

    def test_zone_indices_sequential(self):
        """Each zone's index equals its position in the returned sequence."""
        middle_box = DetectedBox(
            x=50, y=500, width=1100, height=200,
            confidence=0.8, border_thickness=3,
        )

        result = split_page_into_zones(
            content_x=50,
            content_y=100,
            content_w=1100,
            content_h=1600,
            boxes=[middle_box],
        )

        for position, zone in enumerate(result):
            assert zone.index == position

    def test_backward_compat_no_boxes(self):
        """With no boxes the single zone mirrors the full content rectangle."""
        result = split_page_into_zones(
            content_x=50,
            content_y=100,
            content_w=1100,
            content_h=1600,
            boxes=[],
        )

        assert len(result) == 1
        whole_page = result[0]
        assert whole_page.zone_type == 'content'
        assert whole_page.x == 50
        assert whole_page.y == 100
        assert whole_page.width == 1100
        assert whole_page.height == 1600
        assert whole_page.box is None