Files
breakpilot-lehrer/klausur-service/backend/tests/test_box_layout.py
Benjamin Admin 5f2ed44654
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 41s
CI / test-go-edu-search (push) Successful in 32s
CI / test-python-klausur (push) Failing after 2m41s
CI / test-python-agent-core (push) Successful in 34s
CI / test-nodejs-website (push) Successful in 39s
Cleanup: Delete ALL 242 shims, update ALL consumer imports
klausur-service: 183 shims deleted, 26 test files + 8 source files updated
backend-lehrer: 59 shims deleted, main.py + 8 source files updated

All imports now use the new package paths directly.
Zero shims remaining in the entire codebase.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-26 00:11:33 +02:00

125 lines
4.7 KiB
Python

"""Tests for cv_box_layout.py — box layout classification and grid building."""
import pytest
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from ocr.detect.box_layout import classify_box_layout, build_box_zone_grid, _group_into_lines
def _make_words(lines_data):
"""Create word dicts from [(y, x, text), ...] tuples."""
words = []
for y, x, text in lines_data:
words.append({"top": y, "left": x, "width": len(text) * 10, "height": 25, "text": text})
return words
class TestClassifyBoxLayout:
    """Checks the layout label that ``classify_box_layout`` assigns to a box."""

    def test_header_only(self):
        """One short line of text is labelled ``header_only``."""
        title_words = _make_words([(100, 50, "Unit 3")])
        layout = classify_box_layout(title_words, 500, 50)
        assert layout == "header_only"

    def test_empty(self):
        """An empty word list is labelled ``header_only`` as well."""
        layout = classify_box_layout([], 500, 200)
        assert layout == "header_only"

    def test_flowing(self):
        """Several lines with no bullet markers are labelled ``flowing``."""
        text_lines = [
            (100, 50, "German leihen title"),
            (130, 50, "etwas ausleihen von jm"),
            (160, 70, "borrow sth from sb"),
            (190, 70, "Can I borrow your CD"),
            (220, 50, "etwas verleihen an jn"),
            (250, 70, "OK I can lend you my"),
        ]
        layout = classify_box_layout(_make_words(text_lines), 500, 200)
        assert layout == "flowing"

    def test_bullet_list(self):
        """A title followed by bullet-prefixed lines is labelled ``bullet_list``."""
        text_lines = [
            (100, 50, "Title of the box"),
            (130, 50, "• First item text here"),
            (160, 50, "• Second item text here"),
            (190, 50, "• Third item text here"),
            (220, 50, "• Fourth item text here"),
            (250, 50, "• Fifth item text here"),
        ]
        layout = classify_box_layout(_make_words(text_lines), 500, 150)
        assert layout == "bullet_list"
class TestGroupIntoLines:
    """Checks how ``_group_into_lines`` clusters word dicts into text lines."""

    def test_single_line(self):
        """Two words sharing the same ``top`` form one line of two words."""
        same_row = [(100, 50, "hello"), (100, 120, "world")]
        grouped = _group_into_lines(_make_words(same_row))
        assert len(grouped) == 1
        assert len(grouped[0]) == 2

    def test_two_lines(self):
        """Words whose ``top`` values are 50 apart form two separate lines."""
        stacked = [(100, 50, "line1"), (150, 50, "line2")]
        grouped = _group_into_lines(_make_words(stacked))
        assert len(grouped) == 2

    def test_y_proximity(self):
        """Words within the y-tolerance are treated as the same line."""
        # The two ``top`` values differ by only 3 px.
        nearly_aligned = [(100, 50, "a"), (103, 120, "b")]
        grouped = _group_into_lines(_make_words(nearly_aligned))
        assert len(grouped) == 1
class TestBuildBoxZoneGrid:
    """Checks the grid dict that ``build_box_zone_grid`` returns per layout type."""

    def test_flowing_groups_by_indent(self):
        """Flowing layout merges indented continuation lines into one cell."""
        text_lines = [
            (100, 50, "Header Title"),
            (130, 50, "Bullet start text"),
            (160, 80, "continuation line 1"),
            (190, 80, "continuation line 2"),
        ]
        grid = build_box_zone_grid(
            _make_words(text_lines), 40, 90, 500, 120, 0, 1600, 2200,
            layout_type="flowing",
        )
        # Expect the header plus one grouped bullet: two rows, two cells.
        assert len(grid["rows"]) == 2
        assert len(grid["cells"]) == 2
        # The grouped cell keeps its lines separated by a newline.
        assert "\n" in grid["cells"][1]["text"]

    def test_header_only_single_cell(self):
        """Header-only layout yields exactly one cell and reports its type."""
        title_words = _make_words([(100, 50, "Just a title")])
        grid = build_box_zone_grid(
            title_words, 40, 90, 500, 50, 0, 1600, 2200,
            layout_type="header_only",
        )
        assert len(grid["cells"]) == 1
        assert grid["box_layout_type"] == "header_only"

    def test_columnar_delegates_to_zone_grid(self):
        """Columnar layout reports its type and at least one column."""
        text_lines = [
            (100, 50, "Col A header"),
            (100, 300, "Col B header"),
            (130, 50, "A data"),
            (130, 300, "B data"),
        ]
        grid = build_box_zone_grid(
            _make_words(text_lines), 40, 90, 500, 80, 0, 1600, 2200,
            layout_type="columnar",
        )
        assert grid["box_layout_type"] == "columnar"
        # At least one column should have been detected.
        detected_columns = grid.get("columns", [])
        assert len(detected_columns) >= 1

    def test_row_fields_for_gridtable(self):
        """Every row carries y_min_px, y_max_px and is_header for GridTable."""
        body_words = _make_words([(100, 50, "Title"), (130, 50, "Body")])
        grid = build_box_zone_grid(
            body_words, 40, 90, 500, 80, 0, 1600, 2200,
            layout_type="flowing",
        )
        required_keys = ("y_min_px", "y_max_px", "is_header")
        # NOTE(review): if "rows" is empty this loop asserts nothing.
        for row in grid["rows"]:
            for key in required_keys:
                assert key in row

    def test_column_fields_for_gridtable(self):
        """Every column carries x_min_px and x_max_px for GridTable widths."""
        single_word = _make_words([(100, 50, "Text")])
        grid = build_box_zone_grid(
            single_word, 40, 90, 500, 50, 0, 1600, 2200,
            layout_type="flowing",
        )
        required_keys = ("x_min_px", "x_max_px")
        # NOTE(review): if "columns" is empty this loop asserts nothing.
        for col in grid["columns"]:
            for key in required_keys:
                assert key in col