Files
breakpilot-lehrer/klausur-service/backend/tests/test_box_layout.py
Benjamin Admin 3d3c2b30db
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 50s
CI / test-go-edu-search (push) Successful in 45s
CI / test-python-klausur (push) Failing after 2m30s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 34s
Add tests for unified_grid and cv_box_layout
test_unified_grid.py (10 tests):
- Dominant row height calculation (regular, gaps filtered, single row)
- Box classification (full-width, partial left/right, text line count)
- Unified grid building (content-only, box integration, cell tagging)

test_box_layout.py (13 tests):
- Layout classification (header_only, flowing, bullet_list)
- Line grouping by y-proximity
- Flowing layout indent grouping (bullet + continuations → \n)
- Row/column field completeness for GridTable compatibility

Total: 66 tests passing (43 smart_spell + 13 box_layout + 10 unified)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 18:18:52 +02:00

125 lines
4.7 KiB
Python

"""Tests for cv_box_layout.py — box layout classification and grid building."""
import pytest
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from cv_box_layout import classify_box_layout, build_box_zone_grid, _group_into_lines
def _make_words(lines_data):
"""Create word dicts from [(y, x, text), ...] tuples."""
words = []
for y, x, text in lines_data:
words.append({"top": y, "left": x, "width": len(text) * 10, "height": 25, "text": text})
return words
class TestClassifyBoxLayout:
    """Expected return values of ``classify_box_layout`` for small word sets.

    NOTE(review): the two numeric arguments passed after the word list are
    presumably the box width and height in pixels — confirm against the
    signature in ``cv_box_layout``.
    """

    def test_header_only(self):
        """A single line of text is classified as ``header_only``."""
        single_line = _make_words([(100, 50, "Unit 3")])
        layout = classify_box_layout(single_line, 500, 50)
        assert layout == "header_only"

    def test_empty(self):
        """An empty word list is classified as ``header_only`` as well."""
        layout = classify_box_layout([], 500, 200)
        assert layout == "header_only"

    def test_flowing(self):
        """Multiple lines without bullet patterns → flowing."""
        line_specs = [
            (100, 50, "German leihen title"),
            (130, 50, "etwas ausleihen von jm"),
            (160, 70, "borrow sth from sb"),
            (190, 70, "Can I borrow your CD"),
            (220, 50, "etwas verleihen an jn"),
            (250, 70, "OK I can lend you my"),
        ]
        layout = classify_box_layout(_make_words(line_specs), 500, 200)
        assert layout == "flowing"

    def test_bullet_list(self):
        """Lines starting with bullet markers → bullet_list."""
        line_specs = [
            (100, 50, "Title of the box"),
            (130, 50, "• First item text here"),
            (160, 50, "• Second item text here"),
            (190, 50, "• Third item text here"),
            (220, 50, "• Fourth item text here"),
            (250, 50, "• Fifth item text here"),
        ]
        layout = classify_box_layout(_make_words(line_specs), 500, 150)
        assert layout == "bullet_list"
class TestGroupIntoLines:
    """Checks how ``_group_into_lines`` buckets words into lines by ``top``."""

    def test_single_line(self):
        """Two words with the same ``top`` end up in one line of two words."""
        same_row = _make_words([(100, 50, "hello"), (100, 120, "world")])
        grouped = _group_into_lines(same_row)
        assert len(grouped) == 1
        assert len(grouped[0]) == 2

    def test_two_lines(self):
        """Words 50px apart vertically are split into two lines."""
        stacked = _make_words([(100, 50, "line1"), (150, 50, "line2")])
        grouped = _group_into_lines(stacked)
        assert len(grouped) == 2

    def test_y_proximity(self):
        """Words within y-tolerance are on same line."""
        # The two words differ by only 3px in ``top``.
        nearly_aligned = _make_words([(100, 50, "a"), (103, 120, "b")])
        grouped = _group_into_lines(nearly_aligned)
        assert len(grouped) == 1
class TestBuildBoxZoneGrid:
    """Checks the grid dict returned by ``build_box_zone_grid``.

    NOTE(review): the seven positional numbers passed after ``words`` are
    presumably box x / y / width / height, a zone index and the page width /
    height — confirm against the signature in ``cv_box_layout``.
    """

    def test_flowing_groups_by_indent(self):
        """Flowing layout groups continuation lines by indentation."""
        words = _make_words([
            (100, 50, "Header Title"),
            (130, 50, "Bullet start text"),
            (160, 80, "continuation line 1"),
            (190, 80, "continuation line 2"),
        ])
        result = build_box_zone_grid(words, 40, 90, 500, 120, 0, 1600, 2200, layout_type="flowing")
        # Header + 1 grouped bullet = 2 rows
        assert len(result["rows"]) == 2
        assert len(result["cells"]) == 2
        # Second cell should have \n (multi-line)
        bullet_cell = result["cells"][1]
        assert "\n" in bullet_cell["text"]

    def test_header_only_single_cell(self):
        """A header-only box yields exactly one cell and reports its layout type."""
        words = _make_words([(100, 50, "Just a title")])
        result = build_box_zone_grid(words, 40, 90, 500, 50, 0, 1600, 2200, layout_type="header_only")
        assert len(result["cells"]) == 1
        assert result["box_layout_type"] == "header_only"

    def test_columnar_delegates_to_zone_grid(self):
        """Columnar layout uses standard grid builder."""
        words = _make_words([
            (100, 50, "Col A header"),
            (100, 300, "Col B header"),
            (130, 50, "A data"),
            (130, 300, "B data"),
        ])
        result = build_box_zone_grid(words, 40, 90, 500, 80, 0, 1600, 2200, layout_type="columnar")
        assert result["box_layout_type"] == "columnar"
        # Should have detected columns
        assert len(result.get("columns", [])) >= 1

    def test_row_fields_for_gridtable(self):
        """Rows must have y_min_px, y_max_px, is_header for GridTable."""
        words = _make_words([(100, 50, "Title"), (130, 50, "Body")])
        result = build_box_zone_grid(words, 40, 90, 500, 80, 0, 1600, 2200, layout_type="flowing")
        # Guard against a vacuous pass: the field checks below would succeed
        # trivially if no rows were produced at all.
        assert result["rows"], "expected at least one row for non-empty input"
        for row in result["rows"]:
            assert "y_min_px" in row
            assert "y_max_px" in row
            assert "is_header" in row

    def test_column_fields_for_gridtable(self):
        """Columns must have x_min_px, x_max_px for GridTable width calculation."""
        words = _make_words([(100, 50, "Text")])
        result = build_box_zone_grid(words, 40, 90, 500, 50, 0, 1600, 2200, layout_type="flowing")
        # Guard against a vacuous pass: an empty column list would otherwise
        # satisfy the loop without checking anything.
        assert result["columns"], "expected at least one column for non-empty input"
        for col in result["columns"]:
            assert "x_min_px" in col
            assert "x_max_px" in col