Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 50s
CI / test-go-edu-search (push) Successful in 45s
CI / test-python-klausur (push) Failing after 2m30s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 34s
test_unified_grid.py (10 tests):
- Dominant row height calculation (regular, gaps filtered, single row)
- Box classification (full-width, partial left/right, text line count)
- Unified grid building (content-only, box integration, cell tagging)

test_box_layout.py (13 tests):
- Layout classification (header_only, flowing, bullet_list)
- Line grouping by y-proximity
- Flowing layout indent grouping (bullet + continuations → \n)
- Row/column field completeness for GridTable compatibility

Total: 66 tests passing (43 smart_spell + 13 box_layout + 10 unified)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
125 lines
4.7 KiB
Python
125 lines
4.7 KiB
Python
"""Tests for cv_box_layout.py — box layout classification and grid building."""

import os
import sys

import pytest

# Put the parent directory of this test file on the import path so that
# ``cv_box_layout`` can be imported below. This must run before the import.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from cv_box_layout import classify_box_layout, build_box_zone_grid, _group_into_lines


def _make_words(lines_data):
|
|
"""Create word dicts from [(y, x, text), ...] tuples."""
|
|
words = []
|
|
for y, x, text in lines_data:
|
|
words.append({"top": y, "left": x, "width": len(text) * 10, "height": 25, "text": text})
|
|
return words
|
|
|
|
|
|
class TestClassifyBoxLayout:
    """Expected ``classify_box_layout`` labels for representative word sets.

    NOTE(review): the two numeric arguments passed after ``words`` are
    presumably the box width and height — confirm against the signature of
    ``classify_box_layout``.
    """

    def test_header_only(self):
        """A single line of text is classified as ``header_only``."""
        words = _make_words([(100, 50, "Unit 3")])
        assert classify_box_layout(words, 500, 50) == "header_only"

    def test_empty(self):
        """An empty word list is also classified as ``header_only``."""
        assert classify_box_layout([], 500, 200) == "header_only"

    def test_flowing(self):
        """Multiple lines without bullet patterns → flowing."""
        words = _make_words([
            (100, 50, "German leihen title"),
            (130, 50, "etwas ausleihen von jm"),
            (160, 70, "borrow sth from sb"),
            (190, 70, "Can I borrow your CD"),
            (220, 50, "etwas verleihen an jn"),
            (250, 70, "OK I can lend you my"),
        ])
        assert classify_box_layout(words, 500, 200) == "flowing"

    def test_bullet_list(self):
        """Lines starting with bullet markers → bullet_list."""
        words = _make_words([
            (100, 50, "Title of the box"),
            (130, 50, "• First item text here"),
            (160, 50, "• Second item text here"),
            (190, 50, "• Third item text here"),
            (220, 50, "• Fourth item text here"),
            (250, 50, "• Fifth item text here"),
        ])
        assert classify_box_layout(words, 500, 150) == "bullet_list"


class TestGroupIntoLines:
    """Checks how ``_group_into_lines`` groups words by their ``top`` value."""

    def test_single_line(self):
        """Two words with the same ``top`` end up in one line of two words."""
        words = _make_words([(100, 50, "hello"), (100, 120, "world")])
        lines = _group_into_lines(words)
        assert len(lines) == 1
        assert len(lines[0]) == 2

    def test_two_lines(self):
        """Words 50 apart vertically are split into two lines."""
        words = _make_words([(100, 50, "line1"), (150, 50, "line2")])
        lines = _group_into_lines(words)
        assert len(lines) == 2

    def test_y_proximity(self):
        """Words within y-tolerance are on same line."""
        words = _make_words([(100, 50, "a"), (103, 120, "b")])  # 3px apart
        lines = _group_into_lines(words)
        assert len(lines) == 1


class TestBuildBoxZoneGrid:
    """Checks the structure returned by ``build_box_zone_grid`` per layout type.

    NOTE(review): the positional arguments after ``words`` presumably describe
    the box geometry, a zone index and the page size — confirm against the
    signature of ``build_box_zone_grid``.
    """

    def test_flowing_groups_by_indent(self):
        """Flowing layout groups continuation lines by indentation."""
        words = _make_words([
            (100, 50, "Header Title"),
            (130, 50, "Bullet start text"),
            (160, 80, "continuation line 1"),
            (190, 80, "continuation line 2"),
        ])
        result = build_box_zone_grid(
            words, 40, 90, 500, 120, 0, 1600, 2200, layout_type="flowing"
        )
        # Header + 1 grouped bullet = 2 rows
        assert len(result["rows"]) == 2
        assert len(result["cells"]) == 2
        # Second cell should have \n (multi-line)
        bullet_cell = result["cells"][1]
        assert "\n" in bullet_cell["text"]

    def test_header_only_single_cell(self):
        """A header-only box yields one cell and reports its layout type."""
        words = _make_words([(100, 50, "Just a title")])
        result = build_box_zone_grid(
            words, 40, 90, 500, 50, 0, 1600, 2200, layout_type="header_only"
        )
        assert len(result["cells"]) == 1
        assert result["box_layout_type"] == "header_only"

    def test_columnar_delegates_to_zone_grid(self):
        """Columnar layout uses standard grid builder."""
        words = _make_words([
            (100, 50, "Col A header"),
            (100, 300, "Col B header"),
            (130, 50, "A data"),
            (130, 300, "B data"),
        ])
        result = build_box_zone_grid(
            words, 40, 90, 500, 80, 0, 1600, 2200, layout_type="columnar"
        )
        assert result["box_layout_type"] == "columnar"
        # Should have detected columns
        assert len(result.get("columns", [])) >= 1

    def test_row_fields_for_gridtable(self):
        """Rows must have y_min_px, y_max_px, is_header for GridTable."""
        words = _make_words([(100, 50, "Title"), (130, 50, "Body")])
        result = build_box_zone_grid(
            words, 40, 90, 500, 80, 0, 1600, 2200, layout_type="flowing"
        )
        # NOTE(review): this loop passes vacuously if ``rows`` is empty.
        for row in result["rows"]:
            assert "y_min_px" in row
            assert "y_max_px" in row
            assert "is_header" in row

    def test_column_fields_for_gridtable(self):
        """Columns must have x_min_px, x_max_px for GridTable width calculation."""
        words = _make_words([(100, 50, "Text")])
        result = build_box_zone_grid(
            words, 40, 90, 500, 50, 0, 1600, 2200, layout_type="flowing"
        )
        # NOTE(review): this loop passes vacuously if ``columns`` is empty.
        for col in result["columns"]:
            assert "x_min_px" in col
            assert "x_max_px" in col