Files
breakpilot-lehrer/klausur-service/backend/tests/test_unified_grid.py
Benjamin Admin 3d3c2b30db
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 50s
CI / test-go-edu-search (push) Successful in 45s
CI / test-python-klausur (push) Failing after 2m30s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 34s
Add tests for unified_grid and cv_box_layout
test_unified_grid.py (10 tests):
- Dominant row height calculation (regular, gaps filtered, single row)
- Box classification (full-width, partial left/right, text line count)
- Unified grid building (content-only, box integration, cell tagging)

test_box_layout.py (13 tests):
- Layout classification (header_only, flowing, bullet_list)
- Line grouping by y-proximity
- Flowing layout indent grouping (bullet + continuations → \n)
- Row/column field completeness for GridTable compatibility

Total: 66 tests passing (43 smart_spell + 13 box_layout + 10 unified)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 18:18:52 +02:00

142 lines
6.6 KiB
Python

"""Tests for unified_grid.py — merging multi-zone grids into single zone."""
import pytest
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from unified_grid import (
_compute_dominant_row_height,
_classify_boxes,
build_unified_grid,
)
def _make_content_zone(rows_y, num_cols=4, bbox_w=1400):
"""Helper: create a content zone with rows at given y positions."""
rows = [{"index": i, "y_min_px": y, "y_max_px": y + 30, "y_min": y, "y_max": y + 30,
"is_header": False} for i, y in enumerate(rows_y)]
cols = [{"index": i, "x_min_px": i * (bbox_w // num_cols), "x_max_px": (i + 1) * (bbox_w // num_cols)}
for i in range(num_cols)]
cells = [{"row_index": r["index"], "col_index": c["index"], "col_type": f"column_{c['index']+1}",
"text": f"R{r['index']}C{c['index']}", "cell_id": f"R{r['index']}C{c['index']}",
"word_boxes": [], "confidence": 90, "is_bold": False, "ocr_engine": "test",
"bbox_px": {"x": 0, "y": 0, "w": 100, "h": 30}, "bbox_pct": {"x": 0, "y": 0, "w": 10, "h": 2}}
for r in rows for c in cols]
return {
"zone_index": 1, "zone_type": "content",
"bbox_px": {"x": 50, "y": rows_y[0] - 10, "w": bbox_w, "h": rows_y[-1] - rows_y[0] + 50},
"bbox_pct": {"x": 3, "y": 10, "w": 85, "h": 80},
"columns": cols, "rows": rows, "cells": cells,
"header_rows": [], "border": None, "word_count": len(cells),
}
def _make_box_zone(zone_index, bbox, cells_data, bg_hex="#2563eb", layout_type="flowing"):
"""Helper: create a box zone."""
rows = [{"index": i, "y_min_px": bbox["y"] + i * 30, "y_max_px": bbox["y"] + (i + 1) * 30,
"is_header": i == 0} for i in range(len(cells_data))]
cols = [{"index": 0, "x_min_px": bbox["x"], "x_max_px": bbox["x"] + bbox["w"]}]
cells = [{"row_index": i, "col_index": 0, "col_type": "column_1",
"text": text, "cell_id": f"Z{zone_index}_R{i}C0",
"word_boxes": [], "confidence": 90, "is_bold": False, "ocr_engine": "test",
"bbox_px": {"x": bbox["x"], "y": bbox["y"] + i * 30, "w": bbox["w"], "h": 30},
"bbox_pct": {"x": 50, "y": 50, "w": 30, "h": 10}}
for i, text in enumerate(cells_data)]
return {
"zone_index": zone_index, "zone_type": "box",
"bbox_px": bbox, "bbox_pct": {"x": 50, "y": 50, "w": 30, "h": 10},
"columns": cols, "rows": rows, "cells": cells,
"header_rows": [0], "border": None, "word_count": len(cells),
"box_bg_hex": bg_hex, "box_bg_color": "blue", "box_layout_type": layout_type,
}
class TestDominantRowHeight:
    """Checks for ``_compute_dominant_row_height`` on synthetic content zones."""

    def test_regular_spacing(self):
        """Rows spaced exactly 47 apart must give a dominant height of 47."""
        row_tops = [100, 147, 194, 241, 288]
        dominant = _compute_dominant_row_height(_make_content_zone(row_tops))
        assert dominant == 47

    def test_filters_large_gaps(self):
        """One oversized gap between rows must not change the result."""
        # Spacings are 47, 47, 406, 47, 47: the 406 gap stands in for a box
        # interrupting the content and is expected to be ignored, leaving 47.
        row_tops = [100, 147, 194, 600, 647, 694]
        dominant = _compute_dominant_row_height(_make_content_zone(row_tops))
        assert dominant == 47

    def test_single_row(self):
        """A lone row has no spacing to measure; 47.0 is the expected fallback."""
        lone_row_zone = _make_content_zone([100])
        dominant = _compute_dominant_row_height(lone_row_zone)
        assert dominant == 47.0
class TestClassifyBoxes:
    """Checks for ``_classify_boxes`` using a single box zone per test."""

    @staticmethod
    def _classify_one(bbox, texts):
        """Classify one box zone (index 2) with ``content_width=1400``.

        Returns the first entry of the ``_classify_boxes`` result.
        """
        box_zones = [_make_box_zone(2, bbox, texts)]
        classified = _classify_boxes(box_zones, content_width=1400)
        return classified[0]

    def test_full_width(self):
        """A 1300 wide box against 1400 of content must be ``full_width``."""
        wide_bbox = {"x": 50, "y": 500, "w": 1300, "h": 200}
        entry = self._classify_one(wide_bbox, ["Header", "Text"])
        assert entry["classification"] == "full_width"

    def test_partial_width_right(self):
        """A 500 wide box starting at x=800 is partial width on the right."""
        right_bbox = {"x": 800, "y": 500, "w": 500, "h": 200}
        entry = self._classify_one(right_bbox, ["Header", "Text"])
        assert entry["classification"] == "partial_width"
        assert entry["side"] == "right"

    def test_partial_width_left(self):
        """A 500 wide box starting at x=50 is partial width on the left."""
        left_bbox = {"x": 50, "y": 500, "w": 500, "h": 200}
        entry = self._classify_one(left_bbox, ["Header", "Text"])
        assert entry["classification"] == "partial_width"
        assert entry["side"] == "left"

    def test_text_line_count(self):
        """Embedded newlines inside a cell count as additional text lines."""
        bbox = {"x": 50, "y": 500, "w": 500, "h": 200}
        texts = ["Header", "Line1\nLine2\nLine3"]
        entry = self._classify_one(bbox, texts)
        # One line for "Header" plus three for the multi-line second cell.
        assert entry["total_lines"] == 4
class TestBuildUnifiedGrid:
    """Checks for ``build_unified_grid`` on content zones with and without boxes."""

    @staticmethod
    def _unify(zones):
        """Run ``build_unified_grid`` with the arguments shared by every test.

        NOTE(review): 1600 and 2200 are presumably the page width/height and
        the trailing dict an options/layout mapping -- confirm against
        ``unified_grid.build_unified_grid``. A fresh empty dict is passed on
        every call.
        """
        return build_unified_grid(zones, 1600, 2200, {})

    def test_content_only(self):
        """A lone content zone comes back as exactly one ``unified`` zone."""
        content_zone = _make_content_zone([100, 147, 194, 241])
        unified = self._unify([content_zone])
        assert unified["is_unified"] is True
        out_zones = unified["zones"]
        assert len(out_zones) == 1
        assert out_zones[0]["zone_type"] == "unified"
        assert unified["summary"]["total_rows"] == 4

    def test_full_width_box_integration(self):
        """Rows of a full-width box are merged into the unified row count."""
        content_zone = _make_content_zone([100, 147, 194, 600, 647])
        box_zone = _make_box_zone(
            2,
            {"x": 50, "y": 300, "w": 1300, "h": 200},
            ["Box Header", "Box Row 1", "Box Row 2"],
        )
        unified = self._unify([content_zone, box_zone])
        assert unified["is_unified"] is True
        row_total = unified["summary"]["total_rows"]
        # Five content rows plus three box rows.
        assert row_total == 8

    def test_box_cells_tagged(self):
        """Cells that came from a box carry ``source_zone_type`` and ``box_region``."""
        content_zone = _make_content_zone([100, 147, 600, 647])
        box_zone = _make_box_zone(
            2, {"x": 50, "y": 300, "w": 1300, "h": 200}, ["Box Text"]
        )
        unified = self._unify([content_zone, box_zone])
        from_box = []
        for cell in unified["zones"][0]["cells"]:
            if cell.get("source_zone_type") == "box":
                from_box.append(cell)
        assert from_box
        assert from_box[0]["box_region"]["bg_hex"] == "#2563eb"

    def test_no_content_zone(self):
        """Without a content zone the result must still expose a ``zones`` key.

        Only the presence of the key is checked here, not its contents.
        """
        box_zone = _make_box_zone(
            2, {"x": 50, "y": 300, "w": 500, "h": 200}, ["Text"]
        )
        unified = self._unify([box_zone])
        assert "zones" in unified