Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m54s
CI / test-python-agent-core (push) Successful in 15s
CI / test-nodejs-website (push) Successful in 17s
1. page_crop: Score all dark runs by center-proximity × darkness × narrowness instead of picking the widest. Fixes ad810209 where a wide dark area at 35% was chosen over the actual spine at 50%. 2. cv_words_first: Replace x-center-only word→column assignment with overlap-based three-pass strategy (overlap → midpoint-range → nearest). Fixes truncated German translations like "Schal" instead of "Schal - die Schals" in session 079cd0d9. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
567 lines
22 KiB
Python
567 lines
22 KiB
Python
"""
Tests for page_crop.py — content-based crop algorithm.

Tests cover:
- Edge detection via ink projections
- Spine shadow detection for book scans
- Narrow run filtering
- Paper format detection
- Sanity checks (min area, min border)
- End-to-end crop on synthetic images
- Determinism of the crop across repeated runs
- Page-split detection (spine scoring) on landscape book spreads
"""
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from page_crop import (
|
|
detect_and_crop_page,
|
|
detect_page_splits,
|
|
_detect_format,
|
|
_detect_edge_projection,
|
|
_detect_left_edge_shadow,
|
|
_detect_right_edge_shadow,
|
|
_detect_spine_shadow,
|
|
_filter_narrow_runs,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helper: create synthetic images
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _make_white_image(h: int, w: int) -> np.ndarray:
|
|
"""Create a white BGR image."""
|
|
return np.full((h, w, 3), 255, dtype=np.uint8)
|
|
|
|
|
|
def _make_image_with_content(
|
|
h: int, w: int,
|
|
content_rect: tuple, # (y1, y2, x1, x2)
|
|
bg_color: int = 255,
|
|
content_color: int = 0,
|
|
) -> np.ndarray:
|
|
"""Create an image with a dark content rectangle on a light background."""
|
|
img = np.full((h, w, 3), bg_color, dtype=np.uint8)
|
|
y1, y2, x1, x2 = content_rect
|
|
img[y1:y2, x1:x2] = content_color
|
|
return img
|
|
|
|
|
|
def _make_book_scan(h: int = 1000, w: int = 800) -> np.ndarray:
|
|
"""Create a synthetic book scan with V-shaped spine shadow on the left.
|
|
|
|
Left region has a V-shaped brightness dip (spine center at ~5% of width):
|
|
x=0..spine_center: scanner bed or page edge (bright ~200) → spine (dark ~60)
|
|
x=spine_center..shadow_end: spine (dark ~60) → white paper (bright ~240)
|
|
Content area: scattered dark pixels (simulate text lines)
|
|
Top/bottom 5%: white margins
|
|
"""
|
|
img = np.full((h, w, 3), 240, dtype=np.uint8)
|
|
|
|
# V-shaped spine shadow: center at ~5% of width
|
|
spine_center = w * 5 // 100 # e.g. 40 for 800px
|
|
shadow_half_w = w * 6 // 100 # e.g. 48 for 800px
|
|
|
|
for x in range(spine_center + shadow_half_w + 1):
|
|
dist = abs(x - spine_center)
|
|
# Brightness dips from 200 (edge) to 60 (spine center)
|
|
brightness = int(60 + (200 - 60) * min(dist / shadow_half_w, 1.0))
|
|
img[:, x] = brightness
|
|
|
|
# Content area: scatter some dark pixels (simulate text)
|
|
content_top = h // 20 # 5% top margin
|
|
content_bottom = h - h // 20 # 5% bottom margin
|
|
content_left = spine_center + shadow_half_w + w // 20 # past shadow + margin
|
|
content_right = w - w // 20 # 5% right margin
|
|
|
|
rng = np.random.RandomState(42)
|
|
for _ in range(500):
|
|
y = rng.randint(content_top, content_bottom)
|
|
x = rng.randint(content_left, content_right)
|
|
# Small text-like blob
|
|
y2 = min(y + 3, h)
|
|
x2 = min(x + 10, w)
|
|
img[y:y2, x:x2] = 20
|
|
|
|
return img
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: _filter_narrow_runs
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestFilterNarrowRuns:
    """Tests for ``_filter_narrow_runs`` on 1-D boolean masks."""

    def test_removes_short_runs(self):
        """True runs shorter than ``min_run`` are all cleared."""
        mask = np.array([0, 1, 1, 0, 0, 1, 0], dtype=bool)
        filtered = _filter_narrow_runs(mask, min_run=3)
        # Both True runs (lengths 2 and 1) are below the threshold of 3.
        assert not filtered.any()

    def test_keeps_long_runs(self):
        """A True run at least ``min_run`` long is returned unchanged."""
        mask = np.array([0, 1, 1, 1, 1, 0], dtype=bool)
        expected = mask.copy()
        filtered = _filter_narrow_runs(mask, min_run=3)
        np.testing.assert_array_equal(filtered, expected)

    def test_min_run_1_keeps_all(self):
        """With ``min_run=1`` even single-element runs are kept."""
        mask = np.array([1, 0, 1], dtype=bool)
        filtered = _filter_narrow_runs(mask, min_run=1)
        np.testing.assert_array_equal(filtered, mask)

    def test_empty_mask(self):
        """An empty mask yields an empty result."""
        empty = np.array([], dtype=bool)
        filtered = _filter_narrow_runs(empty, min_run=5)
        assert len(filtered) == 0

    def test_mixed_runs(self):
        """Only the run that reaches ``min_run`` is kept."""
        mask = np.array([1, 0, 1, 1, 1, 1, 1, 0, 1, 1], dtype=bool)
        filtered = _filter_narrow_runs(mask, min_run=3)
        # [0]    run of 1 -> removed
        # [2:7]  run of 5 -> kept
        # [8:10] run of 2 -> removed
        expected = np.array([0, 0, 1, 1, 1, 1, 1, 0, 0, 0], dtype=bool)
        np.testing.assert_array_equal(filtered, expected)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: _detect_format
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDetectFormat:
    """Tests for ``_detect_format``, which returns ``(format_name, confidence)``."""

    def test_a4_portrait(self):
        """210 x 297 is recognised as A4 with confidence above 0.8."""
        name, confidence = _detect_format(210, 297)
        assert name == "A4"
        assert confidence > 0.8

    def test_a4_landscape(self):
        """Swapping the two dimensions still yields A4."""
        name, confidence = _detect_format(297, 210)
        assert name == "A4"
        assert confidence > 0.8

    def test_letter(self):
        """850 x 1100 is recognised as Letter with confidence above 0.5."""
        name, confidence = _detect_format(850, 1100)
        assert name == "Letter"
        assert confidence > 0.5

    def test_unknown_square(self):
        """A square is either reported as unknown or with confidence below 0.5."""
        name, confidence = _detect_format(100, 100)
        # Aspect ratio 1.0 doesn't match any paper format well.
        assert name == "unknown" or confidence < 0.5

    def test_zero_dimensions(self):
        """A zero dimension yields ``("unknown", 0.0)``."""
        name, confidence = _detect_format(0, 100)
        assert name == "unknown"
        assert confidence == 0.0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: _detect_edge_projection
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDetectEdgeProjection:
    """Tests for ``_detect_edge_projection`` on binary ink masks."""

    @staticmethod
    def _mask_with_block(shape, rows, cols):
        """Return a zeroed uint8 mask with one 255-filled block.

        ``rows`` and ``cols`` are ``(start, stop)`` pairs with exclusive stop.
        """
        mask = np.zeros(shape, dtype=np.uint8)
        mask[rows[0]:rows[1], cols[0]:cols[1]] = 255
        return mask

    def test_finds_first_ink_column(self):
        """Binary image with ink starting at column 50."""
        # Content spans x=50 .. x=180 (exclusive).
        binary = self._mask_with_block((100, 200), rows=(10, 90), cols=(50, 180))
        found = _detect_edge_projection(binary, axis=0, from_start=True, dim=200)
        assert found == 50

    def test_finds_last_ink_column(self):
        """Scanning from the end reports the last column that has ink."""
        binary = self._mask_with_block((100, 200), rows=(10, 90), cols=(50, 180))
        found = _detect_edge_projection(binary, axis=0, from_start=False, dim=200)
        assert found == 179  # last column with ink

    def test_finds_first_ink_row(self):
        """With ``axis=1`` the first row that has ink is reported."""
        binary = self._mask_with_block((200, 100), rows=(30, 170), cols=(10, 90))
        found = _detect_edge_projection(binary, axis=1, from_start=True, dim=200)
        assert found == 30

    def test_finds_last_ink_row(self):
        """With ``axis=1`` and ``from_start=False`` the last ink row is reported."""
        binary = self._mask_with_block((200, 100), rows=(30, 170), cols=(10, 90))
        found = _detect_edge_projection(binary, axis=1, from_start=False, dim=200)
        assert found == 169

    def test_empty_image_returns_boundary(self):
        """Without any ink the result is 0 from the start and ``dim`` from the end."""
        blank = np.zeros((100, 100), dtype=np.uint8)
        from_start = _detect_edge_projection(blank, axis=0, from_start=True, dim=100)
        assert from_start == 0
        from_end = _detect_edge_projection(blank, axis=0, from_start=False, dim=100)
        assert from_end == 100
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: _detect_spine_shadow and _detect_left_edge_shadow
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDetectSpineShadow:
    """Tests for ``_detect_spine_shadow`` on synthetic grayscale images."""

    @staticmethod
    def _paint_v_shadow(gray, columns, center, half_w=40, darkest=80, paper=240):
        """Write a linear V-shaped brightness dip into ``gray`` in place.

        Each column in ``columns`` is set to ``darkest`` at ``center`` and rises
        linearly to ``paper`` at a distance of ``half_w`` columns or more.
        """
        for col in columns:
            ratio = min(abs(col - center) / half_w, 1.0)
            gray[:, col] = int(darkest + (paper - darkest) * ratio)

    def test_detects_real_spine_v_shape(self):
        """V-shaped brightness dip (real spine shadow) should be detected."""
        height, width = 500, 800
        gray = np.full((height, width), 240, dtype=np.uint8)
        # Spine centre at x=30 inside the left 25% (200px), dipping to 80.
        self._paint_v_shadow(gray, range(80), center=30)

        spine_x = _detect_spine_shadow(gray, gray[:, :200], 0, width, "left")

        # Should find the spine near x=30.
        assert spine_x is not None
        assert 20 <= spine_x <= 40

    def test_rejects_text_content_edge(self):
        """Sharp text edge (white margin → dense text) should NOT trigger."""
        height, width = 500, 800
        gray = np.full((height, width), 240, dtype=np.uint8)
        # Text from x=60 to x=199: every 20 rows an 8-row dark band (text line).
        for band_top in range(0, height, 20):
            gray[band_top:band_top + 8, 60:200] = 30

        spine_x = _detect_spine_shadow(gray, gray[:, :200], 0, width, "left")

        # This is text content, not a shadow, so no spine may be reported.
        assert spine_x is None

    def test_rejects_uniform_region(self):
        """Uniform brightness region (no shadow) should NOT trigger."""
        height, width = 500, 800
        gray = np.full((height, width), 230, dtype=np.uint8)

        spine_x = _detect_spine_shadow(gray, gray[:, :200], 0, width, "left")

        assert spine_x is None

    def test_rejects_bright_minimum(self):
        """Region where darkest column is still bright (>180) should NOT trigger."""
        height, width = 500, 800
        gray = np.full((height, width), 240, dtype=np.uint8)
        # Slight variation, but every column stays bright.
        gray[:, 50:80] = 195

        spine_x = _detect_spine_shadow(gray, gray[:, :200], 0, width, "left")

        assert spine_x is None

    def test_right_side_spine(self):
        """V-shaped spine shadow in right search region should be detected."""
        height, width = 500, 800
        gray = np.full((height, width), 240, dtype=np.uint8)
        # Spine shadow centred at x=750 (right side).
        self._paint_v_shadow(gray, range(680, 800), center=750)

        right_start = width - width // 4  # 600
        spine_x = _detect_spine_shadow(
            gray, gray[:, right_start:], right_start, width, "right"
        )

        assert spine_x is not None
        assert 740 <= spine_x <= 760
|
|
|
|
|
|
class TestDetectLeftEdgeShadow:
    """Tests for ``_detect_left_edge_shadow`` (gray image + binary ink mask)."""

    def test_detects_shadow_gradient(self):
        """Synthetic image with left-side V-shaped shadow gradient."""
        height, width = 500, 400
        gray = np.full((height, width), 240, dtype=np.uint8)
        ink = np.zeros((height, width), dtype=np.uint8)

        # V-shaped shadow: centre at x=20, dipping to brightness 60.
        center, half_w = 20, 30
        shadow_end = center + half_w
        for col in range(shadow_end):
            ratio = min(abs(col - center) / half_w, 1.0)
            gray[:, col] = int(60 + (240 - 60) * ratio)

        # Content starts 10px after the shadow and stops 10px before the border.
        ink[:, shadow_end + 10:width - 10] = 255

        left_edge = _detect_left_edge_shadow(gray, ink, width, height)

        # Edge should be near the spine centre (x~20).
        assert 10 <= left_edge <= 35

    def test_no_shadow_uses_binary_fallback(self):
        """When shadow range is small, falls back to binary projection."""
        height, width = 400, 400
        gray = np.full((height, width), 200, dtype=np.uint8)
        ink = np.zeros((height, width), dtype=np.uint8)
        # Content block from x=80 onward (large enough to survive noise filtering).
        ink[50:350, 80:380] = 255

        left_edge = _detect_left_edge_shadow(gray, ink, width, height)

        # Should find content start via projection fallback (near x=80).
        assert left_edge <= 85

    def test_text_content_uses_binary_fallback(self):
        """Dense text in left region should NOT trigger spine detection."""
        height, width = 500, 800
        gray = np.full((height, width), 240, dtype=np.uint8)
        ink = np.zeros((height, width), dtype=np.uint8)

        # Text from x=50 to 20px before the right border: 8-row bands every 20 rows.
        text_cols = slice(50, width - 20)
        for band_top in range(20, height - 20, 20):
            band_rows = slice(band_top, band_top + 8)
            gray[band_rows, text_cols] = 30
            ink[band_rows, text_cols] = 255

        left_edge = _detect_left_edge_shadow(gray, ink, width, height)

        # Should use binary fallback and find content at ~x=50.
        assert 40 <= left_edge <= 60
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: detect_and_crop_page (end-to-end)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDetectAndCropPage:
    """End-to-end tests for ``detect_and_crop_page`` on synthetic BGR images."""

    def test_no_crop_needed_all_content(self):
        """Image that is all content — no borders to crop."""
        # Dark content everywhere.
        full_page = np.full((100, 80, 3), 40, dtype=np.uint8)
        _, info = detect_and_crop_page(full_page)
        # Should return original (all borders < 2%).
        assert not info["crop_applied"]
        assert info["cropped_size"] == {"width": 80, "height": 100}

    def test_crops_white_borders(self):
        """Image with white borders around dark content."""
        height, width = 400, 300
        # Content area big enough to pass the 40% sanity check.
        page = _make_image_with_content(height, width, (40, 360, 30, 270))

        _, info = detect_and_crop_page(page)

        assert info["crop_applied"]
        # Cropped size should be close to the content area (with margin).
        size = info["cropped_size"]
        assert size["width"] < width
        assert size["height"] < height

    def test_book_scan_detects_spine_shadow(self):
        """Synthetic book scan with spine shadow on left."""
        scan = _make_book_scan(1000, 800)
        _, info = detect_and_crop_page(scan)

        # The spine centre sits at ~5% of the width, so the spine shadow area
        # should be cropped: at least 4% left border detected.
        assert info["border_fractions"]["left"] >= 0.04

    def test_sanity_check_too_small_crop(self):
        """If detected content area is too small, skip crop."""
        height, width = 500, 500
        # White page with a tiny 5x5 dark spot — should fail the sanity check.
        page = _make_white_image(height, width)
        page[248:253, 248:253] = 0

        _, info = detect_and_crop_page(page)

        # Either no crop is applied, or the crop keeps at least 40% of the area.
        if not info["crop_applied"]:
            return
        size = info["cropped_size"]
        crop_area = size["width"] * size["height"]
        assert crop_area >= 0.4 * height * width

    def test_crop_preserves_content(self):
        """Verify that content is preserved after cropping."""
        page = _make_image_with_content(300, 200, (50, 250, 40, 160))
        cropped, info = detect_and_crop_page(page)

        if info["crop_applied"]:
            # Cropped image should still contain dark (content) pixels.
            channel_mean = np.mean(cropped, axis=2)
            assert np.min(channel_mean) < 50

    def test_result_structure(self):
        """Verify all expected keys are present in result dict."""
        _, info = detect_and_crop_page(_make_white_image(100, 100))

        assert "crop_applied" in info
        assert "original_size" in info
        assert "cropped_size" in info
        assert "border_fractions" in info
        assert "detected_format" in info
        assert "format_confidence" in info
        assert "aspect_ratio" in info

    def test_margin_parameter(self):
        """Custom margin_frac should affect crop bounds."""
        page = _make_image_with_content(400, 300, (80, 320, 60, 240))

        _, tight = detect_and_crop_page(page, margin_frac=0.005)
        _, loose = detect_and_crop_page(page, margin_frac=0.05)

        if tight["crop_applied"] and loose["crop_applied"]:
            # Larger margin should produce a crop that is at least as large.
            tight_area = tight["cropped_size"]["width"] * tight["cropped_size"]["height"]
            loose_area = loose["cropped_size"]["width"] * loose["cropped_size"]["height"]
            assert loose_area >= tight_area

    def test_crop_rect_pct_values(self):
        """crop_rect_pct values should be in 0-100 range."""
        page = _make_image_with_content(400, 300, (80, 320, 60, 240))
        _, info = detect_and_crop_page(page)

        if info["crop_applied"] and info["crop_rect_pct"]:
            rect = info["crop_rect_pct"]
            # Offsets may be 0; extents must be strictly positive.
            assert 0 <= rect["x"] <= 100
            assert 0 <= rect["y"] <= 100
            assert 0 < rect["width"] <= 100
            assert 0 < rect["height"] <= 100
|
|
|
|
|
|
class TestCropDeterminism:
    """A3: Verify that page crop produces identical results across N runs."""

    @pytest.mark.parametrize("image_factory,desc", [
        (
            lambda: _make_image_with_content(800, 600, (100, 700, 80, 520)),
            "standard content",
        ),
        (
            lambda: _make_book_scan(1000, 800),
            "book scan with spine shadow",
        ),
    ])
    def test_determinism_10_runs(self, image_factory, desc):
        """Same image must produce identical crops in 10 consecutive runs.

        Args:
            image_factory: Zero-argument callable that builds the input image.
            desc: Human-readable case name, used only in failure messages.
        """
        img = image_factory()
        results = []
        for _ in range(10):
            # Each run gets its own copy so an in-place modification by the
            # function under test cannot leak into the next run.
            cropped, result = detect_and_crop_page(img.copy())
            results.append({
                "crop_applied": result["crop_applied"],
                "cropped_size": result["cropped_size"],
                "border_fractions": result["border_fractions"],
                "shape": cropped.shape,
            })

        first = results[0]
        for i, r in enumerate(results[1:], 1):
            assert r["crop_applied"] == first["crop_applied"], (
                f"Run {i} crop_applied differs from run 0 ({desc})"
            )
            assert r["cropped_size"] == first["cropped_size"], (
                f"Run {i} cropped_size differs from run 0 ({desc})"
            )
            # border_fractions was collected but never compared before, so a
            # non-deterministic border computation would have gone unnoticed.
            assert r["border_fractions"] == first["border_fractions"], (
                f"Run {i} border_fractions differs from run 0 ({desc})"
            )
            assert r["shape"] == first["shape"], (
                f"Run {i} output shape differs from run 0 ({desc})"
            )

    def test_determinism_pixel_identical(self):
        """Crop output pixels must be identical across runs."""
        img = _make_image_with_content(800, 600, (100, 700, 80, 520))
        ref_crop, _ = detect_and_crop_page(img.copy())

        for i in range(5):
            crop, _ = detect_and_crop_page(img.copy())
            assert np.array_equal(ref_crop, crop), (
                f"Run {i} produced different pixel output"
            )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: detect_page_splits — spine scoring logic
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _make_book_spread(h: int = 1616, w: int = 2288) -> np.ndarray:
|
|
"""Create a synthetic landscape book spread (two pages side by side).
|
|
|
|
Simulates the ad810209 failure case:
|
|
- A narrow spine shadow near the center (~50% of width)
|
|
- A wider dark area off-center (~35% of width), simulating a text column
|
|
- Bright paper flanking the spine on both sides
|
|
"""
|
|
img = np.full((h, w, 3), 230, dtype=np.uint8)
|
|
|
|
# --- Spine shadow: narrow dark valley centered at x = w/2 (1144) ---
|
|
spine_center = w // 2
|
|
spine_half_w = 30 # ~60px wide total
|
|
for x in range(spine_center - spine_half_w, spine_center + spine_half_w + 1):
|
|
dist = abs(x - spine_center)
|
|
# Brightness dips from 230 (paper) to 130 (spine)
|
|
brightness = int(130 + (230 - 130) * min(dist / spine_half_w, 1.0))
|
|
img[:, x] = brightness
|
|
|
|
# --- Off-center dark area at ~35% of width (x=799), wider than spine ---
|
|
dark_center = int(w * 0.35)
|
|
dark_half_w = 80 # ~160px wide total (wider than spine)
|
|
for x in range(dark_center - dark_half_w, dark_center + dark_half_w + 1):
|
|
dist = abs(x - dark_center)
|
|
# Brightness dips from 230 to 140 (slightly less dark than spine)
|
|
brightness = int(140 + (230 - 140) * min(dist / dark_half_w, 1.0))
|
|
img[:, x] = min(img[0, x, 0], brightness) # don't overwrite spine if overlapping
|
|
|
|
return img
|
|
|
|
|
|
class TestDetectPageSplits:
    """Tests for ``detect_page_splits`` and its choice of the spine position."""

    def test_portrait_image_returns_empty(self):
        """Portrait images (width < height * 1.15) should not be split."""
        portrait = np.full((1000, 800, 3), 200, dtype=np.uint8)
        pages = detect_page_splits(portrait)
        assert pages == []

    def test_uniform_image_returns_empty(self):
        """Uniform brightness image should not detect any spine."""
        flat = np.full((800, 1600, 3), 220, dtype=np.uint8)
        pages = detect_page_splits(flat)
        assert pages == []

    def test_prefers_centered_spine_over_wider_offcenter_dark(self):
        """Scoring should pick the centered narrow spine over a wider off-center dark area.

        This is the regression test for session ad810209 where the old algorithm
        picked x=799 (35%) instead of x=1144 (50%).
        """
        img = _make_book_spread(h=1616, w=2288)
        pages = detect_page_splits(img)

        assert len(pages) == 2, f"Expected 2 pages, got {len(pages)}"

        # The first page's width is used as the split position; it should lie
        # near the centre (x ~ 1144), not at ~799.
        split_x = pages[0]["width"]
        center = img.shape[1] / 2  # 1144

        assert abs(split_x - center) < 100, (
            f"Split at x={split_x}, expected near center {center:.0f}. "
            f"Old bug would have split at ~799."
        )

    def test_split_produces_two_reasonable_pages(self):
        """Both pages should be at least 15% of total width."""
        img = _make_book_spread()
        pages = detect_page_splits(img)

        # Only checked when exactly two pages were detected.
        if len(pages) != 2:
            return
        w = img.shape[1]
        for p in pages:
            assert p["width"] >= w * 0.15, (
                f"Page {p['page_index']} too narrow: {p['width']}px "
                f"(< {w * 0.15:.0f}px)"
            )

    def test_page_indices_sequential(self):
        """Page indices should be 0, 1, ..."""
        pages = detect_page_splits(_make_book_spread())
        if pages:
            assert [p["page_index"] for p in pages] == list(range(len(pages)))

    def test_pages_cover_full_width(self):
        """Pages should cover the full image width without gaps or overlaps."""
        img = _make_book_spread()
        pages = detect_page_splits(img)

        # Only checked when a split was actually detected.
        if len(pages) < 2:
            return
        w = img.shape[1]
        assert pages[0]["x"] == 0
        total_w = sum(p["width"] for p in pages)
        assert total_w == w, f"Total page width {total_w} != image width {w}"
|