Add camera gutter detection via vertical continuity analysis
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 45s
CI / test-go-edu-search (push) Successful in 32s
CI / test-python-klausur (push) Failing after 2m49s
CI / test-python-agent-core (push) Successful in 30s
CI / test-nodejs-website (push) Successful in 32s

Scanner shadow detection (range > 40, darkest < 180) fails on camera
book scans where the gutter shadow is subtle (range ~25, darkest ~214).

New _detect_gutter_continuity() detects gutters by their unique property:
the shadow runs continuously from top to bottom without interruption.
Divides the image into horizontal strips and, for each column, computes
the fraction of strips that are darker than the page median (minus a
small tolerance). A gutter column has >= 75% of strips darker. The
transition point where the smoothed dark fraction drops below 50% marks
the crop boundary.

Integrated as fallback between scanner shadow and binary projection.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-11 13:58:14 +02:00
parent 9b5e8c6b35
commit 633e301bfd
2 changed files with 275 additions and 6 deletions

View File

@@ -457,6 +457,153 @@ def _detect_spine_shadow(
return spine_x
def _detect_gutter_continuity(
gray: np.ndarray,
search_region: np.ndarray,
offset_x: int,
w: int,
side: str,
) -> Optional[int]:
"""Detect gutter shadow via vertical continuity analysis.
Camera book scans produce a subtle brightness gradient at the gutter
that is too faint for scanner-shadow detection (range < 40). However,
the gutter shadow has a unique property: it runs **continuously from
top to bottom** without interruption. Text and images always have
vertical gaps between lines, paragraphs, or sections.
Algorithm:
1. Divide image into N horizontal strips (~60px each)
2. For each column, compute what fraction of strips are darker than
the page median (from the center 50% of the full image)
3. A "gutter column" has ≥ 75% of strips darker than page_median δ
4. Smooth the dark-fraction profile and find the transition point
from the edge inward where the fraction drops below 0.50
5. Validate: gutter band must be 0.5%-10% of image width
Args:
gray: Full grayscale image.
search_region: Edge slice of the grayscale image.
offset_x: X offset of search_region relative to full image.
w: Full image width.
side: 'left' or 'right'.
Returns:
X coordinate (in full image) of the gutter inner edge, or None.
"""
region_h, region_w = search_region.shape[:2]
if region_w < 20 or region_h < 100:
return None
# --- 1. Divide into horizontal strips ---
strip_target_h = 60 # ~60px per strip
n_strips = max(10, region_h // strip_target_h)
strip_h = region_h // n_strips
strip_means = np.zeros((n_strips, region_w), dtype=np.float64)
for s in range(n_strips):
y0 = s * strip_h
y1 = min((s + 1) * strip_h, region_h)
strip_means[s] = np.mean(search_region[y0:y1, :], axis=0)
# --- 2. Page median from center 50% of full image ---
center_lo = w // 4
center_hi = 3 * w // 4
page_median = float(np.median(gray[:, center_lo:center_hi]))
# Camera shadows are subtle — threshold just 5 levels below page median
dark_thresh = page_median - 5.0
# If page is very dark overall (e.g. photo, not a book page), bail out
if page_median < 180:
return None
# --- 3. Per-column dark fraction ---
dark_count = np.sum(strip_means < dark_thresh, axis=0).astype(np.float64)
dark_frac = dark_count / n_strips # shape: (region_w,)
# --- 4. Smooth and find transition ---
# Rolling mean (window = 1% of image width, min 5)
smooth_w = max(5, w // 100)
if smooth_w % 2 == 0:
smooth_w += 1
kernel = np.ones(smooth_w) / smooth_w
frac_smooth = np.convolve(dark_frac, kernel, mode="same")
# Trim convolution edges
margin = smooth_w // 2
if region_w <= 2 * margin + 10:
return None
# Scan from edge inward to find where frac drops below transition threshold
transition_thresh = 0.50
gutter_inner = None # local x in search_region
if side == "right":
# Scan from right edge (region_w - 1) inward (toward 0)
for x in range(region_w - 1 - margin, margin, -1):
if frac_smooth[x] < transition_thresh:
gutter_inner = x + 1 # crop just past the transition
break
else:
# Scan from left edge (0) inward (toward region_w)
for x in range(margin, region_w - margin):
if frac_smooth[x] < transition_thresh:
gutter_inner = x - 1
break
if gutter_inner is None:
return None
# --- 5. Validate gutter width ---
if side == "right":
gutter_width = region_w - gutter_inner
else:
gutter_width = gutter_inner
min_gutter = max(3, int(w * 0.005)) # at least 0.5% of image
max_gutter = int(w * 0.10) # at most 10% of image
if gutter_width < min_gutter:
logger.debug(
"%s gutter: too narrow (%dpx < %dpx)", side.capitalize(),
gutter_width, min_gutter,
)
return None
if gutter_width > max_gutter:
logger.debug(
"%s gutter: too wide (%dpx > %dpx)", side.capitalize(),
gutter_width, max_gutter,
)
return None
# Check that the gutter band is meaningfully darker than the page
if side == "right":
gutter_brightness = float(np.mean(strip_means[:, gutter_inner:]))
else:
gutter_brightness = float(np.mean(strip_means[:, :gutter_inner]))
brightness_drop = page_median - gutter_brightness
if brightness_drop < 3:
logger.debug(
"%s gutter: insufficient brightness drop (%.1f levels)",
side.capitalize(), brightness_drop,
)
return None
gutter_x = offset_x + gutter_inner
logger.info(
"%s gutter (continuity): x=%d, width=%dpx (%.1f%%), "
"brightness=%.0f vs page=%.0f (drop=%.0f), frac@edge=%.2f",
side.capitalize(), gutter_x, gutter_width,
100.0 * gutter_width / w, gutter_brightness, page_median,
brightness_drop, float(frac_smooth[gutter_inner]),
)
return gutter_x
def _detect_left_edge_shadow(
gray: np.ndarray,
binary: np.ndarray,
@@ -465,15 +612,22 @@ def _detect_left_edge_shadow(
) -> int:
"""Detect left content edge, accounting for book-spine shadow.
Looks at the left 25% for a scanner gray strip. Cuts at the
darkest column (= spine center). Fallback: binary projection.
Tries three methods in order:
1. Scanner spine-shadow (dark gradient, range > 40)
2. Camera gutter continuity (subtle shadow running top-to-bottom)
3. Binary projection fallback (first ink column)
"""
search_w = max(1, w // 4)
spine_x = _detect_spine_shadow(gray, gray[:, :search_w], 0, w, "left")
if spine_x is not None:
return spine_x
# Fallback: binary vertical projection
# Fallback 1: vertical continuity (camera gutter shadow)
gutter_x = _detect_gutter_continuity(gray, gray[:, :search_w], 0, w, "left")
if gutter_x is not None:
return gutter_x
# Fallback 2: binary vertical projection
return _detect_edge_projection(binary, axis=0, from_start=True, dim=w)
@@ -485,8 +639,10 @@ def _detect_right_edge_shadow(
) -> int:
"""Detect right content edge, accounting for book-spine shadow.
Looks at the right 25% for a scanner gray strip. Cuts at the
darkest column (= spine center). Fallback: binary projection.
Tries three methods in order:
1. Scanner spine-shadow (dark gradient, range > 40)
2. Camera gutter continuity (subtle shadow running top-to-bottom)
3. Binary projection fallback (last ink column)
"""
search_w = max(1, w // 4)
right_start = w - search_w
@@ -494,7 +650,12 @@ def _detect_right_edge_shadow(
if spine_x is not None:
return spine_x
# Fallback: binary vertical projection
# Fallback 1: vertical continuity (camera gutter shadow)
gutter_x = _detect_gutter_continuity(gray, gray[:, right_start:], right_start, w, "right")
if gutter_x is not None:
return gutter_x
# Fallback 2: binary vertical projection
return _detect_edge_projection(binary, axis=0, from_start=False, dim=w)

View File

@@ -18,6 +18,7 @@ from page_crop import (
detect_page_splits,
_detect_format,
_detect_edge_projection,
_detect_gutter_continuity,
_detect_left_edge_shadow,
_detect_right_edge_shadow,
_detect_spine_shadow,
@@ -564,3 +565,110 @@ class TestDetectPageSplits:
assert pages[0]["x"] == 0
total_w = sum(p["width"] for p in pages)
assert total_w == w, f"Total page width {total_w} != image width {w}"
# ---------------------------------------------------------------------------
# Tests: _detect_gutter_continuity (camera book scans)
# ---------------------------------------------------------------------------
def _make_camera_book_scan(h: int = 2400, w: int = 1700, gutter_side: str = "right") -> np.ndarray:
"""Create a synthetic camera book scan with a subtle gutter shadow.
Camera gutter shadows are much subtler than scanner shadows:
- Page brightness ~250 (well-lit)
- Gutter brightness ~210-230 (slight shadow)
- Shadow runs continuously from top to bottom
- Gradient is ~40px wide
"""
img = np.full((h, w, 3), 250, dtype=np.uint8)
# Add some variation to make it realistic
rng = np.random.RandomState(99)
# Subtle gutter gradient at the specified side
gutter_w = int(w * 0.04) # ~4% of width
gradient_w = int(w * 0.03) # transition zone
if gutter_side == "right":
gutter_start = w - gutter_w - gradient_w
for x in range(gutter_start, w):
dist_from_start = x - gutter_start
# Linear gradient from 250 down to 210
brightness = int(250 - 40 * min(dist_from_start / (gutter_w + gradient_w), 1.0))
img[:, x] = brightness
else:
gutter_end = gutter_w + gradient_w
for x in range(gutter_end):
dist_from_edge = gutter_end - x
brightness = int(250 - 40 * min(dist_from_edge / (gutter_w + gradient_w), 1.0))
img[:, x] = brightness
# Scatter some text (dark pixels) in the content area
content_left = gutter_end + 20 if gutter_side == "left" else 50
content_right = gutter_start - 20 if gutter_side == "right" else w - 50
for _ in range(800):
y = rng.randint(h // 10, h - h // 10)
x = rng.randint(content_left, content_right)
y2 = min(y + 3, h)
x2 = min(x + 15, w)
img[y:y2, x:x2] = 20
return img
class TestDetectGutterContinuity:
    """Camera gutter shadow detection via vertical continuity."""

    @staticmethod
    def _to_gray(img: np.ndarray) -> np.ndarray:
        """Collapse a 3-channel image to uint8 grayscale by channel mean."""
        return np.mean(img, axis=2).astype(np.uint8)

    def test_detects_right_gutter(self):
        """A subtle shadow on the right edge is reported inside the outer 15%."""
        gray = self._to_gray(_make_camera_book_scan(gutter_side="right"))
        w = gray.shape[1]
        right_start = w - w // 4

        result = _detect_gutter_continuity(
            gray, gray[:, right_start:], right_start, w, "right",
        )

        assert result is not None
        # The synthetic shadow starts at roughly 93% of the width (w - 4% - 3%).
        assert result > w * 0.85, f"Gutter x={result} too far left"
        assert result < w * 0.98, f"Gutter x={result} too close to edge"

    def test_detects_left_gutter(self):
        """A subtle shadow on the left edge is reported inside the outer 15%."""
        gray = self._to_gray(_make_camera_book_scan(gutter_side="left"))
        w = gray.shape[1]

        result = _detect_gutter_continuity(
            gray, gray[:, :w // 4], 0, w, "left",
        )

        assert result is not None
        assert result > w * 0.02, f"Gutter x={result} too close to edge"
        assert result < w * 0.15, f"Gutter x={result} too far right"

    def test_no_gutter_on_clean_page(self):
        """A uniformly bright page with text but no shadow yields no gutter."""
        page_h, w = 2000, 1600
        img = np.full((page_h, w, 3), 250, dtype=np.uint8)

        # Scatter fake text, but add no gutter shadow.
        rng = np.random.RandomState(42)
        for _ in range(500):
            y = rng.randint(100, 1900)
            x = rng.randint(100, 1500)
            img[y:min(y + 3, page_h), x:min(x + 15, w)] = 20

        gray = self._to_gray(img)
        search_w = w // 4
        right_start = w - search_w

        result_r = _detect_gutter_continuity(gray, gray[:, right_start:], right_start, w, "right")
        result_l = _detect_gutter_continuity(gray, gray[:, :search_w], 0, w, "left")

        assert result_r is None, f"False positive on right: x={result_r}"
        assert result_l is None, f"False positive on left: x={result_l}"

    def test_integrated_with_crop(self):
        """End-to-end: detect_and_crop_page crops a non-trivial right border."""
        img = _make_camera_book_scan(gutter_side="right")
        _, result = detect_and_crop_page(img)

        # A right border fraction above 1% means the gutter was cropped.
        right_border = result["border_fractions"]["right"]
        assert right_border > 0.01, f"Right border {right_border} — gutter not cropped"