Cut at spine center (darkest point) instead of shadow edge
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 2m5s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s

Refactor left/right shadow detection into shared _detect_spine_shadow()
that finds the darkest column (= book spine center) via argmin of
smoothed brightness. Both sides now cut at the spine center, ensuring
equal page sizes in double-page scans regardless of shadow position.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-19 07:54:33 +01:00
parent e56391b0c3
commit 3fd6523872

View File

@@ -278,24 +278,31 @@ def detect_and_crop_page(
# Edge detection helpers
# ---------------------------------------------------------------------------
def _detect_left_edge_shadow(
def _detect_spine_shadow(
gray: np.ndarray,
binary: np.ndarray,
search_region: np.ndarray,
offset_x: int,
w: int,
h: int,
) -> int:
"""Detect left content edge, accounting for book-spine shadow.
side: str,
) -> Optional[int]:
"""Find the book spine center (darkest point) in a scanner shadow.
Strategy: look at the left 25% of the image.
1. Compute column-mean brightness in grayscale.
2. Smooth with a boxcar kernel.
3. Find the transition from shadow (dark) to page (bright).
4. Fallback: use binary vertical projection if no shadow detected.
The scanner produces a gray strip where the book spine presses against
the glass. The darkest column in that strip is the spine center —
that's where we crop.
Args:
gray: Full grayscale image (for context).
search_region: Column slice of the grayscale image to search in.
offset_x: X offset of search_region relative to full image.
w: Full image width.
side: 'left' or 'right' (for logging).
Returns:
X coordinate (in full image) of the spine center, or None.
"""
search_w = max(1, w // 4)
# Column-mean brightness in the left quarter
col_means = np.mean(gray[:, :search_w], axis=0).astype(np.float64)
# Column-mean brightness in the search region
col_means = np.mean(search_region, axis=0).astype(np.float64)
# Smooth with boxcar kernel (width = 1% of image width, min 5)
kernel_size = max(5, w // 100)
@@ -304,20 +311,40 @@ def _detect_left_edge_shadow(
kernel = np.ones(kernel_size) / kernel_size
smoothed = np.convolve(col_means, kernel, mode="same")
# Brightness range between the darkest and brightest smoothed column
val_min = float(np.min(smoothed))
val_max = float(np.max(smoothed))
shadow_range = val_max - val_min
# Only use shadow detection if there is a meaningful brightness gradient (> 20 levels)
if shadow_range > 20:
threshold = val_min + shadow_range * 0.6
# Find first column where brightness exceeds threshold
above = np.where(smoothed >= threshold)[0]
if len(above) > 0:
shadow_edge = int(above[0])
logger.debug("Left edge: shadow detected at x=%d (range=%.0f)", shadow_edge, shadow_range)
return shadow_edge
# Only detect if meaningful brightness gradient (> 20 levels)
if shadow_range <= 20:
return None
# The darkest column is the spine center — crop exactly there
spine_local = int(np.argmin(smoothed))
spine_x = offset_x + spine_local
logger.debug(
"%s edge: spine center at x=%d (brightness=%.0f, range=%.0f)",
side.capitalize(), spine_x, val_min, shadow_range,
)
return spine_x
def _detect_left_edge_shadow(
    gray: np.ndarray,
    binary: np.ndarray,
    w: int,
    h: int,
) -> int:
    """Locate the left content edge, preferring a book-spine shadow cut.

    The leftmost quarter of ``gray`` (at least one column) is passed to
    ``_detect_spine_shadow``. If that helper reports a position, it is
    returned unchanged. If it reports ``None``, the edge is taken from
    ``_detect_edge_projection`` on ``binary`` instead.

    Args:
        gray: Grayscale image; its left quarter is searched for the spine.
        binary: Binary image used only by the projection fallback.
        w: Full image width.
        h: Full image height (not used by this function).

    Returns:
        X coordinate of the left edge.
    """
    quarter_w = max(1, w // 4)
    left_strip = gray[:, :quarter_w]

    # The strip starts at column 0, so the helper's offset is 0.
    spine_col = _detect_spine_shadow(gray, left_strip, 0, w, "left")
    if spine_col is None:
        # No spine position reported: fall back to binary vertical projection.
        return _detect_edge_projection(binary, axis=0, from_start=True, dim=w)
    return spine_col
@@ -331,40 +358,14 @@ def _detect_right_edge_shadow(
) -> int:
"""Detect right content edge, accounting for book-spine shadow.
Mirror of _detect_left_edge_shadow: look at the right 25% of the image
for a brightness dip (scanner gray strip at book spine).
The darkest point in the gradient marks the spine center; crop there.
Looks at the right 25% for a scanner gray strip. Cuts at the
darkest column (= spine center). Fallback: binary projection.
"""
search_w = max(1, w // 4)
right_start = w - search_w
# Column-mean brightness in the right quarter
col_means = np.mean(gray[:, right_start:], axis=0).astype(np.float64)
# Smooth with boxcar kernel (width = 1% of image width, min 5)
kernel_size = max(5, w // 100)
if kernel_size % 2 == 0:
kernel_size += 1
kernel = np.ones(kernel_size) / kernel_size
smoothed = np.convolve(col_means, kernel, mode="same")
# Determine brightness threshold: midpoint between darkest and brightest
val_min = float(np.min(smoothed))
val_max = float(np.max(smoothed))
shadow_range = val_max - val_min
# Only use shadow detection if there is a meaningful brightness gradient (> 20 levels)
if shadow_range > 20:
threshold = val_min + shadow_range * 0.6
# Find LAST column (from right) where brightness exceeds threshold
# = first column from right that drops below threshold marks the spine
above = np.where(smoothed >= threshold)[0]
if len(above) > 0:
# The last bright column before it drops into shadow
shadow_edge = right_start + int(above[-1])
logger.debug("Right edge: shadow detected at x=%d (range=%.0f)",
shadow_edge, shadow_range)
return shadow_edge
spine_x = _detect_spine_shadow(gray, gray[:, right_start:], right_start, w, "right")
if spine_x is not None:
return spine_x
# Fallback: binary vertical projection
return _detect_edge_projection(binary, axis=0, from_start=False, dim=w)