Cut at spine center (darkest point) instead of shadow edge
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 2m5s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s

Refactor left/right shadow detection into shared _detect_spine_shadow()
that finds the darkest column (= book spine center) via argmin of
smoothed brightness. Both sides now cut at the spine center, ensuring
equal page sizes in double-page scans regardless of shadow position.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-19 07:54:33 +01:00
parent e56391b0c3
commit 3fd6523872

View File

@@ -278,24 +278,31 @@ def detect_and_crop_page(
# Edge detection helpers # Edge detection helpers
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _detect_spine_shadow(
    gray: np.ndarray,
    search_region: np.ndarray,
    offset_x: int,
    w: int,
    side: str,
) -> Optional[int]:
    """Find the book spine center (darkest point) in a scanner shadow.

    The scanner produces a gray strip where the book spine presses against
    the glass. The darkest column in that strip is taken as the spine
    center, and that is where the caller crops.

    Args:
        gray: Full grayscale image. Not read by this function; kept so the
            signature stays stable for existing callers.
        search_region: Column slice of the grayscale image to search in
            (2-D, rows x columns).
        offset_x: X offset of ``search_region`` relative to the full image.
        w: Full image width; only used to size the smoothing kernel.
        side: 'left' or 'right' (used for logging only).

    Returns:
        X coordinate (in full-image coordinates) of the spine center, or
        ``None`` when the region is empty or its smoothed brightness varies
        by 20 levels or less (no meaningful shadow).
    """
    # An empty slice has no columns to rank; np.min would raise on it.
    if search_region.size == 0:
        return None

    # Column-mean brightness in the search region
    col_means = np.mean(search_region, axis=0).astype(np.float64)

    # Boxcar kernel (width = 1% of image width, min 5), forced to odd length
    # so the window is centered on the column it smooths.
    kernel_size = max(5, w // 100)
    if kernel_size % 2 == 0:
        kernel_size += 1
    kernel = np.ones(kernel_size) / kernel_size

    # Replicate the border columns before smoothing. A plain
    # np.convolve(mode="same") pads with zeros, which darkens the first and
    # last columns of the strip: a uniformly bright page then shows a large
    # fake "gradient" and argmin lands on the strip border. Edge padding
    # plus mode="valid" also guarantees exactly one output sample per input
    # column, even when the strip is narrower than the kernel.
    half = kernel_size // 2
    padded = np.pad(col_means, half, mode="edge")
    smoothed = np.convolve(padded, kernel, mode="valid")

    val_min = float(np.min(smoothed))
    val_max = float(np.max(smoothed))
    shadow_range = val_max - val_min

    # Only detect if meaningful brightness gradient (> 20 levels)
    if shadow_range <= 20:
        return None

    # The darkest column is the spine center — crop exactly there
    spine_local = int(np.argmin(smoothed))
    spine_x = offset_x + spine_local

    logger.debug(
        "%s edge: spine center at x=%d (brightness=%.0f, range=%.0f)",
        side.capitalize(), spine_x, val_min, shadow_range,
    )
    return spine_x
def _detect_left_edge_shadow(
    gray: np.ndarray,
    binary: np.ndarray,
    w: int,
    h: int,
) -> int:
    """Return the x coordinate of the left content edge.

    The left quarter of the grayscale image (at least one column) is handed
    to ``_detect_spine_shadow``. When that reports a spine center, the crop
    position is that column. Otherwise the edge is taken from the vertical
    projection of the binary image via ``_detect_edge_projection``.

    Args:
        gray: Full grayscale image.
        binary: Binarized image, used only by the projection fallback.
        w: Full image width.
        h: Full image height (not used here).

    Returns:
        X coordinate of the left edge in full-image coordinates.
    """
    quarter_width = max(1, w // 4)
    left_strip = gray[:, :quarter_width]

    spine_center = _detect_spine_shadow(gray, left_strip, 0, w, "left")
    if spine_center is None:
        # No usable shadow in the strip: fall back to binary vertical projection
        return _detect_edge_projection(binary, axis=0, from_start=True, dim=w)
    return spine_center
@@ -331,40 +358,14 @@ def _detect_right_edge_shadow(
) -> int: ) -> int:
"""Detect right content edge, accounting for book-spine shadow. """Detect right content edge, accounting for book-spine shadow.
Mirror of _detect_left_edge_shadow: look at the right 25% of the image Looks at the right 25% for a scanner gray strip. Cuts at the
for a brightness dip (scanner gray strip at book spine). darkest column (= spine center). Fallback: binary projection.
The darkest point in the gradient marks the spine center; crop there.
""" """
search_w = max(1, w // 4) search_w = max(1, w // 4)
right_start = w - search_w right_start = w - search_w
spine_x = _detect_spine_shadow(gray, gray[:, right_start:], right_start, w, "right")
# Column-mean brightness in the right quarter if spine_x is not None:
col_means = np.mean(gray[:, right_start:], axis=0).astype(np.float64) return spine_x
# Smooth with boxcar kernel (width = 1% of image width, min 5)
kernel_size = max(5, w // 100)
if kernel_size % 2 == 0:
kernel_size += 1
kernel = np.ones(kernel_size) / kernel_size
smoothed = np.convolve(col_means, kernel, mode="same")
# Determine brightness threshold: midpoint between darkest and brightest
val_min = float(np.min(smoothed))
val_max = float(np.max(smoothed))
shadow_range = val_max - val_min
# Only use shadow detection if there is a meaningful brightness gradient (> 20 levels)
if shadow_range > 20:
threshold = val_min + shadow_range * 0.6
# Find LAST column (from right) where brightness exceeds threshold
# = first column from right that drops below threshold marks the spine
above = np.where(smoothed >= threshold)[0]
if len(above) > 0:
# The last bright column before it drops into shadow
shadow_edge = right_start + int(above[-1])
logger.debug("Right edge: shadow detected at x=%d (range=%.0f)",
shadow_edge, shadow_range)
return shadow_edge
# Fallback: binary vertical projection # Fallback: binary vertical projection
return _detect_edge_projection(binary, axis=0, from_start=False, dim=w) return _detect_edge_projection(binary, axis=0, from_start=False, dim=w)