Cut at spine center (darkest point) instead of shadow edge

Refactor left/right shadow detection into a shared _detect_spine_shadow() helper that finds the darkest column (= book spine center) via argmin of the smoothed brightness. Both sides now cut at the spine center, ensuring equal page sizes in double-page scans regardless of shadow position.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 07:54:33 +01:00
parent e56391b0c3
commit 3fd6523872
1 changed file with 56 additions and 55 deletions
@@ -278,24 +278,31 @@ def detect_and_crop_page(
 # Edge detection helpers
 # ---------------------------------------------------------------------------
-def _detect_left_edge_shadow(
+def _detect_spine_shadow(
    gray: np.ndarray,
-    binary: np.ndarray,
+    search_region: np.ndarray,
    offset_x: int,
    w: int,
-    h: int,
+    side: str,
-) -> int:
+) -> Optional[int]:
-    """Detect left content edge, accounting for book-spine shadow.
+    """Find the book spine center (darkest point) in a scanner shadow.
-    Strategy: look at the left 25% of the image.
+    The scanner produces a gray strip where the book spine presses against
-    1. Compute column-mean brightness in grayscale.
+    the glass.  The darkest column in that strip is the spine center —
-    2. Smooth with a boxcar kernel.
+    that's where we crop.
-    3. Find the transition from shadow (dark) to page (bright).
+
-    4. Fallback: use binary vertical projection if no shadow detected.
+    Args:
        gray: Full grayscale image (for context).
        search_region: Column slice of the grayscale image to search in.
        offset_x: X offset of search_region relative to full image.
        w: Full image width.
        side: 'left' or 'right' (for logging).
    Returns:
        X coordinate (in full image) of the spine center, or None.
    """
-    search_w = max(1, w // 4)
+    # Column-mean brightness in the search region
-
+    col_means = np.mean(search_region, axis=0).astype(np.float64)
    # Column-mean brightness in the left quarter
    col_means = np.mean(gray[:, :search_w], axis=0).astype(np.float64)
    # Smooth with boxcar kernel (width = 1% of image width, min 5)
    kernel_size = max(5, w // 100)
@@ -304,20 +311,40 @@ def _detect_left_edge_shadow(
    kernel = np.ones(kernel_size) / kernel_size
    smoothed = np.convolve(col_means, kernel, mode="same")
    # Determine brightness threshold: midpoint between darkest and brightest
    val_min = float(np.min(smoothed))
    val_max = float(np.max(smoothed))
    shadow_range = val_max - val_min
-    # Only use shadow detection if there is a meaningful brightness gradient (> 20 levels)
+    # Only detect if meaningful brightness gradient (> 20 levels)
-    if shadow_range > 20:
+    if shadow_range <= 20:
-        threshold = val_min + shadow_range * 0.6
+        return None
-        # Find first column where brightness exceeds threshold
+
-        above = np.where(smoothed >= threshold)[0]
+    # The darkest column is the spine center — crop exactly there
-        if len(above) > 0:
+    spine_local = int(np.argmin(smoothed))
-            shadow_edge = int(above[0])
+    spine_x = offset_x + spine_local
-            logger.debug("Left edge: shadow detected at x=%d (range=%.0f)", shadow_edge, shadow_range)
+
-            return shadow_edge
+    logger.debug(
        "%s edge: spine center at x=%d (brightness=%.0f, range=%.0f)",
        side.capitalize(), spine_x, val_min, shadow_range,
    )
    return spine_x
 def _detect_left_edge_shadow(
    gray: np.ndarray,
    binary: np.ndarray,
    w: int,
    h: int,
 ) -> int:
    """Detect left content edge, accounting for book-spine shadow.
    Looks at the left 25% for a scanner gray strip.  Cuts at the
    darkest column (= spine center).  Fallback: binary projection.
    """
    search_w = max(1, w // 4)
    spine_x = _detect_spine_shadow(gray, gray[:, :search_w], 0, w, "left")
    if spine_x is not None:
        return spine_x
    # Fallback: binary vertical projection
    return _detect_edge_projection(binary, axis=0, from_start=True, dim=w)
@@ -331,40 +358,14 @@ def _detect_right_edge_shadow(
 ) -> int:
    """Detect right content edge, accounting for book-spine shadow.
-    Mirror of _detect_left_edge_shadow: look at the right 25% of the image
+    Looks at the right 25% for a scanner gray strip.  Cuts at the
-    for a brightness dip (scanner gray strip at book spine).
+    darkest column (= spine center).  Fallback: binary projection.
    The darkest point in the gradient marks the spine center; crop there.
    """
    search_w = max(1, w // 4)
    right_start = w - search_w
-
+    spine_x = _detect_spine_shadow(gray, gray[:, right_start:], right_start, w, "right")
-    # Column-mean brightness in the right quarter
+    if spine_x is not None:
-    col_means = np.mean(gray[:, right_start:], axis=0).astype(np.float64)
+        return spine_x
    # Smooth with boxcar kernel (width = 1% of image width, min 5)
    kernel_size = max(5, w // 100)
    if kernel_size % 2 == 0:
        kernel_size += 1
    kernel = np.ones(kernel_size) / kernel_size
    smoothed = np.convolve(col_means, kernel, mode="same")
    # Determine brightness threshold: midpoint between darkest and brightest
    val_min = float(np.min(smoothed))
    val_max = float(np.max(smoothed))
    shadow_range = val_max - val_min
    # Only use shadow detection if there is a meaningful brightness gradient (> 20 levels)
    if shadow_range > 20:
        threshold = val_min + shadow_range * 0.6
        # Find LAST column (from right) where brightness exceeds threshold
        # = first column from right that drops below threshold marks the spine
        above = np.where(smoothed >= threshold)[0]
        if len(above) > 0:
            # The last bright column before it drops into shadow
            shadow_edge = right_start + int(above[-1])
            logger.debug("Right edge: shadow detected at x=%d (range=%.0f)",
                         shadow_edge, shadow_range)
            return shadow_edge
    # Fallback: binary vertical projection
    return _detect_edge_projection(binary, axis=0, from_start=False, dim=w)