Cut at spine center (darkest point) instead of shadow edge

Refactor left/right shadow detection into a shared _detect_spine_shadow() helper that finds the darkest column (= book spine center) via argmin of the smoothed column brightness. Both sides now cut at the spine center, ensuring equal page sizes in double-page scans regardless of shadow position.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 07:54:33 +01:00
parent e56391b0c3
commit 3fd6523872
1 changed file with 56 additions and 55 deletions
@@ -278,24 +278,31 @@ def detect_and_crop_page(
 # Edge detection helpers
 # ---------------------------------------------------------------------------

-def _detect_left_edge_shadow(
+def _detect_spine_shadow(
    gray: np.ndarray,
-    binary: np.ndarray,
+    search_region: np.ndarray,
+    offset_x: int,
    w: int,
-    h: int,
-) -> int:
-    """Detect left content edge, accounting for book-spine shadow.
+    side: str,
+) -> Optional[int]:
+    """Find the book spine center (darkest point) in a scanner shadow.

-    Strategy: look at the left 25% of the image.
-    1. Compute column-mean brightness in grayscale.
-    2. Smooth with a boxcar kernel.
-    3. Find the transition from shadow (dark) to page (bright).
-    4. Fallback: use binary vertical projection if no shadow detected.
+    The scanner produces a gray strip where the book spine presses against
+    the glass.  The darkest column in that strip is the spine center —
+    that's where we crop.
+
+    Args:
+        gray: Full grayscale image (for context).
+        search_region: Column slice of the grayscale image to search in.
+        offset_x: X offset of search_region relative to full image.
+        w: Full image width.
+        side: 'left' or 'right' (for logging).
+
+    Returns:
+        X coordinate (in full image) of the spine center, or None.
    """
-    search_w = max(1, w // 4)
-
-    # Column-mean brightness in the left quarter
-    col_means = np.mean(gray[:, :search_w], axis=0).astype(np.float64)
+    # Column-mean brightness in the search region
+    col_means = np.mean(search_region, axis=0).astype(np.float64)

    # Smooth with boxcar kernel (width = 1% of image width, min 5)
    kernel_size = max(5, w // 100)
@@ -304,20 +311,40 @@ def _detect_left_edge_shadow(
    kernel = np.ones(kernel_size) / kernel_size
    smoothed = np.convolve(col_means, kernel, mode="same")

-    # Determine brightness threshold: midpoint between darkest and brightest
    val_min = float(np.min(smoothed))
    val_max = float(np.max(smoothed))
    shadow_range = val_max - val_min

-    # Only use shadow detection if there is a meaningful brightness gradient (> 20 levels)
-    if shadow_range > 20:
-        threshold = val_min + shadow_range * 0.6
-        # Find first column where brightness exceeds threshold
-        above = np.where(smoothed >= threshold)[0]
-        if len(above) > 0:
-            shadow_edge = int(above[0])
-            logger.debug("Left edge: shadow detected at x=%d (range=%.0f)", shadow_edge, shadow_range)
-            return shadow_edge
+    # Only detect if meaningful brightness gradient (> 20 levels)
+    if shadow_range <= 20:
+        return None
+
+    # The darkest column is the spine center — crop exactly there
+    spine_local = int(np.argmin(smoothed))
+    spine_x = offset_x + spine_local
+
+    logger.debug(
+        "%s edge: spine center at x=%d (brightness=%.0f, range=%.0f)",
+        side.capitalize(), spine_x, val_min, shadow_range,
+    )
+    return spine_x
+
+
+def _detect_left_edge_shadow(
+    gray: np.ndarray,
+    binary: np.ndarray,
+    w: int,
+    h: int,
+) -> int:
+    """Detect left content edge, accounting for book-spine shadow.
+
+    Looks at the left 25% for a scanner gray strip.  Cuts at the
+    darkest column (= spine center).  Fallback: binary projection.
+    """
+    search_w = max(1, w // 4)
+    spine_x = _detect_spine_shadow(gray, gray[:, :search_w], 0, w, "left")
+    if spine_x is not None:
+        return spine_x

    # Fallback: binary vertical projection
    return _detect_edge_projection(binary, axis=0, from_start=True, dim=w)
@@ -331,40 +358,14 @@ def _detect_right_edge_shadow(
 ) -> int:
    """Detect right content edge, accounting for book-spine shadow.

-    Mirror of _detect_left_edge_shadow: look at the right 25% of the image
-    for a brightness dip (scanner gray strip at book spine).
-    The darkest point in the gradient marks the spine center; crop there.
+    Looks at the right 25% for a scanner gray strip.  Cuts at the
+    darkest column (= spine center).  Fallback: binary projection.
    """
    search_w = max(1, w // 4)
    right_start = w - search_w
-
-    # Column-mean brightness in the right quarter
-    col_means = np.mean(gray[:, right_start:], axis=0).astype(np.float64)
-
-    # Smooth with boxcar kernel (width = 1% of image width, min 5)
-    kernel_size = max(5, w // 100)
-    if kernel_size % 2 == 0:
-        kernel_size += 1
-    kernel = np.ones(kernel_size) / kernel_size
-    smoothed = np.convolve(col_means, kernel, mode="same")
-
-    # Determine brightness threshold: midpoint between darkest and brightest
-    val_min = float(np.min(smoothed))
-    val_max = float(np.max(smoothed))
-    shadow_range = val_max - val_min
-
-    # Only use shadow detection if there is a meaningful brightness gradient (> 20 levels)
-    if shadow_range > 20:
-        threshold = val_min + shadow_range * 0.6
-        # Find LAST column (from right) where brightness exceeds threshold
-        # = first column from right that drops below threshold marks the spine
-        above = np.where(smoothed >= threshold)[0]
-        if len(above) > 0:
-            # The last bright column before it drops into shadow
-            shadow_edge = right_start + int(above[-1])
-            logger.debug("Right edge: shadow detected at x=%d (range=%.0f)",
-                         shadow_edge, shadow_range)
-            return shadow_edge
+    spine_x = _detect_spine_shadow(gray, gray[:, right_start:], right_start, w, "right")
+    if spine_x is not None:
+        return spine_x

    # Fallback: binary vertical projection
    return _detect_edge_projection(binary, axis=0, from_start=False, dim=w)