fix: detect spine by brightness, not ink density

The previous algorithm used a binary ink projection and produced false splits at the ordinary gaps between text columns. The spine of a book on a scanner instead shows up as a characteristic DARK gray strip (the scanner bed) flanked by bright white paper on both sides. New approach: compute column-mean brightness with heavy smoothing, then look for a dark valley (< 88% of paper brightness) in the center region that has bright paper on both sides.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 16:52:29 +01:00
parent f30e526917
commit d36972b464
1 changed files with 66 additions and 66 deletions
@@ -34,99 +34,99 @@ _MIN_RUN_FRAC = 0.005  # 0.5%
 def detect_page_splits(
    img_bgr: np.ndarray,
    min_gap_frac: float = 0.008,
 ) -> list:
    """Detect if the image is a multi-page spread and return split rectangles.
-    Checks for wide vertical gaps (spine area) that indicate the image
+    Uses **brightness** (not ink density) to find the spine area:
-    contains multiple pages side by side (e.g. book on scanner).
+    the scanner bed produces a characteristic gray strip where pages meet,
    which is darker than the white paper on either side.
    Returns a list of page dicts ``{x, y, width, height, page_index}``
    or an empty list if only one page is detected.
    """
    h, w = img_bgr.shape[:2]
-    # Only check landscape-ish images (width > height * 0.85)
+    # Only check landscape-ish images (width > height * 1.15)
    if w < h * 1.15:
        return []
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, blockSize=51, C=15,
    )
-    # Vertical projection: mean ink density per column
+    # Column-mean brightness (0-255) — the spine is darker (gray scanner bed)
-    v_proj = np.mean(binary, axis=0) / 255.0
+    col_brightness = np.mean(gray, axis=0).astype(np.float64)
-    # Smooth with boxcar (width = 0.5% of image width, min 5)
+    # Heavy smoothing to ignore individual text lines
-    kern = max(5, w // 200)
+    kern = max(11, w // 50)
    if kern % 2 == 0:
        kern += 1
-    v_smooth = np.convolve(v_proj, np.ones(kern) / kern, mode="same")
+    brightness_smooth = np.convolve(col_brightness, np.ones(kern) / kern, mode="same")
-    peak = float(np.max(v_smooth))
+    # Page paper is bright (typically > 200), spine/scanner bed is darker
-    if peak < 0.005:
+    page_brightness = float(np.max(brightness_smooth))
    if page_brightness < 100:
        return []  # Very dark image, skip
    # Spine threshold: significantly darker than the page
    # Spine is typically 60-80% of paper brightness
    spine_thresh = page_brightness * 0.88
    # Search in center region (30-70% of width)
    center_lo = int(w * 0.30)
    center_hi = int(w * 0.70)
    # Find the darkest valley in the center region
    center_brightness = brightness_smooth[center_lo:center_hi]
    darkest_val = float(np.min(center_brightness))
    if darkest_val >= spine_thresh:
        logger.debug("No spine detected: min brightness %.0f >= threshold %.0f",
                      darkest_val, spine_thresh)
        return []
-    # Look for valleys in center region (25-75% of width)
+    # Find the contiguous dark region (spine area)
-    gap_thresh = peak * 0.15  # valley must be < 15% of peak density
+    is_dark = center_brightness < spine_thresh
-    center_lo = int(w * 0.25)
+    # Find the widest dark run
-    center_hi = int(w * 0.75)
+    best_start, best_end = 0, 0
-    min_gap_px = max(5, int(w * min_gap_frac))
+    run_start = -1
-
+    for i in range(len(is_dark)):
-    # Find contiguous gap runs in the center region
+        if is_dark[i]:
-    gaps: list = []
+            if run_start < 0:
-    in_gap = False
+                run_start = i
    gap_start = 0
    for x in range(center_lo, center_hi):
        if v_smooth[x] < gap_thresh:
            if not in_gap:
                gap_start = x
                in_gap = True
        else:
-            if in_gap:
+            if run_start >= 0:
-                gap_w = x - gap_start
+                if i - run_start > best_end - best_start:
-                if gap_w >= min_gap_px:
+                    best_start, best_end = run_start, i
-                    gaps.append({"x": gap_start, "width": gap_w,
+                run_start = -1
-                                 "center": gap_start + gap_w // 2})
+    if run_start >= 0 and len(is_dark) - run_start > best_end - best_start:
-                in_gap = False
+        best_start, best_end = run_start, len(is_dark)
    if in_gap:
        gap_w = center_hi - gap_start
        if gap_w >= min_gap_px:
            gaps.append({"x": gap_start, "width": gap_w,
                         "center": gap_start + gap_w // 2})
-    if not gaps:
+    spine_w = best_end - best_start
    if spine_w < w * 0.01:
        logger.debug("Spine too narrow: %dpx (< %dpx)", spine_w, int(w * 0.01))
        return []
-    # Merge nearby gaps (< 5% of width apart) — the spine area may have
+    spine_x = center_lo + best_start
-    # thin ink strips between multiple gap segments
+    spine_center = spine_x + spine_w // 2
    merge_dist = max(20, int(w * 0.05))
    merged: list = [gaps[0]]
    for g in gaps[1:]:
        prev = merged[-1]
        prev_end = prev["x"] + prev["width"]
        if g["x"] - prev_end < merge_dist:
            # Merge: extend previous gap to cover both
            new_end = g["x"] + g["width"]
            prev["width"] = new_end - prev["x"]
            prev["center"] = prev["x"] + prev["width"] // 2
        else:
            merged.append(g)
    gaps = merged
-    # Sort gaps by width (largest = most likely spine)
+    # Verify: must have bright (paper) content on BOTH sides
-    gaps.sort(key=lambda g: g["width"], reverse=True)
+    left_brightness = float(np.mean(brightness_smooth[max(0, spine_x - w // 10):spine_x]))
    right_end = center_lo + best_end
    right_brightness = float(np.mean(brightness_smooth[right_end:min(w, right_end + w // 10)]))
-    # Use only gaps that are significant (>= 2% of image width)
+    if left_brightness < spine_thresh or right_brightness < spine_thresh:
-    significant_gaps = [g for g in gaps if g["width"] >= w * 0.02]
+        logger.debug("No bright paper flanking spine: left=%.0f right=%.0f thresh=%.0f",
-    if not significant_gaps:
+                      left_brightness, right_brightness, spine_thresh)
-        # Fall back to widest gap
+        return []
        significant_gaps = [gaps[0]]
-    # Use the significant gap(s) as split points
+    logger.info(
-    split_points = sorted(g["center"] for g in significant_gaps[:3])
+        "Spine detected: x=%d..%d (w=%d), brightness=%.0f vs paper=%.0f, "
        "left_paper=%.0f, right_paper=%.0f",
        spine_x, right_end, spine_w, darkest_val, page_brightness,
        left_brightness, right_brightness,
    )
    # Split at the spine center
    split_points = [spine_center]
    # Build page rectangles
    pages: list = []