fix: detect spine by brightness, not ink density
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m51s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 19s
The previous algorithm used binary ink projection and found false splits at normal text column gaps. The spine of a book on a scanner has a characteristic DARK gray strip (scanner bed) flanked by bright white paper on both sides. New approach: column-mean brightness with heavy smoothing, looking for a dark valley (< 88% of paper brightness) in the center region that has bright paper on both sides. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -34,99 +34,99 @@ _MIN_RUN_FRAC = 0.005 # 0.5%
|
||||
|
||||
def detect_page_splits(
|
||||
img_bgr: np.ndarray,
|
||||
min_gap_frac: float = 0.008,
|
||||
) -> list:
|
||||
"""Detect if the image is a multi-page spread and return split rectangles.
|
||||
|
||||
Checks for wide vertical gaps (spine area) that indicate the image
|
||||
contains multiple pages side by side (e.g. book on scanner).
|
||||
Uses **brightness** (not ink density) to find the spine area:
|
||||
the scanner bed produces a characteristic gray strip where pages meet,
|
||||
which is darker than the white paper on either side.
|
||||
|
||||
Returns a list of page dicts ``{x, y, width, height, page_index}``
|
||||
or an empty list if only one page is detected.
|
||||
"""
|
||||
h, w = img_bgr.shape[:2]
|
||||
|
||||
# Only check landscape-ish images (width > height * 0.85)
|
||||
# Only check landscape-ish images (width > height * 1.15)
|
||||
if w < h * 1.15:
|
||||
return []
|
||||
|
||||
gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
|
||||
binary = cv2.adaptiveThreshold(
|
||||
gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
||||
cv2.THRESH_BINARY_INV, blockSize=51, C=15,
|
||||
)
|
||||
|
||||
# Vertical projection: mean ink density per column
|
||||
v_proj = np.mean(binary, axis=0) / 255.0
|
||||
# Column-mean brightness (0-255) — the spine is darker (gray scanner bed)
|
||||
col_brightness = np.mean(gray, axis=0).astype(np.float64)
|
||||
|
||||
# Smooth with boxcar (width = 0.5% of image width, min 5)
|
||||
kern = max(5, w // 200)
|
||||
# Heavy smoothing to ignore individual text lines
|
||||
kern = max(11, w // 50)
|
||||
if kern % 2 == 0:
|
||||
kern += 1
|
||||
v_smooth = np.convolve(v_proj, np.ones(kern) / kern, mode="same")
|
||||
brightness_smooth = np.convolve(col_brightness, np.ones(kern) / kern, mode="same")
|
||||
|
||||
peak = float(np.max(v_smooth))
|
||||
if peak < 0.005:
|
||||
# Page paper is bright (typically > 200), spine/scanner bed is darker
|
||||
page_brightness = float(np.max(brightness_smooth))
|
||||
if page_brightness < 100:
|
||||
return [] # Very dark image, skip
|
||||
|
||||
# Spine threshold: significantly darker than the page
|
||||
# Spine is typically 60-80% of paper brightness
|
||||
spine_thresh = page_brightness * 0.88
|
||||
|
||||
# Search in center region (30-70% of width)
|
||||
center_lo = int(w * 0.30)
|
||||
center_hi = int(w * 0.70)
|
||||
|
||||
# Find the darkest valley in the center region
|
||||
center_brightness = brightness_smooth[center_lo:center_hi]
|
||||
darkest_val = float(np.min(center_brightness))
|
||||
|
||||
if darkest_val >= spine_thresh:
|
||||
logger.debug("No spine detected: min brightness %.0f >= threshold %.0f",
|
||||
darkest_val, spine_thresh)
|
||||
return []
|
||||
|
||||
# Look for valleys in center region (25-75% of width)
|
||||
gap_thresh = peak * 0.15 # valley must be < 15% of peak density
|
||||
center_lo = int(w * 0.25)
|
||||
center_hi = int(w * 0.75)
|
||||
min_gap_px = max(5, int(w * min_gap_frac))
|
||||
|
||||
# Find contiguous gap runs in the center region
|
||||
gaps: list = []
|
||||
in_gap = False
|
||||
gap_start = 0
|
||||
for x in range(center_lo, center_hi):
|
||||
if v_smooth[x] < gap_thresh:
|
||||
if not in_gap:
|
||||
gap_start = x
|
||||
in_gap = True
|
||||
# Find the contiguous dark region (spine area)
|
||||
is_dark = center_brightness < spine_thresh
|
||||
# Find the widest dark run
|
||||
best_start, best_end = 0, 0
|
||||
run_start = -1
|
||||
for i in range(len(is_dark)):
|
||||
if is_dark[i]:
|
||||
if run_start < 0:
|
||||
run_start = i
|
||||
else:
|
||||
if in_gap:
|
||||
gap_w = x - gap_start
|
||||
if gap_w >= min_gap_px:
|
||||
gaps.append({"x": gap_start, "width": gap_w,
|
||||
"center": gap_start + gap_w // 2})
|
||||
in_gap = False
|
||||
if in_gap:
|
||||
gap_w = center_hi - gap_start
|
||||
if gap_w >= min_gap_px:
|
||||
gaps.append({"x": gap_start, "width": gap_w,
|
||||
"center": gap_start + gap_w // 2})
|
||||
if run_start >= 0:
|
||||
if i - run_start > best_end - best_start:
|
||||
best_start, best_end = run_start, i
|
||||
run_start = -1
|
||||
if run_start >= 0 and len(is_dark) - run_start > best_end - best_start:
|
||||
best_start, best_end = run_start, len(is_dark)
|
||||
|
||||
if not gaps:
|
||||
spine_w = best_end - best_start
|
||||
if spine_w < w * 0.01:
|
||||
logger.debug("Spine too narrow: %dpx (< %dpx)", spine_w, int(w * 0.01))
|
||||
return []
|
||||
|
||||
# Merge nearby gaps (< 5% of width apart) — the spine area may have
|
||||
# thin ink strips between multiple gap segments
|
||||
merge_dist = max(20, int(w * 0.05))
|
||||
merged: list = [gaps[0]]
|
||||
for g in gaps[1:]:
|
||||
prev = merged[-1]
|
||||
prev_end = prev["x"] + prev["width"]
|
||||
if g["x"] - prev_end < merge_dist:
|
||||
# Merge: extend previous gap to cover both
|
||||
new_end = g["x"] + g["width"]
|
||||
prev["width"] = new_end - prev["x"]
|
||||
prev["center"] = prev["x"] + prev["width"] // 2
|
||||
else:
|
||||
merged.append(g)
|
||||
gaps = merged
|
||||
spine_x = center_lo + best_start
|
||||
spine_center = spine_x + spine_w // 2
|
||||
|
||||
# Sort gaps by width (largest = most likely spine)
|
||||
gaps.sort(key=lambda g: g["width"], reverse=True)
|
||||
# Verify: must have bright (paper) content on BOTH sides
|
||||
left_brightness = float(np.mean(brightness_smooth[max(0, spine_x - w // 10):spine_x]))
|
||||
right_end = center_lo + best_end
|
||||
right_brightness = float(np.mean(brightness_smooth[right_end:min(w, right_end + w // 10)]))
|
||||
|
||||
# Use only gaps that are significant (>= 2% of image width)
|
||||
significant_gaps = [g for g in gaps if g["width"] >= w * 0.02]
|
||||
if not significant_gaps:
|
||||
# Fall back to widest gap
|
||||
significant_gaps = [gaps[0]]
|
||||
if left_brightness < spine_thresh or right_brightness < spine_thresh:
|
||||
logger.debug("No bright paper flanking spine: left=%.0f right=%.0f thresh=%.0f",
|
||||
left_brightness, right_brightness, spine_thresh)
|
||||
return []
|
||||
|
||||
# Use the significant gap(s) as split points
|
||||
split_points = sorted(g["center"] for g in significant_gaps[:3])
|
||||
logger.info(
|
||||
"Spine detected: x=%d..%d (w=%d), brightness=%.0f vs paper=%.0f, "
|
||||
"left_paper=%.0f, right_paper=%.0f",
|
||||
spine_x, right_end, spine_w, darkest_val, page_brightness,
|
||||
left_brightness, right_brightness,
|
||||
)
|
||||
|
||||
# Split at the spine center
|
||||
split_points = [spine_center]
|
||||
|
||||
# Build page rectangles
|
||||
pages: list = []
|
||||
|
||||
Reference in New Issue
Block a user