fix: detect spine by brightness, not ink density
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 26s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Failing after 1m51s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 19s

The previous algorithm used a binarized ink-density projection and
produced false splits at ordinary gaps between text columns. When a
book lies on a scanner, the spine shows up as a characteristic dark
gray strip (the scanner bed) flanked by bright white paper on both sides.

New approach: compute column-mean brightness with heavy smoothing and
look for a dark valley (< 88% of paper brightness) in the center region;
the valley is accepted as the spine only if bright paper lies on both
sides of it.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-17 16:52:29 +01:00
parent f30e526917
commit d36972b464

View File

@@ -34,99 +34,99 @@ _MIN_RUN_FRAC = 0.005 # 0.5%
def detect_page_splits(
img_bgr: np.ndarray,
min_gap_frac: float = 0.008,
) -> list:
"""Detect if the image is a multi-page spread and return split rectangles.
Checks for wide vertical gaps (spine area) that indicate the image
contains multiple pages side by side (e.g. book on scanner).
Uses **brightness** (not ink density) to find the spine area:
the scanner bed produces a characteristic gray strip where pages meet,
which is darker than the white paper on either side.
Returns a list of page dicts ``{x, y, width, height, page_index}``
or an empty list if only one page is detected.
"""
h, w = img_bgr.shape[:2]
# Only check landscape-ish images (width > height * 0.85)
# Only check landscape-ish images (width > height * 1.15)
if w < h * 1.15:
return []
gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
binary = cv2.adaptiveThreshold(
gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY_INV, blockSize=51, C=15,
)
# Vertical projection: mean ink density per column
v_proj = np.mean(binary, axis=0) / 255.0
# Column-mean brightness (0-255) — the spine is darker (gray scanner bed)
col_brightness = np.mean(gray, axis=0).astype(np.float64)
# Smooth with boxcar (width = 0.5% of image width, min 5)
kern = max(5, w // 200)
# Heavy smoothing to ignore individual text lines
kern = max(11, w // 50)
if kern % 2 == 0:
kern += 1
v_smooth = np.convolve(v_proj, np.ones(kern) / kern, mode="same")
brightness_smooth = np.convolve(col_brightness, np.ones(kern) / kern, mode="same")
peak = float(np.max(v_smooth))
if peak < 0.005:
# Page paper is bright (typically > 200), spine/scanner bed is darker
page_brightness = float(np.max(brightness_smooth))
if page_brightness < 100:
return [] # Very dark image, skip
# Spine threshold: significantly darker than the page
# Spine is typically 60-80% of paper brightness
spine_thresh = page_brightness * 0.88
# Search in center region (30-70% of width)
center_lo = int(w * 0.30)
center_hi = int(w * 0.70)
# Find the darkest valley in the center region
center_brightness = brightness_smooth[center_lo:center_hi]
darkest_val = float(np.min(center_brightness))
if darkest_val >= spine_thresh:
logger.debug("No spine detected: min brightness %.0f >= threshold %.0f",
darkest_val, spine_thresh)
return []
# Look for valleys in center region (25-75% of width)
gap_thresh = peak * 0.15 # valley must be < 15% of peak density
center_lo = int(w * 0.25)
center_hi = int(w * 0.75)
min_gap_px = max(5, int(w * min_gap_frac))
# Find contiguous gap runs in the center region
gaps: list = []
in_gap = False
gap_start = 0
for x in range(center_lo, center_hi):
if v_smooth[x] < gap_thresh:
if not in_gap:
gap_start = x
in_gap = True
# Find the contiguous dark region (spine area)
is_dark = center_brightness < spine_thresh
# Find the widest dark run
best_start, best_end = 0, 0
run_start = -1
for i in range(len(is_dark)):
if is_dark[i]:
if run_start < 0:
run_start = i
else:
if in_gap:
gap_w = x - gap_start
if gap_w >= min_gap_px:
gaps.append({"x": gap_start, "width": gap_w,
"center": gap_start + gap_w // 2})
in_gap = False
if in_gap:
gap_w = center_hi - gap_start
if gap_w >= min_gap_px:
gaps.append({"x": gap_start, "width": gap_w,
"center": gap_start + gap_w // 2})
if run_start >= 0:
if i - run_start > best_end - best_start:
best_start, best_end = run_start, i
run_start = -1
if run_start >= 0 and len(is_dark) - run_start > best_end - best_start:
best_start, best_end = run_start, len(is_dark)
if not gaps:
spine_w = best_end - best_start
if spine_w < w * 0.01:
logger.debug("Spine too narrow: %dpx (< %dpx)", spine_w, int(w * 0.01))
return []
# Merge nearby gaps (< 5% of width apart) — the spine area may have
# thin ink strips between multiple gap segments
merge_dist = max(20, int(w * 0.05))
merged: list = [gaps[0]]
for g in gaps[1:]:
prev = merged[-1]
prev_end = prev["x"] + prev["width"]
if g["x"] - prev_end < merge_dist:
# Merge: extend previous gap to cover both
new_end = g["x"] + g["width"]
prev["width"] = new_end - prev["x"]
prev["center"] = prev["x"] + prev["width"] // 2
else:
merged.append(g)
gaps = merged
spine_x = center_lo + best_start
spine_center = spine_x + spine_w // 2
# Sort gaps by width (largest = most likely spine)
gaps.sort(key=lambda g: g["width"], reverse=True)
# Verify: must have bright (paper) content on BOTH sides
left_brightness = float(np.mean(brightness_smooth[max(0, spine_x - w // 10):spine_x]))
right_end = center_lo + best_end
right_brightness = float(np.mean(brightness_smooth[right_end:min(w, right_end + w // 10)]))
# Use only gaps that are significant (>= 2% of image width)
significant_gaps = [g for g in gaps if g["width"] >= w * 0.02]
if not significant_gaps:
# Fall back to widest gap
significant_gaps = [gaps[0]]
if left_brightness < spine_thresh or right_brightness < spine_thresh:
logger.debug("No bright paper flanking spine: left=%.0f right=%.0f thresh=%.0f",
left_brightness, right_brightness, spine_thresh)
return []
# Use the significant gap(s) as split points
split_points = sorted(g["center"] for g in significant_gaps[:3])
logger.info(
"Spine detected: x=%d..%d (w=%d), brightness=%.0f vs paper=%.0f, "
"left_paper=%.0f, right_paper=%.0f",
spine_x, right_end, spine_w, darkest_val, page_brightness,
left_brightness, right_brightness,
)
# Split at the spine center
split_points = [spine_center]
# Build page rectangles
pages: list = []