From d36972b464a08d1c608ec4e73a3b7c16810e62e4 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 17 Mar 2026 16:52:29 +0100 Subject: [PATCH] fix: detect spine by brightness, not ink density The previous algorithm used binary ink projection and found false splits at normal text column gaps. The spine of a book on a scanner has a characteristic DARK gray strip (scanner bed) flanked by bright white paper on both sides. New approach: column-mean brightness with heavy smoothing, looking for a dark valley (< 88% of paper brightness) in the center region that has bright paper on both sides. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/page_crop.py | 132 +++++++++++++-------------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/klausur-service/backend/page_crop.py b/klausur-service/backend/page_crop.py index 8ac8e3c..c8919de 100644 --- a/klausur-service/backend/page_crop.py +++ b/klausur-service/backend/page_crop.py @@ -34,99 +34,99 @@ _MIN_RUN_FRAC = 0.005 # 0.5% def detect_page_splits( img_bgr: np.ndarray, - min_gap_frac: float = 0.008, ) -> list: """Detect if the image is a multi-page spread and return split rectangles. - Checks for wide vertical gaps (spine area) that indicate the image - contains multiple pages side by side (e.g. book on scanner). + Uses **brightness** (not ink density) to find the spine area: + the scanner bed produces a characteristic gray strip where pages meet, + which is darker than the white paper on either side. Returns a list of page dicts ``{x, y, width, height, page_index}`` or an empty list if only one page is detected. """ h, w = img_bgr.shape[:2] - # Only check landscape-ish images (width > height * 0.85) + # Only check landscape-ish images (width > height * 1.15) if w < h * 1.15: return [] gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) - binary = cv2.adaptiveThreshold( - gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, - cv2.THRESH_BINARY_INV, blockSize=51, C=15, - ) - # Vertical projection: mean ink density per column - v_proj = np.mean(binary, axis=0) / 255.0 + # Column-mean brightness (0-255) — the spine is darker (gray scanner bed) + col_brightness = np.mean(gray, axis=0).astype(np.float64) - # Smooth with boxcar (width = 0.5% of image width, min 5) - kern = max(5, w // 200) + # Heavy smoothing to ignore individual text lines + kern = max(11, w // 50) if kern % 2 == 0: kern += 1 - v_smooth = np.convolve(v_proj, np.ones(kern) / kern, mode="same") + brightness_smooth = np.convolve(col_brightness, np.ones(kern) / kern, mode="same") - peak = float(np.max(v_smooth)) - if peak < 0.005: + # Page paper is bright (typically > 200), spine/scanner bed is darker + page_brightness = float(np.max(brightness_smooth)) + if page_brightness < 100: + return [] # Very dark image, skip + + # Spine threshold: significantly darker than the page + # Spine is typically 60-80% of paper brightness + spine_thresh = page_brightness * 0.88 + + # Search in center region (30-70% of width) + center_lo = int(w * 0.30) + center_hi = int(w * 0.70) + + # Find the darkest valley in the center region + center_brightness = brightness_smooth[center_lo:center_hi] + darkest_val = float(np.min(center_brightness)) + + if darkest_val >= spine_thresh: + logger.debug("No spine detected: min brightness %.0f >= threshold %.0f", + darkest_val, spine_thresh) return [] - # Look for valleys in center region (25-75% of width) - gap_thresh = peak * 0.15 # valley must be < 15% of peak density - center_lo = int(w * 0.25) - center_hi = int(w * 0.75) - min_gap_px = max(5, int(w * min_gap_frac)) - - # Find contiguous gap runs in the center region - gaps: list = [] - in_gap = False - gap_start = 0 - for x in range(center_lo, center_hi): - if v_smooth[x] < gap_thresh: - if not in_gap: - gap_start = x - in_gap = True + # Find the contiguous dark region (spine area) + is_dark = center_brightness < spine_thresh + # Find the widest dark run + best_start, best_end = 0, 0 + run_start = -1 + for i in range(len(is_dark)): + if is_dark[i]: + if run_start < 0: + run_start = i else: - if in_gap: - gap_w = x - gap_start - if gap_w >= min_gap_px: - gaps.append({"x": gap_start, "width": gap_w, - "center": gap_start + gap_w // 2}) - in_gap = False - if in_gap: - gap_w = center_hi - gap_start - if gap_w >= min_gap_px: - gaps.append({"x": gap_start, "width": gap_w, - "center": gap_start + gap_w // 2}) + if run_start >= 0: + if i - run_start > best_end - best_start: + best_start, best_end = run_start, i + run_start = -1 + if run_start >= 0 and len(is_dark) - run_start > best_end - best_start: + best_start, best_end = run_start, len(is_dark) - if not gaps: + spine_w = best_end - best_start + if spine_w < w * 0.01: + logger.debug("Spine too narrow: %dpx (< %dpx)", spine_w, int(w * 0.01)) return [] - # Merge nearby gaps (< 5% of width apart) — the spine area may have - # thin ink strips between multiple gap segments - merge_dist = max(20, int(w * 0.05)) - merged: list = [gaps[0]] - for g in gaps[1:]: - prev = merged[-1] - prev_end = prev["x"] + prev["width"] - if g["x"] - prev_end < merge_dist: - # Merge: extend previous gap to cover both - new_end = g["x"] + g["width"] - prev["width"] = new_end - prev["x"] - prev["center"] = prev["x"] + prev["width"] // 2 - else: - merged.append(g) - gaps = merged + spine_x = center_lo + best_start + spine_center = spine_x + spine_w // 2 - # Sort gaps by width (largest = most likely spine) - gaps.sort(key=lambda g: g["width"], reverse=True) + # Verify: must have bright (paper) content on BOTH sides + left_brightness = float(np.mean(brightness_smooth[max(0, spine_x - w // 10):spine_x])) + right_end = center_lo + best_end + right_brightness = float(np.mean(brightness_smooth[right_end:min(w, right_end + w // 10)])) - # Use only gaps that are significant (>= 2% of image width) - significant_gaps = [g for g in gaps if g["width"] >= w * 0.02] - if not significant_gaps: - # Fall back to widest gap - significant_gaps = [gaps[0]] + if left_brightness < spine_thresh or right_brightness < spine_thresh: + logger.debug("No bright paper flanking spine: left=%.0f right=%.0f thresh=%.0f", + left_brightness, right_brightness, spine_thresh) + return [] - # Use the significant gap(s) as split points - split_points = sorted(g["center"] for g in significant_gaps[:3]) + logger.info( + "Spine detected: x=%d..%d (w=%d), brightness=%.0f vs paper=%.0f, " + "left_paper=%.0f, right_paper=%.0f", + spine_x, right_end, spine_w, darkest_val, page_brightness, + left_brightness, right_brightness, + ) + + # Split at the spine center + split_points = [spine_center] # Build page rectangles pages: list = []