def _detect_gutter_continuity(
    gray: np.ndarray,
    search_region: np.ndarray,
    offset_x: int,
    w: int,
    side: str,
) -> Optional[int]:
    """Detect a camera-scan gutter shadow via vertical continuity analysis.

    Camera book scans produce a subtle brightness gradient at the gutter
    that is too faint for scanner-shadow detection. The distinguishing
    property used here is that the shadow runs continuously from top to
    bottom, whereas text and images have vertical gaps.

    Algorithm:
      1. Take the page brightness as the median of the centre 50% of
         ``gray``; bail out if it is below 180 (not a bright page).
      2. Split ``search_region`` into horizontal strips (~60px, at least
         10 strips) and average each strip per column.
      3. For each column, compute the fraction of strips whose mean is
         more than 5 levels below the page brightness.
      4. Smooth that per-column fraction (window = 1% of ``w``, min 5,
         forced odd) and scan from the image edge inward for the first
         column whose smoothed fraction drops below 0.50.
      5. Validate: the band between the edge and that column must be
         0.5%-10% of ``w`` wide (at least 3px) and on average at least
         3 levels darker than the page brightness.

    Args:
        gray: Full grayscale image (2-D).
        search_region: Edge slice of ``gray`` (2-D) to search in.
        offset_x: X offset of ``search_region`` within the full image.
        w: Full image width.
        side: ``'right'`` scans from the right edge; any other value is
            treated as the left edge.

    Returns:
        X coordinate (in full-image coordinates) of the gutter column
        adjacent to the transition, i.e. the innermost gutter column,
        or None if no plausible gutter was found.
    """
    region_h, region_w = search_region.shape[:2]
    if region_w < 20 or region_h < 100:
        return None

    # --- 1. Page brightness from the centre 50% of the full image ---
    center = gray[:, w // 4 : 3 * w // 4]
    if center.size == 0:
        # An empty slice would give a NaN median that silently passes
        # every "<" comparison below.
        return None
    page_median = float(np.median(center))

    # If the page is dark overall (e.g. a photo, not a book page), bail out
    # before doing any per-strip work.
    if page_median < 180:
        return None

    # Camera shadows are subtle: threshold just 5 levels below page median.
    dark_thresh = page_median - 5.0

    # --- 2. Per-strip column means ---
    strip_target_h = 60  # ~60px per strip
    n_strips = max(10, region_h // strip_target_h)
    strip_h = region_h // n_strips
    # Rows beyond n_strips * strip_h (the remainder) are ignored.
    strip_means = (
        search_region[: n_strips * strip_h]
        .reshape(n_strips, strip_h, region_w)
        .mean(axis=1, dtype=np.float64)
    )

    # --- 3. Per-column dark fraction ---
    dark_frac = np.count_nonzero(strip_means < dark_thresh, axis=0) / n_strips

    # --- 4. Smooth and find the transition ---
    smooth_w = max(5, w // 100)
    if smooth_w % 2 == 0:
        smooth_w += 1
    margin = smooth_w // 2  # columns distorted by zero padding at each end
    if region_w <= 2 * margin + 10:
        return None

    kernel = np.ones(smooth_w) / smooth_w
    frac_smooth = np.convolve(dark_frac, kernel, mode="same")

    transition_thresh = 0.50
    below = frac_smooth < transition_thresh

    if side == "right":
        # Candidates are columns margin+1 .. region_w-1-margin; the one
        # closest to the right edge is the transition.
        hits = np.flatnonzero(below[margin + 1 : region_w - margin])
        if hits.size == 0:
            return None
        transition_x = margin + 1 + int(hits[-1])
        gutter_inner = transition_x + 1  # first gutter column
        band = slice(gutter_inner, region_w)
    else:
        # Candidates are columns margin .. region_w-1-margin; the one
        # closest to the left edge is the transition.
        hits = np.flatnonzero(below[margin : region_w - margin])
        if hits.size == 0:
            return None
        transition_x = margin + int(hits[0])
        gutter_inner = transition_x - 1  # last gutter column
        # gutter_inner is itself a gutter column, so the band is
        # [0, gutter_inner] inclusive (mirrors the right-hand case).
        band = slice(0, gutter_inner + 1)

    # --- 5. Validate gutter width and darkness ---
    gutter_width = band.stop - band.start

    min_gutter = max(3, int(w * 0.005))  # at least 0.5% of image
    max_gutter = int(w * 0.10)  # at most 10% of image

    if gutter_width < min_gutter:
        logger.debug(
            "%s gutter: too narrow (%dpx < %dpx)", side.capitalize(),
            gutter_width, min_gutter,
        )
        return None

    if gutter_width > max_gutter:
        logger.debug(
            "%s gutter: too wide (%dpx > %dpx)", side.capitalize(),
            gutter_width, max_gutter,
        )
        return None

    # The gutter band must be meaningfully darker than the page.
    gutter_brightness = float(np.mean(strip_means[:, band]))
    brightness_drop = page_median - gutter_brightness
    if brightness_drop < 3:
        logger.debug(
            "%s gutter: insufficient brightness drop (%.1f levels)",
            side.capitalize(), brightness_drop,
        )
        return None

    gutter_x = offset_x + gutter_inner

    logger.info(
        "%s gutter (continuity): x=%d, width=%dpx (%.1f%%), "
        "brightness=%.0f vs page=%.0f (drop=%.0f), frac@edge=%.2f",
        side.capitalize(), gutter_x, gutter_width,
        100.0 * gutter_width / w, gutter_brightness, page_median,
        brightness_drop, float(frac_smooth[gutter_inner]),
    )
    return gutter_x
Binary projection fallback (first ink column) """ search_w = max(1, w // 4) spine_x = _detect_spine_shadow(gray, gray[:, :search_w], 0, w, "left") if spine_x is not None: return spine_x - # Fallback: binary vertical projection + # Fallback 1: vertical continuity (camera gutter shadow) + gutter_x = _detect_gutter_continuity(gray, gray[:, :search_w], 0, w, "left") + if gutter_x is not None: + return gutter_x + + # Fallback 2: binary vertical projection return _detect_edge_projection(binary, axis=0, from_start=True, dim=w) @@ -485,8 +639,10 @@ def _detect_right_edge_shadow( ) -> int: """Detect right content edge, accounting for book-spine shadow. - Looks at the right 25% for a scanner gray strip. Cuts at the - darkest column (= spine center). Fallback: binary projection. + Tries three methods in order: + 1. Scanner spine-shadow (dark gradient, range > 40) + 2. Camera gutter continuity (subtle shadow running top-to-bottom) + 3. Binary projection fallback (last ink column) """ search_w = max(1, w // 4) right_start = w - search_w @@ -494,7 +650,12 @@ def _detect_right_edge_shadow( if spine_x is not None: return spine_x - # Fallback: binary vertical projection + # Fallback 1: vertical continuity (camera gutter shadow) + gutter_x = _detect_gutter_continuity(gray, gray[:, right_start:], right_start, w, "right") + if gutter_x is not None: + return gutter_x + + # Fallback 2: binary vertical projection return _detect_edge_projection(binary, axis=0, from_start=False, dim=w) diff --git a/klausur-service/backend/tests/test_page_crop.py b/klausur-service/backend/tests/test_page_crop.py index b791ac4..e1724a2 100644 --- a/klausur-service/backend/tests/test_page_crop.py +++ b/klausur-service/backend/tests/test_page_crop.py @@ -18,6 +18,7 @@ from page_crop import ( detect_page_splits, _detect_format, _detect_edge_projection, + _detect_gutter_continuity, _detect_left_edge_shadow, _detect_right_edge_shadow, _detect_spine_shadow, @@ -564,3 +565,110 @@ class TestDetectPageSplits: assert 
# ---------------------------------------------------------------------------
# Tests: _detect_gutter_continuity (camera book scans)
# ---------------------------------------------------------------------------

def _make_camera_book_scan(h: int = 2400, w: int = 1700, gutter_side: str = "right") -> np.ndarray:
    """Build a synthetic camera book scan with a faint gutter shadow.

    The page is a uniform 250-level BGR image. Along one vertical edge a
    linear gradient darkens the page from 250 down to 210 over 7% of the
    width (4% gutter + 3% transition), identical on every row. Small dark
    rectangles (value 20) are scattered over the content area as "text".
    The output is deterministic (fixed random seed).
    """
    img = np.full((h, w, 3), 250, dtype=np.uint8)
    rng = np.random.RandomState(99)

    gutter_w = int(w * 0.04)  # ~4% of width
    gradient_w = int(w * 0.03)  # transition zone
    shadow_w = gutter_w + gradient_w

    def shade(distance: int) -> int:
        # Linear ramp from 250 (distance 0) down to 210 (distance >= shadow_w).
        return int(250 - 40 * min(distance / shadow_w, 1.0))

    if gutter_side == "right":
        shadow_start = w - gutter_w - gradient_w
        for col in range(shadow_start, w):
            img[:, col] = shade(col - shadow_start)
        text_x_lo = 50
        text_x_hi = shadow_start - 20
    else:
        shadow_end = shadow_w
        for col in range(shadow_end):
            img[:, col] = shade(shadow_end - col)
        text_x_lo = shadow_end + 20 if gutter_side == "left" else 50
        text_x_hi = w - 50

    # Scatter short dark dashes over the content area, away from the top
    # and bottom 10% of the page.
    y_lo, y_hi = h // 10, h - h // 10
    for _ in range(800):
        top = rng.randint(y_lo, y_hi)
        left = rng.randint(text_x_lo, text_x_hi)
        img[top:min(top + 3, h), left:min(left + 15, w)] = 20

    return img


class TestDetectGutterContinuity:
    """Exercise the vertical-continuity gutter detector on synthetic scans."""

    def test_detects_right_gutter(self):
        """A faint shadow along the right edge is located."""
        img = _make_camera_book_scan(gutter_side="right")
        w = img.shape[1]
        gray = np.mean(img, axis=2).astype(np.uint8)
        right_start = w - w // 4
        result = _detect_gutter_continuity(
            gray, gray[:, right_start:], right_start, w, "right",
        )
        assert result is not None
        # The shadow begins at roughly 93% of the width (w - 4% - 3%).
        assert result > w * 0.85, f"Gutter x={result} too far left"
        assert result < w * 0.98, f"Gutter x={result} too close to edge"

    def test_detects_left_gutter(self):
        """A faint shadow along the left edge is located."""
        img = _make_camera_book_scan(gutter_side="left")
        w = img.shape[1]
        gray = np.mean(img, axis=2).astype(np.uint8)
        left_region = gray[:, : w // 4]
        result = _detect_gutter_continuity(gray, left_region, 0, w, "left")
        assert result is not None
        assert result > w * 0.02, f"Gutter x={result} too close to edge"
        assert result < w * 0.15, f"Gutter x={result} too far right"

    def test_no_gutter_on_clean_page(self):
        """A uniformly bright page with text yields no gutter on either side."""
        page_h, w = 2000, 1600
        img = np.full((page_h, w, 3), 250, dtype=np.uint8)
        # Text only, no shadow.
        rng = np.random.RandomState(42)
        for _ in range(500):
            top = rng.randint(100, 1900)
            left = rng.randint(100, 1500)
            img[top:min(top + 3, page_h), left:min(left + 15, w)] = 20
        gray = np.mean(img, axis=2).astype(np.uint8)
        search_w = w // 4
        right_start = w - search_w
        result_r = _detect_gutter_continuity(gray, gray[:, right_start:], right_start, w, "right")
        result_l = _detect_gutter_continuity(gray, gray[:, :search_w], 0, w, "left")
        assert result_r is None, f"False positive on right: x={result_r}"
        assert result_l is None, f"False positive on left: x={result_l}"

    def test_integrated_with_crop(self):
        """End-to-end: detect_and_crop_page removes the right-hand gutter."""
        img = _make_camera_book_scan(gutter_side="right")
        _cropped, result = detect_and_crop_page(img)
        # A non-zero right border means the gutter was cropped away.
        right_border = result["border_fractions"]["right"]
        assert right_border > 0.01, f"Right border {right_border} — gutter not cropped"