Add camera gutter detection via vertical continuity analysis

Scanner shadow detection (range > 40, darkest < 180) fails on camera book scans where the gutter shadow is subtle (range ~25, darkest ~214). New _detect_gutter_continuity() detects gutters by their unique property: the shadow runs continuously from top to bottom without interruption. Divides the image into horizontal strips and checks what fraction of strips are darker than the page median at each column. A gutter column has >= 75% of strips darker. The transition point where the smoothed dark fraction drops below 50% marks the crop boundary. Integrated as fallback between scanner shadow and binary projection. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 13:58:14 +02:00
parent 9b5e8c6b35
commit 633e301bfd
2 changed files with 275 additions and 6 deletions
--- a/klausur-service/backend/page_crop.py
+++ b/klausur-service/backend/page_crop.py
@@ -457,6 +457,153 @@ def _detect_spine_shadow(
    return spine_x


+def _detect_gutter_continuity(
+    gray: np.ndarray,
+    search_region: np.ndarray,
+    offset_x: int,
+    w: int,
+    side: str,
+) -> Optional[int]:
+    """Detect gutter shadow via vertical continuity analysis.
+
+    Camera book scans produce a subtle brightness gradient at the gutter
+    that is too faint for scanner-shadow detection (range < 40).  However,
+    the gutter shadow has a unique property: it runs **continuously from
+    top to bottom** without interruption.  Text and images always have
+    vertical gaps between lines, paragraphs, or sections.
+
+    Algorithm:
+    1. Divide image into N horizontal strips (~60px each)
+    2. Take the page median from the center 50% of the full image
+    3. Per column: fraction of strips darker than page median − 5
+    4. Smooth that profile; scan from the edge inward to where it drops
+       below 0.50
+    5. Validate: band is 0.5%-10% of image width, holds a column dark in
+       ≥ 75% of strips, and averages ≥ 3 levels below the page median
+
+    Args:
+        gray: Full grayscale image.
+        search_region: Edge slice of the grayscale image.
+        offset_x: X offset of search_region relative to full image.
+        w: Full image width.
+        side: 'right' scans from the right edge; anything else is 'left'.
+
+    Returns:
+        X coordinate (in full image) of the gutter inner edge, or None.
+    """
+    region_h, region_w = search_region.shape[:2]
+    if region_w < 20 or region_h < 100:
+        return None
+
+    # --- 1. Divide into horizontal strips ---
+    strip_target_h = 60  # ~60px per strip
+    n_strips = max(10, region_h // strip_target_h)
+    strip_h = region_h // n_strips
+
+    # Rows left over below the last full strip are ignored.
+    strip_means = np.zeros((n_strips, region_w), dtype=np.float64)
+    for s in range(n_strips):
+        rows = search_region[s * strip_h:(s + 1) * strip_h, :]
+        strip_means[s] = np.mean(rows, axis=0)
+
+    # --- 2. Page median from center 50% of full image ---
+    center_lo = w // 4
+    center_hi = 3 * w // 4
+    page_median = float(np.median(gray[:, center_lo:center_hi]))
+
+    # If page is very dark overall (e.g. photo, not a book page), bail out
+    if page_median < 180:
+        return None
+
+    # Camera shadows are subtle — threshold just 5 levels below page median
+    dark_thresh = page_median - 5.0
+
+    # --- 3. Per-column dark fraction ---
+    dark_count = np.sum(strip_means < dark_thresh, axis=0).astype(np.float64)
+    dark_frac = dark_count / n_strips  # shape: (region_w,)
+
+    # --- 4. Smooth and find transition ---
+    # Rolling mean (window = 1% of image width, min 5; `| 1` forces it odd)
+    smooth_w = max(5, w // 100) | 1
+    kernel = np.ones(smooth_w) / smooth_w
+    frac_smooth = np.convolve(dark_frac, kernel, mode="same")
+
+    # Trim convolution edges
+    margin = smooth_w // 2
+    if region_w <= 2 * margin + 10:
+        return None
+
+    # Scan from edge inward to find where frac drops below transition threshold
+    transition_thresh = 0.50
+    gutter_inner = None  # local x in search_region
+
+    if side == "right":
+        # Scan from right edge (region_w - 1) inward (toward 0)
+        for x in range(region_w - 1 - margin, margin, -1):
+            if frac_smooth[x] < transition_thresh:
+                gutter_inner = x + 1  # crop just past the transition
+                break
+    else:
+        # Scan from left edge (0) inward (toward region_w)
+        for x in range(margin, region_w - margin):
+            if frac_smooth[x] < transition_thresh:
+                gutter_inner = x - 1
+                break
+
+    if gutter_inner is None:
+        return None
+
+    # --- 5. Validate the candidate band ---
+    label = side.capitalize()
+    if side == "right":
+        gutter_width = region_w - gutter_inner
+        band = slice(gutter_inner, region_w)
+    else:
+        gutter_width = gutter_inner
+        band = slice(0, gutter_inner)
+
+    min_gutter = max(3, int(w * 0.005))   # at least 0.5% of image
+    max_gutter = int(w * 0.10)            # at most 10% of image
+    if gutter_width < min_gutter:
+        logger.debug(
+            "%s gutter: too narrow (%dpx < %dpx)", label, gutter_width, min_gutter,
+        )
+        return None
+
+    if gutter_width > max_gutter:
+        logger.debug(
+            "%s gutter: too wide (%dpx > %dpx)", label, gutter_width, max_gutter,
+        )
+        return None
+
+    # The 0.50 transition alone also fires when the scan starts on a bright
+    # column; require a genuinely continuous ("gutter") column in the band.
+    peak_frac = float(np.max(dark_frac[band]))
+    if peak_frac < 0.75:
+        logger.debug("%s gutter: not continuous (peak %.2f)", label, peak_frac)
+        return None
+
+    # Check that the gutter band is meaningfully darker than the page
+    gutter_brightness = float(np.mean(strip_means[:, band]))
+    brightness_drop = page_median - gutter_brightness
+    if brightness_drop < 3:
+        logger.debug(
+            "%s gutter: insufficient brightness drop (%.1f levels)",
+            label, brightness_drop,
+        )
+        return None
+
+    gutter_x = offset_x + gutter_inner
+    logger.info(
+        "%s gutter (continuity): x=%d, width=%dpx (%.1f%%), "
+        "brightness=%.0f vs page=%.0f (drop=%.0f), frac@edge=%.2f",
+        label, gutter_x, gutter_width,
+        100.0 * gutter_width / w, gutter_brightness, page_median,
+        brightness_drop, float(frac_smooth[gutter_inner]),
+    )
+    return gutter_x
+
+
 def _detect_left_edge_shadow(
    gray: np.ndarray,
    binary: np.ndarray,
@@ -465,15 +612,22 @@ def _detect_left_edge_shadow(
 ) -> int:
    """Detect left content edge, accounting for book-spine shadow.

-    Looks at the left 25% for a scanner gray strip.  Cuts at the
-    darkest column (= spine center).  Fallback: binary projection.
+    Tries three methods in order:
+    1. Scanner spine-shadow (dark gradient, range > 40)
+    2. Camera gutter continuity (subtle shadow running top-to-bottom)
+    3. Binary projection fallback (first ink column)
    """
    search_w = max(1, w // 4)
    spine_x = _detect_spine_shadow(gray, gray[:, :search_w], 0, w, "left")
    if spine_x is not None:
        return spine_x

-    # Fallback: binary vertical projection
+    # Fallback 1: vertical continuity (camera gutter shadow)
+    gutter_x = _detect_gutter_continuity(gray, gray[:, :search_w], 0, w, "left")
+    if gutter_x is not None:
+        return gutter_x
+
+    # Fallback 2: binary vertical projection
    return _detect_edge_projection(binary, axis=0, from_start=True, dim=w)


@@ -485,8 +639,10 @@ def _detect_right_edge_shadow(
 ) -> int:
    """Detect right content edge, accounting for book-spine shadow.

-    Looks at the right 25% for a scanner gray strip.  Cuts at the
-    darkest column (= spine center).  Fallback: binary projection.
+    Tries three methods in order:
+    1. Scanner spine-shadow (dark gradient, range > 40)
+    2. Camera gutter continuity (subtle shadow running top-to-bottom)
+    3. Binary projection fallback (last ink column)
    """
    search_w = max(1, w // 4)
    right_start = w - search_w
@@ -494,7 +650,12 @@ def _detect_right_edge_shadow(
    if spine_x is not None:
        return spine_x

-    # Fallback: binary vertical projection
+    # Fallback 1: vertical continuity (camera gutter shadow)
+    gutter_x = _detect_gutter_continuity(gray, gray[:, right_start:], right_start, w, "right")
+    if gutter_x is not None:
+        return gutter_x
+
+    # Fallback 2: binary vertical projection
    return _detect_edge_projection(binary, axis=0, from_start=False, dim=w)