Fix spine shadow false positives: require dark valley, brightness rise, trim convolution edges

The _detect_spine_shadow function was triggering on normal text content because the shadow_range > 20 threshold was too low and convolution edge artifacts created artificially low values. It now requires: range > 40, darkest <= 180, a narrow valley (not a text plateau), and a brightness rise toward the page content.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 08:23:50 +01:00
parent 3fd6523872
commit c09838e91c
2 changed files with 204 additions and 36 deletions
--- a/klausur-service/backend/page_crop.py
+++ b/klausur-service/backend/page_crop.py
@@ -291,6 +291,12 @@ def _detect_spine_shadow(
    the glass.  The darkest column in that strip is the spine center —
    that's where we crop.

+    Distinguishes real spine shadows from text content by checking:
+    1. Strong brightness range (> 40 levels)
+    2. Darkest point is genuinely dark (< 180 mean brightness)
+    3. The dark area is a NARROW valley, not a text-content plateau
+    4. Brightness rises significantly toward the page content side
+
    Args:
        gray: Full grayscale image (for context).
        search_region: Column slice of the grayscale image to search in.
@@ -301,6 +307,10 @@ def _detect_spine_shadow(
    Returns:
        X coordinate (in full image) of the spine center, or None.
    """
+    region_w = search_region.shape[1]
+    if region_w < 10:
+        return None
+
    # Column-mean brightness in the search region
    col_means = np.mean(search_region, axis=0).astype(np.float64)

@@ -309,23 +319,91 @@ def _detect_spine_shadow(
    if kernel_size % 2 == 0:
        kernel_size += 1
    kernel = np.ones(kernel_size) / kernel_size
-    smoothed = np.convolve(col_means, kernel, mode="same")
+    smoothed_raw = np.convolve(col_means, kernel, mode="same")
+
+    # Trim convolution edge artifacts (edges are zero-padded → artificially low)
+    margin = kernel_size // 2
+    if region_w <= 2 * margin + 10:
+        return None
+    smoothed = smoothed_raw[margin:region_w - margin]
+    trim_offset = margin  # offset of smoothed[0] relative to search_region

    val_min = float(np.min(smoothed))
    val_max = float(np.max(smoothed))
    shadow_range = val_max - val_min

-    # Only detect if meaningful brightness gradient (> 20 levels)
-    if shadow_range <= 20:
+    # --- Check 1: Strong brightness gradient ---
+    if shadow_range <= 40:
+        logger.debug(
+            "%s edge: no spine (range=%.0f <= 40)", side.capitalize(), shadow_range,
+        )
        return None

-    # The darkest column is the spine center — crop exactly there
-    spine_local = int(np.argmin(smoothed))
+    # --- Check 2: Darkest point must be genuinely dark ---
+    # Spine shadows have mean column brightness 60-160.
+    # Text on white paper stays above 180.
+    if val_min > 180:
+        logger.debug(
+            "%s edge: no spine (darkest=%.0f > 180, likely text)", side.capitalize(), val_min,
+        )
+        return None
+
+    spine_idx = int(np.argmin(smoothed))  # index in trimmed array
+    spine_local = spine_idx + trim_offset  # index in search_region
+    trimmed_len = len(smoothed)
+
+    # --- Check 3: Valley width (spine is narrow, text plateau is wide) ---
+    # Count how many columns are within 20% of the shadow range above the min.
+    valley_thresh = val_min + shadow_range * 0.20
+    valley_mask = smoothed < valley_thresh
+    valley_width = int(np.sum(valley_mask))
+    # Spine valleys are typically 3-15% of image width (20-120px on an 800px image).
+    # Text content plateaus span 20%+ of the search region.
+    max_valley_frac = 0.50  # valley must not cover more than half the trimmed region
+    if valley_width > trimmed_len * max_valley_frac:
+        logger.debug(
+            "%s edge: no spine (valley too wide: %d/%d = %.0f%%)",
+            side.capitalize(), valley_width, trimmed_len,
+            100.0 * valley_width / trimmed_len,
+        )
+        return None
+
+    # --- Check 4: Brightness must rise toward page content ---
+    # For left edge: after spine, brightness should rise (= page paper)
+    # For right edge: before spine, brightness should rise
+    rise_check_w = max(5, trimmed_len // 5)  # check 20% of trimmed region
+    if side == "left":
+        # Check columns to the right of the spine (in trimmed array)
+        right_start = min(spine_idx + 5, trimmed_len - 1)
+        right_end = min(right_start + rise_check_w, trimmed_len)
+        if right_end > right_start:
+            rise_brightness = float(np.mean(smoothed[right_start:right_end]))
+            rise = rise_brightness - val_min
+            if rise < shadow_range * 0.3:
+                logger.debug(
+                    "%s edge: no spine (insufficient rise: %.0f, need %.0f)",
+                    side.capitalize(), rise, shadow_range * 0.3,
+                )
+                return None
+    else:  # right
+        # Check columns to the left of the spine (in trimmed array)
+        left_end = max(spine_idx - 5, 0)
+        left_start = max(left_end - rise_check_w, 0)
+        if left_end > left_start:
+            rise_brightness = float(np.mean(smoothed[left_start:left_end]))
+            rise = rise_brightness - val_min
+            if rise < shadow_range * 0.3:
+                logger.debug(
+                    "%s edge: no spine (insufficient rise: %.0f, need %.0f)",
+                    side.capitalize(), rise, shadow_range * 0.3,
+                )
+                return None
+
    spine_x = offset_x + spine_local

-    logger.debug(
-        "%s edge: spine center at x=%d (brightness=%.0f, range=%.0f)",
-        side.capitalize(), spine_x, val_min, shadow_range,
+    logger.info(
+        "%s edge: spine center at x=%d (brightness=%.0f, range=%.0f, valley=%dpx)",
+        side.capitalize(), spine_x, val_min, shadow_range, valley_width,
    )
    return spine_x