From c09838e91c66b8b815cab71e48868844345df71c Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 19 Mar 2026 08:23:50 +0100 Subject: [PATCH] Fix spine shadow false positives: require dark valley, brightness rise, trim convolution edges The _detect_spine_shadow function was triggering on normal text content because shadow_range > 20 was too low and convolution edge artifacts created artificially low values. Now requires: range > 40, darkest < 180, narrow valley (not text plateau), and brightness rise toward page content. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/page_crop.py | 94 ++++++++++- .../backend/tests/test_page_crop.py | 146 ++++++++++++++---- 2 files changed, 204 insertions(+), 36 deletions(-) diff --git a/klausur-service/backend/page_crop.py b/klausur-service/backend/page_crop.py index 54d3d9f..7b63b9c 100644 --- a/klausur-service/backend/page_crop.py +++ b/klausur-service/backend/page_crop.py @@ -291,6 +291,12 @@ def _detect_spine_shadow( the glass. The darkest column in that strip is the spine center — that's where we crop. + Distinguishes real spine shadows from text content by checking: + 1. Strong brightness range (> 40 levels) + 2. Darkest point is genuinely dark (< 180 mean brightness) + 3. The dark area is a NARROW valley, not a text-content plateau + 4. Brightness rises significantly toward the page content side + Args: gray: Full grayscale image (for context). search_region: Column slice of the grayscale image to search in. @@ -301,6 +307,10 @@ def _detect_spine_shadow( Returns: X coordinate (in full image) of the spine center, or None. 
""" + region_w = search_region.shape[1] + if region_w < 10: + return None + # Column-mean brightness in the search region col_means = np.mean(search_region, axis=0).astype(np.float64) @@ -309,23 +319,91 @@ def _detect_spine_shadow( if kernel_size % 2 == 0: kernel_size += 1 kernel = np.ones(kernel_size) / kernel_size - smoothed = np.convolve(col_means, kernel, mode="same") + smoothed_raw = np.convolve(col_means, kernel, mode="same") + + # Trim convolution edge artifacts (edges are zero-padded → artificially low) + margin = kernel_size // 2 + if region_w <= 2 * margin + 10: + return None + smoothed = smoothed_raw[margin:region_w - margin] + trim_offset = margin # offset of smoothed[0] relative to search_region val_min = float(np.min(smoothed)) val_max = float(np.max(smoothed)) shadow_range = val_max - val_min - # Only detect if meaningful brightness gradient (> 20 levels) - if shadow_range <= 20: + # --- Check 1: Strong brightness gradient --- + if shadow_range <= 40: + logger.debug( + "%s edge: no spine (range=%.0f <= 40)", side.capitalize(), shadow_range, + ) return None - # The darkest column is the spine center — crop exactly there - spine_local = int(np.argmin(smoothed)) + # --- Check 2: Darkest point must be genuinely dark --- + # Spine shadows have mean column brightness 60-160. + # Text on white paper stays above 180. + if val_min > 180: + logger.debug( + "%s edge: no spine (darkest=%.0f > 180, likely text)", side.capitalize(), val_min, + ) + return None + + spine_idx = int(np.argmin(smoothed)) # index in trimmed array + spine_local = spine_idx + trim_offset # index in search_region + trimmed_len = len(smoothed) + + # --- Check 3: Valley width (spine is narrow, text plateau is wide) --- + # Count how many columns are within 20% of the shadow range above the min. 
+ valley_thresh = val_min + shadow_range * 0.20 + valley_mask = smoothed < valley_thresh + valley_width = int(np.sum(valley_mask)) + # Spine valleys are typically 3-15% of image width (20-120px on a 800px image). + # Text content plateaus span 20%+ of the search region. + max_valley_frac = 0.50 # valley must not cover more than half the trimmed region + if valley_width > trimmed_len * max_valley_frac: + logger.debug( + "%s edge: no spine (valley too wide: %d/%d = %.0f%%)", + side.capitalize(), valley_width, trimmed_len, + 100.0 * valley_width / trimmed_len, + ) + return None + + # --- Check 4: Brightness must rise toward page content --- + # For left edge: after spine, brightness should rise (= page paper) + # For right edge: before spine, brightness should rise + rise_check_w = max(5, trimmed_len // 5) # check 20% of trimmed region + if side == "left": + # Check columns to the right of the spine (in trimmed array) + right_start = min(spine_idx + 5, trimmed_len - 1) + right_end = min(right_start + rise_check_w, trimmed_len) + if right_end > right_start: + rise_brightness = float(np.mean(smoothed[right_start:right_end])) + rise = rise_brightness - val_min + if rise < shadow_range * 0.3: + logger.debug( + "%s edge: no spine (insufficient rise: %.0f, need %.0f)", + side.capitalize(), rise, shadow_range * 0.3, + ) + return None + else: # right + # Check columns to the left of the spine (in trimmed array) + left_end = max(spine_idx - 5, 0) + left_start = max(left_end - rise_check_w, 0) + if left_end > left_start: + rise_brightness = float(np.mean(smoothed[left_start:left_end])) + rise = rise_brightness - val_min + if rise < shadow_range * 0.3: + logger.debug( + "%s edge: no spine (insufficient rise: %.0f, need %.0f)", + side.capitalize(), rise, shadow_range * 0.3, + ) + return None + spine_x = offset_x + spine_local - logger.debug( - "%s edge: spine center at x=%d (brightness=%.0f, range=%.0f)", - side.capitalize(), spine_x, val_min, shadow_range, + logger.info( + "%s 
edge: spine center at x=%d (brightness=%.0f, range=%.0f, valley=%dpx)", + side.capitalize(), spine_x, val_min, shadow_range, valley_width, ) return spine_x diff --git a/klausur-service/backend/tests/test_page_crop.py b/klausur-service/backend/tests/test_page_crop.py index d16d8b4..061e73e 100644 --- a/klausur-service/backend/tests/test_page_crop.py +++ b/klausur-service/backend/tests/test_page_crop.py @@ -18,6 +18,8 @@ from page_crop import ( _detect_format, _detect_edge_projection, _detect_left_edge_shadow, + _detect_right_edge_shadow, + _detect_spine_shadow, _filter_narrow_runs, ) @@ -45,25 +47,30 @@ def _make_image_with_content( def _make_book_scan(h: int = 1000, w: int = 800) -> np.ndarray: - """Create a synthetic book scan with spine shadow on the left. + """Create a synthetic book scan with V-shaped spine shadow on the left. - Left 10%: gradient from dark (50) to white (255) - Top 5%: white (empty scanner border) - Bottom 5%: white (empty scanner border) - Center: text-like content (dark pixels scattered) + Left region has a V-shaped brightness dip (spine center at ~5% of width): + x=0..spine_center: scanner bed or page edge (bright ~200) → spine (dark ~60) + x=spine_center..shadow_end: spine (dark ~60) → white paper (bright ~240) + Content area: scattered dark pixels (simulate text lines) + Top/bottom 5%: white margins """ - img = np.full((h, w, 3), 255, dtype=np.uint8) + img = np.full((h, w, 3), 240, dtype=np.uint8) - # Spine shadow: left 10% has gradient from dark to bright - shadow_w = w // 10 - for x in range(shadow_w): - brightness = int(50 + (255 - 50) * x / shadow_w) + # V-shaped spine shadow: center at ~5% of width + spine_center = w * 5 // 100 # e.g. 40 for 800px + shadow_half_w = w * 6 // 100 # e.g. 
48 for 800px + + for x in range(spine_center + shadow_half_w + 1): + dist = abs(x - spine_center) + # Brightness dips from 200 (edge) to 60 (spine center) + brightness = int(60 + (200 - 60) * min(dist / shadow_half_w, 1.0)) img[:, x] = brightness # Content area: scatter some dark pixels (simulate text) content_top = h // 20 # 5% top margin content_bottom = h - h // 20 # 5% bottom margin - content_left = shadow_w + w // 20 # past shadow + small margin + content_left = spine_center + shadow_half_w + w // 20 # past shadow + margin content_right = w - w // 20 # 5% right margin rng = np.random.RandomState(42) @@ -190,26 +197,95 @@ class TestDetectEdgeProjection: # Tests: _detect_left_edge_shadow # --------------------------------------------------------------------------- +class TestDetectSpineShadow: + def test_detects_real_spine_v_shape(self): + """V-shaped brightness dip (real spine shadow) should be detected.""" + h, w = 500, 800 + gray = np.full((h, w), 240, dtype=np.uint8) + # Create a V-shaped spine shadow in the left 25% (200px) + # Center of spine at x=30, brightness dips to 80 + for x in range(80): + dist_from_center = abs(x - 30) + brightness = int(80 + (240 - 80) * min(dist_from_center / 40, 1.0)) + gray[:, x] = brightness + + search_region = gray[:, :200] + result = _detect_spine_shadow(gray, search_region, 0, w, "left") + # Should find the spine near x=30 + assert result is not None + assert 20 <= result <= 40 + + def test_rejects_text_content_edge(self): + """Sharp text edge (white margin → dense text) should NOT trigger.""" + h, w = 500, 800 + gray = np.full((h, w), 240, dtype=np.uint8) + # Simulate text content starting at x=60: columns 60+ have + # alternating bright/dark rows (text lines) → mean ~156 + for x in range(60, 200): + for y_start in range(0, h, 20): + gray[y_start:min(y_start + 8, h), x] = 30 # text line + + search_region = gray[:, :200] + result = _detect_spine_shadow(gray, search_region, 0, w, "left") + # Should NOT detect a spine — this 
is text content, not a shadow + assert result is None + + def test_rejects_uniform_region(self): + """Uniform brightness region (no shadow) should NOT trigger.""" + h, w = 500, 800 + gray = np.full((h, w), 230, dtype=np.uint8) + search_region = gray[:, :200] + result = _detect_spine_shadow(gray, search_region, 0, w, "left") + assert result is None + + def test_rejects_bright_minimum(self): + """Region where darkest column is still bright (>180) should NOT trigger.""" + h, w = 500, 800 + gray = np.full((h, w), 240, dtype=np.uint8) + # Slight variation but everything stays bright + gray[:, 50:80] = 195 + search_region = gray[:, :200] + result = _detect_spine_shadow(gray, search_region, 0, w, "left") + assert result is None + + def test_right_side_spine(self): + """V-shaped spine shadow in right search region should be detected.""" + h, w = 500, 800 + gray = np.full((h, w), 240, dtype=np.uint8) + # Spine shadow at x=750 (right side) + for x in range(680, 800): + dist_from_center = abs(x - 750) + brightness = int(80 + (240 - 80) * min(dist_from_center / 40, 1.0)) + gray[:, x] = brightness + + right_start = w - w // 4 # 600 + search_region = gray[:, right_start:] + result = _detect_spine_shadow(gray, search_region, right_start, w, "right") + assert result is not None + assert 740 <= result <= 760 + + class TestDetectLeftEdgeShadow: def test_detects_shadow_gradient(self): - """Synthetic image with left-side shadow gradient.""" + """Synthetic image with left-side V-shaped shadow gradient.""" h, w = 500, 400 - gray = np.full((h, w), 255, dtype=np.uint8) + gray = np.full((h, w), 240, dtype=np.uint8) binary = np.zeros((h, w), dtype=np.uint8) - # Shadow: left 15% gradually darkens - shadow_w = w * 15 // 100 - for x in range(shadow_w): - brightness = int(50 + (255 - 50) * x / shadow_w) + # V-shaped shadow: center at x=20, dips to brightness 60 + shadow_center = 20 + shadow_half_w = 30 + for x in range(shadow_center + shadow_half_w): + dist = abs(x - shadow_center) + brightness 
= int(60 + (240 - 60) * min(dist / shadow_half_w, 1.0)) gray[:, x] = brightness # Content starts after shadow - binary[:, shadow_w + 10:w - 10] = 255 + binary[:, shadow_center + shadow_half_w + 10:w - 10] = 255 edge = _detect_left_edge_shadow(gray, binary, w, h) - # Edge should be within the shadow transition zone - # The 60% threshold fires before the actual shadow boundary - assert 0 < edge < shadow_w + 20 + # Edge should be near the spine center (x~20) + assert 10 <= edge <= 35 def test_no_shadow_uses_binary_fallback(self): """When shadow range is small, falls back to binary projection.""" @@ -223,6 +299,22 @@ class TestDetectLeftEdgeShadow: # Should find content start via projection fallback (near x=80) assert edge <= 85 + def test_text_content_uses_binary_fallback(self): + """Dense text in left region should NOT trigger spine detection.""" + h, w = 500, 800 + gray = np.full((h, w), 240, dtype=np.uint8) + binary = np.zeros((h, w), dtype=np.uint8) + + # Simulate text content from x=50 onward + for x in range(50, w - 20): + for y_start in range(20, h - 20, 20): + gray[y_start:min(y_start + 8, h), x] = 30 + binary[y_start:min(y_start + 8, h), x] = 255 + + edge = _detect_left_edge_shadow(gray, binary, w, h) + # Should use binary fallback and find content at ~x=50 + assert 40 <= edge <= 60 + # --------------------------------------------------------------------------- # Tests: detect_and_crop_page (end-to-end) @@ -238,18 +330,16 @@ class TestDetectAndCropPage: assert result["cropped_size"] == {"width": 80, "height": 100} def test_crops_white_borders(self): - """Image with wide white borders around dark content.""" + """Image with white borders around dark content.""" h, w = 400, 300 - img = _make_image_with_content(h, w, (80, 320, 60, 240)) + # Content area big enough to pass the 40% sanity check + img = _make_image_with_content(h, w, (40, 360, 30, 270)) cropped, result = detect_and_crop_page(img) assert result["crop_applied"] # Cropped size should be close to the 
content area (with margin) assert result["cropped_size"]["width"] < w assert result["cropped_size"]["height"] < h - # Content should be roughly 180x240 + margins (adaptive threshold may widen slightly) - assert 160 <= result["cropped_size"]["width"] <= 260 - assert 220 <= result["cropped_size"]["height"] <= 300 def test_book_scan_detects_spine_shadow(self): """Synthetic book scan with spine shadow on left.""" @@ -258,8 +348,8 @@ class TestDetectAndCropPage: # Should crop the spine shadow area left_border = result["border_fractions"]["left"] - # Spine shadow is ~10% of width, plus some margin - assert left_border > 0.05 # At least 5% left border detected + # Spine center is at ~5% of width, so left border should be >= 4% + assert left_border >= 0.04 # At least 4% left border detected def test_sanity_check_too_small_crop(self): """If detected content area is too small, skip crop."""