Fix spine shadow false positives: require dark valley, brightness rise, trim convolution edges
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m54s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 16s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m54s
CI / test-python-agent-core (push) Successful in 18s
CI / test-nodejs-website (push) Successful in 16s
The _detect_spine_shadow function was triggering on normal text content because the `shadow_range > 20` threshold was too low and the zero-padded edges of the smoothing convolution produced artificially low values. It now requires: a brightness range > 40, a darkest point < 180, a narrow valley (not a text plateau), and a brightness rise toward the page content. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -291,6 +291,12 @@ def _detect_spine_shadow(
|
||||
the glass. The darkest column in that strip is the spine center —
|
||||
that's where we crop.
|
||||
|
||||
Distinguishes real spine shadows from text content by checking:
|
||||
1. Strong brightness range (> 40 levels)
|
||||
2. Darkest point is genuinely dark (< 180 mean brightness)
|
||||
3. The dark area is a NARROW valley, not a text-content plateau
|
||||
4. Brightness rises significantly toward the page content side
|
||||
|
||||
Args:
|
||||
gray: Full grayscale image (for context).
|
||||
search_region: Column slice of the grayscale image to search in.
|
||||
@@ -301,6 +307,10 @@ def _detect_spine_shadow(
|
||||
Returns:
|
||||
X coordinate (in full image) of the spine center, or None.
|
||||
"""
|
||||
region_w = search_region.shape[1]
|
||||
if region_w < 10:
|
||||
return None
|
||||
|
||||
# Column-mean brightness in the search region
|
||||
col_means = np.mean(search_region, axis=0).astype(np.float64)
|
||||
|
||||
@@ -309,23 +319,91 @@ def _detect_spine_shadow(
|
||||
if kernel_size % 2 == 0:
|
||||
kernel_size += 1
|
||||
kernel = np.ones(kernel_size) / kernel_size
|
||||
smoothed = np.convolve(col_means, kernel, mode="same")
|
||||
smoothed_raw = np.convolve(col_means, kernel, mode="same")
|
||||
|
||||
# Trim convolution edge artifacts (edges are zero-padded → artificially low)
|
||||
margin = kernel_size // 2
|
||||
if region_w <= 2 * margin + 10:
|
||||
return None
|
||||
smoothed = smoothed_raw[margin:region_w - margin]
|
||||
trim_offset = margin # offset of smoothed[0] relative to search_region
|
||||
|
||||
val_min = float(np.min(smoothed))
|
||||
val_max = float(np.max(smoothed))
|
||||
shadow_range = val_max - val_min
|
||||
|
||||
# Only detect if meaningful brightness gradient (> 20 levels)
|
||||
if shadow_range <= 20:
|
||||
# --- Check 1: Strong brightness gradient ---
|
||||
if shadow_range <= 40:
|
||||
logger.debug(
|
||||
"%s edge: no spine (range=%.0f <= 40)", side.capitalize(), shadow_range,
|
||||
)
|
||||
return None
|
||||
|
||||
# The darkest column is the spine center — crop exactly there
|
||||
spine_local = int(np.argmin(smoothed))
|
||||
# --- Check 2: Darkest point must be genuinely dark ---
|
||||
# Spine shadows have mean column brightness 60-160.
|
||||
# Text on white paper stays above 180.
|
||||
if val_min > 180:
|
||||
logger.debug(
|
||||
"%s edge: no spine (darkest=%.0f > 180, likely text)", side.capitalize(), val_min,
|
||||
)
|
||||
return None
|
||||
|
||||
spine_idx = int(np.argmin(smoothed)) # index in trimmed array
|
||||
spine_local = spine_idx + trim_offset # index in search_region
|
||||
trimmed_len = len(smoothed)
|
||||
|
||||
# --- Check 3: Valley width (spine is narrow, text plateau is wide) ---
|
||||
# Count how many columns are within 20% of the shadow range above the min.
|
||||
valley_thresh = val_min + shadow_range * 0.20
|
||||
valley_mask = smoothed < valley_thresh
|
||||
valley_width = int(np.sum(valley_mask))
|
||||
# Spine valleys are typically 3-15% of image width (20-120px on an 800px image).
|
||||
# Text content plateaus span 20%+ of the search region.
|
||||
max_valley_frac = 0.50 # valley must not cover more than half the trimmed region
|
||||
if valley_width > trimmed_len * max_valley_frac:
|
||||
logger.debug(
|
||||
"%s edge: no spine (valley too wide: %d/%d = %.0f%%)",
|
||||
side.capitalize(), valley_width, trimmed_len,
|
||||
100.0 * valley_width / trimmed_len,
|
||||
)
|
||||
return None
|
||||
|
||||
# --- Check 4: Brightness must rise toward page content ---
|
||||
# For left edge: after spine, brightness should rise (= page paper)
|
||||
# For right edge: before spine, brightness should rise
|
||||
rise_check_w = max(5, trimmed_len // 5) # check 20% of trimmed region
|
||||
if side == "left":
|
||||
# Check columns to the right of the spine (in trimmed array)
|
||||
right_start = min(spine_idx + 5, trimmed_len - 1)
|
||||
right_end = min(right_start + rise_check_w, trimmed_len)
|
||||
if right_end > right_start:
|
||||
rise_brightness = float(np.mean(smoothed[right_start:right_end]))
|
||||
rise = rise_brightness - val_min
|
||||
if rise < shadow_range * 0.3:
|
||||
logger.debug(
|
||||
"%s edge: no spine (insufficient rise: %.0f, need %.0f)",
|
||||
side.capitalize(), rise, shadow_range * 0.3,
|
||||
)
|
||||
return None
|
||||
else: # right
|
||||
# Check columns to the left of the spine (in trimmed array)
|
||||
left_end = max(spine_idx - 5, 0)
|
||||
left_start = max(left_end - rise_check_w, 0)
|
||||
if left_end > left_start:
|
||||
rise_brightness = float(np.mean(smoothed[left_start:left_end]))
|
||||
rise = rise_brightness - val_min
|
||||
if rise < shadow_range * 0.3:
|
||||
logger.debug(
|
||||
"%s edge: no spine (insufficient rise: %.0f, need %.0f)",
|
||||
side.capitalize(), rise, shadow_range * 0.3,
|
||||
)
|
||||
return None
|
||||
|
||||
spine_x = offset_x + spine_local
|
||||
|
||||
logger.debug(
|
||||
"%s edge: spine center at x=%d (brightness=%.0f, range=%.0f)",
|
||||
side.capitalize(), spine_x, val_min, shadow_range,
|
||||
logger.info(
|
||||
"%s edge: spine center at x=%d (brightness=%.0f, range=%.0f, valley=%dpx)",
|
||||
side.capitalize(), spine_x, val_min, shadow_range, valley_width,
|
||||
)
|
||||
return spine_x
|
||||
|
||||
|
||||
Reference in New Issue
Block a user