Cut at spine center (darkest point) instead of shadow edge
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 2m5s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 2m5s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s
Refactor left/right shadow detection into a shared _detect_spine_shadow() helper that finds the darkest column (= book spine center) via argmin of the smoothed column brightness. Both sides now cut at the spine center, ensuring equal page sizes in double-page scans regardless of shadow position.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -278,24 +278,31 @@ def detect_and_crop_page(
|
||||
# Edge detection helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _detect_left_edge_shadow(
|
||||
def _detect_spine_shadow(
|
||||
gray: np.ndarray,
|
||||
binary: np.ndarray,
|
||||
search_region: np.ndarray,
|
||||
offset_x: int,
|
||||
w: int,
|
||||
h: int,
|
||||
) -> int:
|
||||
"""Detect left content edge, accounting for book-spine shadow.
|
||||
side: str,
|
||||
) -> Optional[int]:
|
||||
"""Find the book spine center (darkest point) in a scanner shadow.
|
||||
|
||||
Strategy: look at the left 25% of the image.
|
||||
1. Compute column-mean brightness in grayscale.
|
||||
2. Smooth with a boxcar kernel.
|
||||
3. Find the transition from shadow (dark) to page (bright).
|
||||
4. Fallback: use binary vertical projection if no shadow detected.
|
||||
The scanner produces a gray strip where the book spine presses against
|
||||
the glass. The darkest column in that strip is the spine center —
|
||||
that's where we crop.
|
||||
|
||||
Args:
|
||||
gray: Full grayscale image (for context).
|
||||
search_region: Column slice of the grayscale image to search in.
|
||||
offset_x: X offset of search_region relative to full image.
|
||||
w: Full image width.
|
||||
side: 'left' or 'right' (for logging).
|
||||
|
||||
Returns:
|
||||
X coordinate (in full image) of the spine center, or None.
|
||||
"""
|
||||
search_w = max(1, w // 4)
|
||||
|
||||
# Column-mean brightness in the left quarter
|
||||
col_means = np.mean(gray[:, :search_w], axis=0).astype(np.float64)
|
||||
# Column-mean brightness in the search region
|
||||
col_means = np.mean(search_region, axis=0).astype(np.float64)
|
||||
|
||||
# Smooth with boxcar kernel (width = 1% of image width, min 5)
|
||||
kernel_size = max(5, w // 100)
|
||||
@@ -304,20 +311,40 @@ def _detect_left_edge_shadow(
|
||||
kernel = np.ones(kernel_size) / kernel_size
|
||||
smoothed = np.convolve(col_means, kernel, mode="same")
|
||||
|
||||
# Determine brightness threshold: midpoint between darkest and brightest
|
||||
val_min = float(np.min(smoothed))
|
||||
val_max = float(np.max(smoothed))
|
||||
shadow_range = val_max - val_min
|
||||
|
||||
# Only use shadow detection if there is a meaningful brightness gradient (> 20 levels)
|
||||
if shadow_range > 20:
|
||||
threshold = val_min + shadow_range * 0.6
|
||||
# Find first column where brightness exceeds threshold
|
||||
above = np.where(smoothed >= threshold)[0]
|
||||
if len(above) > 0:
|
||||
shadow_edge = int(above[0])
|
||||
logger.debug("Left edge: shadow detected at x=%d (range=%.0f)", shadow_edge, shadow_range)
|
||||
return shadow_edge
|
||||
# Only detect if meaningful brightness gradient (> 20 levels)
|
||||
if shadow_range <= 20:
|
||||
return None
|
||||
|
||||
# The darkest column is the spine center — crop exactly there
|
||||
spine_local = int(np.argmin(smoothed))
|
||||
spine_x = offset_x + spine_local
|
||||
|
||||
logger.debug(
|
||||
"%s edge: spine center at x=%d (brightness=%.0f, range=%.0f)",
|
||||
side.capitalize(), spine_x, val_min, shadow_range,
|
||||
)
|
||||
return spine_x
|
||||
|
||||
|
||||
def _detect_left_edge_shadow(
|
||||
gray: np.ndarray,
|
||||
binary: np.ndarray,
|
||||
w: int,
|
||||
h: int,
|
||||
) -> int:
|
||||
"""Detect left content edge, accounting for book-spine shadow.
|
||||
|
||||
Looks at the left 25% for a scanner gray strip. Cuts at the
|
||||
darkest column (= spine center). Fallback: binary projection.
|
||||
"""
|
||||
search_w = max(1, w // 4)
|
||||
spine_x = _detect_spine_shadow(gray, gray[:, :search_w], 0, w, "left")
|
||||
if spine_x is not None:
|
||||
return spine_x
|
||||
|
||||
# Fallback: binary vertical projection
|
||||
return _detect_edge_projection(binary, axis=0, from_start=True, dim=w)
|
||||
@@ -331,40 +358,14 @@ def _detect_right_edge_shadow(
|
||||
) -> int:
|
||||
"""Detect right content edge, accounting for book-spine shadow.
|
||||
|
||||
Mirror of _detect_left_edge_shadow: look at the right 25% of the image
|
||||
for a brightness dip (scanner gray strip at book spine).
|
||||
The darkest point in the gradient marks the spine center; crop there.
|
||||
Looks at the right 25% for a scanner gray strip. Cuts at the
|
||||
darkest column (= spine center). Fallback: binary projection.
|
||||
"""
|
||||
search_w = max(1, w // 4)
|
||||
right_start = w - search_w
|
||||
|
||||
# Column-mean brightness in the right quarter
|
||||
col_means = np.mean(gray[:, right_start:], axis=0).astype(np.float64)
|
||||
|
||||
# Smooth with boxcar kernel (width = 1% of image width, min 5)
|
||||
kernel_size = max(5, w // 100)
|
||||
if kernel_size % 2 == 0:
|
||||
kernel_size += 1
|
||||
kernel = np.ones(kernel_size) / kernel_size
|
||||
smoothed = np.convolve(col_means, kernel, mode="same")
|
||||
|
||||
# Determine brightness threshold: midpoint between darkest and brightest
|
||||
val_min = float(np.min(smoothed))
|
||||
val_max = float(np.max(smoothed))
|
||||
shadow_range = val_max - val_min
|
||||
|
||||
# Only use shadow detection if there is a meaningful brightness gradient (> 20 levels)
|
||||
if shadow_range > 20:
|
||||
threshold = val_min + shadow_range * 0.6
|
||||
# Find LAST column (from right) where brightness exceeds threshold
|
||||
# = first column from right that drops below threshold marks the spine
|
||||
above = np.where(smoothed >= threshold)[0]
|
||||
if len(above) > 0:
|
||||
# The last bright column before it drops into shadow
|
||||
shadow_edge = right_start + int(above[-1])
|
||||
logger.debug("Right edge: shadow detected at x=%d (range=%.0f)",
|
||||
shadow_edge, shadow_range)
|
||||
return shadow_edge
|
||||
spine_x = _detect_spine_shadow(gray, gray[:, right_start:], right_start, w, "right")
|
||||
if spine_x is not None:
|
||||
return spine_x
|
||||
|
||||
# Fallback: binary vertical projection
|
||||
return _detect_edge_projection(binary, axis=0, from_start=False, dim=w)
|
||||
|
||||
Reference in New Issue
Block a user