""" Page Crop - Edge Detection Helpers Spine shadow detection, gutter continuity analysis, projection-based edge detection, and narrow-run filtering for content cropping. Extracted from page_crop.py to keep files under 500 LOC. License: Apache 2.0 """ import logging from typing import Optional, Tuple import cv2 import numpy as np logger = logging.getLogger(__name__) # Minimum ink density (fraction of pixels) to count a row/column as "content" _INK_THRESHOLD = 0.003 # 0.3% # Minimum run length (fraction of dimension) to keep — shorter runs are noise _MIN_RUN_FRAC = 0.005 # 0.5% def _detect_spine_shadow( gray: np.ndarray, search_region: np.ndarray, offset_x: int, w: int, side: str, ) -> Optional[int]: """Find the book spine center (darkest point) in a scanner shadow. The scanner produces a gray strip where the book spine presses against the glass. The darkest column in that strip is the spine center — that's where we crop. Distinguishes real spine shadows from text content by checking: 1. Strong brightness range (> 40 levels) 2. Darkest point is genuinely dark (< 180 mean brightness) 3. The dark area is a NARROW valley, not a text-content plateau 4. Brightness rises significantly toward the page content side Args: gray: Full grayscale image (for context). search_region: Column slice of the grayscale image to search in. offset_x: X offset of search_region relative to full image. w: Full image width. side: 'left' or 'right' (for logging). Returns: X coordinate (in full image) of the spine center, or None. """ region_w = search_region.shape[1] if region_w < 10: return None # Column-mean brightness in the search region col_means = np.mean(search_region, axis=0).astype(np.float64) # Smooth with boxcar kernel (width = 1% of image width, min 5) kernel_size = max(5, w // 100) if kernel_size % 2 == 0: kernel_size += 1 kernel = np.ones(kernel_size) / kernel_size smoothed_raw = np.convolve(col_means, kernel, mode="same") # Trim convolution edge artifacts (edges are zero-padded -> artificially low) margin = kernel_size // 2 if region_w <= 2 * margin + 10: return None smoothed = smoothed_raw[margin:region_w - margin] trim_offset = margin # offset of smoothed[0] relative to search_region val_min = float(np.min(smoothed)) val_max = float(np.max(smoothed)) shadow_range = val_max - val_min # --- Check 1: Strong brightness gradient --- if shadow_range <= 40: logger.debug( "%s edge: no spine (range=%.0f <= 40)", side.capitalize(), shadow_range, ) return None # --- Check 2: Darkest point must be genuinely dark --- if val_min > 180: logger.debug( "%s edge: no spine (darkest=%.0f > 180, likely text)", side.capitalize(), val_min, ) return None spine_idx = int(np.argmin(smoothed)) # index in trimmed array spine_local = spine_idx + trim_offset # index in search_region trimmed_len = len(smoothed) # --- Check 3: Valley width (spine is narrow, text plateau is wide) --- valley_thresh = val_min + shadow_range * 0.20 valley_mask = smoothed < valley_thresh valley_width = int(np.sum(valley_mask)) max_valley_frac = 0.50 if valley_width > trimmed_len * max_valley_frac: logger.debug( "%s edge: no spine (valley too wide: %d/%d = %.0f%%)", side.capitalize(), valley_width, trimmed_len, 100.0 * valley_width / trimmed_len, ) return None # --- Check 4: Brightness must rise toward page content --- rise_check_w = max(5, trimmed_len // 5) if side == "left": right_start = min(spine_idx + 5, trimmed_len - 1) right_end = min(right_start + rise_check_w, trimmed_len) if right_end > right_start: rise_brightness = float(np.mean(smoothed[right_start:right_end])) rise = rise_brightness - val_min if rise < shadow_range * 0.3: logger.debug( "%s edge: no spine (insufficient rise: %.0f, need %.0f)", side.capitalize(), rise, shadow_range * 0.3, ) return None else: # right left_end = max(spine_idx - 5, 0) left_start = max(left_end - rise_check_w, 0) if left_end > left_start: rise_brightness = float(np.mean(smoothed[left_start:left_end])) rise = rise_brightness - val_min if rise < shadow_range * 0.3: logger.debug( "%s edge: no spine (insufficient rise: %.0f, need %.0f)", side.capitalize(), rise, shadow_range * 0.3, ) return None spine_x = offset_x + spine_local logger.info( "%s edge: spine center at x=%d (brightness=%.0f, range=%.0f, valley=%dpx)", side.capitalize(), spine_x, val_min, shadow_range, valley_width, ) return spine_x def _detect_gutter_continuity( gray: np.ndarray, search_region: np.ndarray, offset_x: int, w: int, side: str, ) -> Optional[int]: """Detect gutter shadow via vertical continuity analysis. Camera book scans produce a subtle brightness gradient at the gutter that is too faint for scanner-shadow detection (range < 40). However, the gutter shadow has a unique property: it runs **continuously from top to bottom** without interruption. Algorithm: 1. Divide image into N horizontal strips (~60px each) 2. For each column, compute what fraction of strips are darker than the page median (from the center 50% of the full image) 3. A "gutter column" has >= 75% of strips darker than page_median - d 4. Smooth the dark-fraction profile and find the transition point 5. Validate: gutter band must be 0.5%-10% of image width """ region_h, region_w = search_region.shape[:2] if region_w < 20 or region_h < 100: return None # --- 1. Divide into horizontal strips --- strip_target_h = 60 n_strips = max(10, region_h // strip_target_h) strip_h = region_h // n_strips strip_means = np.zeros((n_strips, region_w), dtype=np.float64) for s in range(n_strips): y0 = s * strip_h y1 = min((s + 1) * strip_h, region_h) strip_means[s] = np.mean(search_region[y0:y1, :], axis=0) # --- 2. Page median from center 50% of full image --- center_lo = w // 4 center_hi = 3 * w // 4 page_median = float(np.median(gray[:, center_lo:center_hi])) dark_thresh = page_median - 5.0 if page_median < 180: return None # --- 3. Per-column dark fraction --- dark_count = np.sum(strip_means < dark_thresh, axis=0).astype(np.float64) dark_frac = dark_count / n_strips # --- 4. Smooth and find transition --- smooth_w = max(5, w // 100) if smooth_w % 2 == 0: smooth_w += 1 kernel = np.ones(smooth_w) / smooth_w frac_smooth = np.convolve(dark_frac, kernel, mode="same") margin = smooth_w // 2 if region_w <= 2 * margin + 10: return None transition_thresh = 0.50 peak_frac = float(np.max(frac_smooth[margin:region_w - margin])) if peak_frac < 0.70: logger.debug( "%s gutter: peak dark fraction %.2f < 0.70", side.capitalize(), peak_frac, ) return None peak_x = int(np.argmax(frac_smooth[margin:region_w - margin])) + margin gutter_inner = None if side == "right": for x in range(peak_x, margin, -1): if frac_smooth[x] < transition_thresh: gutter_inner = x + 1 break else: for x in range(peak_x, region_w - margin): if frac_smooth[x] < transition_thresh: gutter_inner = x - 1 break if gutter_inner is None: return None # --- 5. Validate gutter width --- if side == "right": gutter_width = region_w - gutter_inner else: gutter_width = gutter_inner min_gutter = max(3, int(w * 0.005)) max_gutter = int(w * 0.10) if gutter_width < min_gutter: logger.debug( "%s gutter: too narrow (%dpx < %dpx)", side.capitalize(), gutter_width, min_gutter, ) return None if gutter_width > max_gutter: logger.debug( "%s gutter: too wide (%dpx > %dpx)", side.capitalize(), gutter_width, max_gutter, ) return None if side == "right": gutter_brightness = float(np.mean(strip_means[:, gutter_inner:])) else: gutter_brightness = float(np.mean(strip_means[:, :gutter_inner])) brightness_drop = page_median - gutter_brightness if brightness_drop < 3: logger.debug( "%s gutter: insufficient brightness drop (%.1f levels)", side.capitalize(), brightness_drop, ) return None gutter_x = offset_x + gutter_inner logger.info( "%s gutter (continuity): x=%d, width=%dpx (%.1f%%), " "brightness=%.0f vs page=%.0f (drop=%.0f), frac@edge=%.2f", side.capitalize(), gutter_x, gutter_width, 100.0 * gutter_width / w, gutter_brightness, page_median, brightness_drop, float(frac_smooth[gutter_inner]), ) return gutter_x def _detect_left_edge_shadow( gray: np.ndarray, binary: np.ndarray, w: int, h: int, ) -> int: """Detect left content edge, accounting for book-spine shadow. Tries three methods in order: 1. Scanner spine-shadow (dark gradient, range > 40) 2. Camera gutter continuity (subtle shadow running top-to-bottom) 3. Binary projection fallback (first ink column) """ search_w = max(1, w // 4) spine_x = _detect_spine_shadow(gray, gray[:, :search_w], 0, w, "left") if spine_x is not None: return spine_x gutter_x = _detect_gutter_continuity(gray, gray[:, :search_w], 0, w, "left") if gutter_x is not None: return gutter_x return _detect_edge_projection(binary, axis=0, from_start=True, dim=w) def _detect_right_edge_shadow( gray: np.ndarray, binary: np.ndarray, w: int, h: int, ) -> int: """Detect right content edge, accounting for book-spine shadow. Tries three methods in order: 1. Scanner spine-shadow (dark gradient, range > 40) 2. Camera gutter continuity (subtle shadow running top-to-bottom) 3. Binary projection fallback (last ink column) """ search_w = max(1, w // 4) right_start = w - search_w spine_x = _detect_spine_shadow(gray, gray[:, right_start:], right_start, w, "right") if spine_x is not None: return spine_x gutter_x = _detect_gutter_continuity(gray, gray[:, right_start:], right_start, w, "right") if gutter_x is not None: return gutter_x return _detect_edge_projection(binary, axis=0, from_start=False, dim=w) def _detect_top_bottom_edges(binary: np.ndarray, w: int, h: int) -> Tuple[int, int]: """Detect top and bottom content edges via binary horizontal projection.""" top = _detect_edge_projection(binary, axis=1, from_start=True, dim=h) bottom = _detect_edge_projection(binary, axis=1, from_start=False, dim=h) return top, bottom def _detect_edge_projection( binary: np.ndarray, axis: int, from_start: bool, dim: int, ) -> int: """Find the first/last row or column with ink density above threshold. axis=0 -> project vertically (column densities) -> returns x position axis=1 -> project horizontally (row densities) -> returns y position Filters out narrow noise runs shorter than _MIN_RUN_FRAC of the dimension. """ projection = np.mean(binary, axis=axis) / 255.0 ink_mask = projection >= _INK_THRESHOLD min_run = max(1, int(dim * _MIN_RUN_FRAC)) ink_mask = _filter_narrow_runs(ink_mask, min_run) ink_positions = np.where(ink_mask)[0] if len(ink_positions) == 0: return 0 if from_start else dim if from_start: return int(ink_positions[0]) else: return int(ink_positions[-1]) def _filter_narrow_runs(mask: np.ndarray, min_run: int) -> np.ndarray: """Remove True-runs shorter than min_run pixels.""" if min_run <= 1: return mask result = mask.copy() n = len(result) i = 0 while i < n: if result[i]: start = i while i < n and result[i]: i += 1 if i - start < min_run: result[start:i] = False else: i += 1 return result