backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
389 lines
12 KiB
Python
389 lines
12 KiB
Python
"""
|
|
Page Crop - Edge Detection Helpers
|
|
|
|
Spine shadow detection, gutter continuity analysis, projection-based
|
|
edge detection, and narrow-run filtering for content cropping.
|
|
|
|
Extracted from page_crop.py to keep files under 500 LOC.
|
|
License: Apache 2.0
|
|
"""
|
|
|
|
import logging
|
|
from typing import Optional, Tuple
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
# Module-level logger; handlers/levels are configured by the host application.
logger = logging.getLogger(__name__)

# Minimum ink density (fraction of pixels) to count a row/column as "content"
# when scanning binary projections for edges.
_INK_THRESHOLD = 0.003  # 0.3%

# Minimum run length (fraction of dimension) to keep — shorter runs are noise
# (specks, scanner dust) and are filtered out before edge detection.
_MIN_RUN_FRAC = 0.005  # 0.5%
|
|
|
|
|
|
def _spine_rise_ok(
    smoothed: np.ndarray,
    spine_idx: int,
    val_min: float,
    shadow_range: float,
    side: str,
) -> bool:
    """Check 4 of spine detection: brightness must rise toward the page.

    A real spine shadow is a valley: moving from the darkest column toward
    the page content (rightward for a left-side spine, leftward for a
    right-side spine), mean brightness should climb by at least 30% of the
    overall brightness range. Text plateaus stay flat and fail this check.

    Args:
        smoothed: Edge-trimmed, smoothed column-brightness profile.
        spine_idx: Index of the darkest column within ``smoothed``.
        val_min: Brightness at the darkest column.
        shadow_range: max - min brightness over the profile.
        side: 'left' or 'right' (selects content direction; also logging).

    Returns:
        True when the rise is sufficient, or when the measurement window
        is empty (check skipped); False when the profile fails the check.
    """
    trimmed_len = len(smoothed)
    rise_check_w = max(5, trimmed_len // 5)
    if side == "left":
        # Page content lies to the right of the valley.
        start = min(spine_idx + 5, trimmed_len - 1)
        end = min(start + rise_check_w, trimmed_len)
    else:  # right — page content lies to the left of the valley.
        end = max(spine_idx - 5, 0)
        start = max(end - rise_check_w, 0)
    if end <= start:
        # Window collapsed (valley sits at the profile edge): skip the check.
        return True
    rise = float(np.mean(smoothed[start:end])) - val_min
    if rise < shadow_range * 0.3:
        logger.debug(
            "%s edge: no spine (insufficient rise: %.0f, need %.0f)",
            side.capitalize(), rise, shadow_range * 0.3,
        )
        return False
    return True


def _detect_spine_shadow(
    gray: np.ndarray,
    search_region: np.ndarray,
    offset_x: int,
    w: int,
    side: str,
) -> Optional[int]:
    """Find the book spine center (darkest point) in a scanner shadow.

    The scanner produces a gray strip where the book spine presses against
    the glass. The darkest column in that strip is the spine center —
    that's where we crop.

    Distinguishes real spine shadows from text content by checking:
    1. Strong brightness range (> 40 levels)
    2. Darkest point is genuinely dark (< 180 mean brightness)
    3. The dark area is a NARROW valley, not a text-content plateau
    4. Brightness rises significantly toward the page content side
       (delegated to ``_spine_rise_ok``)

    Args:
        gray: Full grayscale image (for context).
        search_region: Column slice of the grayscale image to search in.
        offset_x: X offset of search_region relative to full image.
        w: Full image width.
        side: 'left' or 'right' (for logging and rise direction).

    Returns:
        X coordinate (in full image) of the spine center, or None.
    """
    region_w = search_region.shape[1]
    if region_w < 10:
        return None

    # Column-mean brightness in the search region.
    col_means = np.mean(search_region, axis=0).astype(np.float64)

    # Smooth with boxcar kernel (width = 1% of image width, min 5, forced odd).
    kernel_size = max(5, w // 100)
    if kernel_size % 2 == 0:
        kernel_size += 1
    kernel = np.ones(kernel_size) / kernel_size
    smoothed_raw = np.convolve(col_means, kernel, mode="same")

    # Trim convolution edge artifacts (edges are zero-padded -> artificially low).
    margin = kernel_size // 2
    if region_w <= 2 * margin + 10:
        return None
    smoothed = smoothed_raw[margin:region_w - margin]
    trim_offset = margin  # offset of smoothed[0] relative to search_region

    val_min = float(np.min(smoothed))
    val_max = float(np.max(smoothed))
    shadow_range = val_max - val_min

    # --- Check 1: Strong brightness gradient ---
    if shadow_range <= 40:
        logger.debug(
            "%s edge: no spine (range=%.0f <= 40)", side.capitalize(), shadow_range,
        )
        return None

    # --- Check 2: Darkest point must be genuinely dark ---
    if val_min > 180:
        logger.debug(
            "%s edge: no spine (darkest=%.0f > 180, likely text)",
            side.capitalize(), val_min,
        )
        return None

    spine_idx = int(np.argmin(smoothed))    # index in trimmed array
    spine_local = spine_idx + trim_offset   # index in search_region
    trimmed_len = len(smoothed)

    # --- Check 3: Valley width (spine is narrow, text plateau is wide) ---
    valley_thresh = val_min + shadow_range * 0.20
    valley_width = int(np.sum(smoothed < valley_thresh))
    max_valley_frac = 0.50
    if valley_width > trimmed_len * max_valley_frac:
        logger.debug(
            "%s edge: no spine (valley too wide: %d/%d = %.0f%%)",
            side.capitalize(), valley_width, trimmed_len,
            100.0 * valley_width / trimmed_len,
        )
        return None

    # --- Check 4: Brightness must rise toward page content ---
    if not _spine_rise_ok(smoothed, spine_idx, val_min, shadow_range, side):
        return None

    spine_x = offset_x + spine_local

    logger.info(
        "%s edge: spine center at x=%d (brightness=%.0f, range=%.0f, valley=%dpx)",
        side.capitalize(), spine_x, val_min, shadow_range, valley_width,
    )
    return spine_x
|
|
|
|
|
|
def _detect_gutter_continuity(
    gray: np.ndarray,
    search_region: np.ndarray,
    offset_x: int,
    w: int,
    side: str,
) -> Optional[int]:
    """Detect gutter shadow via vertical continuity analysis.

    Camera book scans produce a subtle brightness gradient at the gutter
    that is too faint for scanner-shadow detection (range < 40). However,
    the gutter shadow has a unique property: it runs **continuously from
    top to bottom** without interruption.

    Algorithm:
    1. Divide image into N horizontal strips (~60px each)
    2. For each column, compute what fraction of strips are darker than
       the page median (from the center 50% of the full image)
    3. A "gutter column" has >= 75% of strips darker than page_median - d
    4. Smooth the dark-fraction profile and find the transition point
    5. Validate: gutter band must be 0.5%-10% of image width

    Args:
        gray: Full grayscale image (used for the page-median estimate).
        search_region: Column slice of the grayscale image to search in.
        offset_x: X offset of search_region relative to the full image.
        w: Full image width.
        side: 'left' or 'right' (selects scan direction; also logging).

    Returns:
        X coordinate (in the full image) of the inner gutter boundary,
        or None when no plausible gutter is found.
    """
    region_h, region_w = search_region.shape[:2]
    # Too small to form strips / a meaningful per-column profile.
    if region_w < 20 or region_h < 100:
        return None

    # --- 1. Divide into horizontal strips ---
    strip_target_h = 60
    n_strips = max(10, region_h // strip_target_h)
    strip_h = region_h // n_strips

    # strip_means[s, x] = mean brightness of column x within strip s.
    strip_means = np.zeros((n_strips, region_w), dtype=np.float64)
    for s in range(n_strips):
        y0 = s * strip_h
        y1 = min((s + 1) * strip_h, region_h)
        strip_means[s] = np.mean(search_region[y0:y1, :], axis=0)

    # --- 2. Page median from center 50% of full image ---
    center_lo = w // 4
    center_hi = 3 * w // 4
    page_median = float(np.median(gray[:, center_lo:center_hi]))

    # Columns this far below the page median count as "dark".
    dark_thresh = page_median - 5.0

    # A dark page body makes the median-relative threshold unreliable; bail.
    if page_median < 180:
        return None

    # --- 3. Per-column dark fraction ---
    dark_count = np.sum(strip_means < dark_thresh, axis=0).astype(np.float64)
    dark_frac = dark_count / n_strips

    # --- 4. Smooth and find transition ---
    smooth_w = max(5, w // 100)
    if smooth_w % 2 == 0:
        smooth_w += 1  # force an odd width so the boxcar window is symmetric
    kernel = np.ones(smooth_w) / smooth_w
    frac_smooth = np.convolve(dark_frac, kernel, mode="same")

    # mode="same" zero-pads the edges; exclude that artifact margin.
    margin = smooth_w // 2
    if region_w <= 2 * margin + 10:
        return None

    transition_thresh = 0.50
    peak_frac = float(np.max(frac_smooth[margin:region_w - margin]))

    # The gutter must be dark across (nearly) all strips. NOTE(review): the
    # docstring says ">= 75%" but the code requires >= 0.70 — confirm intent.
    if peak_frac < 0.70:
        logger.debug(
            "%s gutter: peak dark fraction %.2f < 0.70", side.capitalize(), peak_frac,
        )
        return None

    peak_x = int(np.argmax(frac_smooth[margin:region_w - margin])) + margin
    gutter_inner = None

    # Walk from the peak toward the page body until the dark fraction drops
    # below 50%; the column just before the crossing is the inner boundary.
    if side == "right":
        for x in range(peak_x, margin, -1):
            if frac_smooth[x] < transition_thresh:
                gutter_inner = x + 1
                break
    else:
        for x in range(peak_x, region_w - margin):
            if frac_smooth[x] < transition_thresh:
                gutter_inner = x - 1
                break

    if gutter_inner is None:
        return None

    # --- 5. Validate gutter width ---
    # Width of the band between the image edge and the inner boundary.
    if side == "right":
        gutter_width = region_w - gutter_inner
    else:
        gutter_width = gutter_inner

    min_gutter = max(3, int(w * 0.005))  # 0.5% of full image width
    max_gutter = int(w * 0.10)           # 10% of full image width

    if gutter_width < min_gutter:
        logger.debug(
            "%s gutter: too narrow (%dpx < %dpx)", side.capitalize(),
            gutter_width, min_gutter,
        )
        return None

    if gutter_width > max_gutter:
        logger.debug(
            "%s gutter: too wide (%dpx > %dpx)", side.capitalize(),
            gutter_width, max_gutter,
        )
        return None

    # Mean brightness over the candidate gutter band.
    if side == "right":
        gutter_brightness = float(np.mean(strip_means[:, gutter_inner:]))
    else:
        gutter_brightness = float(np.mean(strip_means[:, :gutter_inner]))

    # Require a real (if subtle) darkening versus the page body.
    brightness_drop = page_median - gutter_brightness
    if brightness_drop < 3:
        logger.debug(
            "%s gutter: insufficient brightness drop (%.1f levels)",
            side.capitalize(), brightness_drop,
        )
        return None

    gutter_x = offset_x + gutter_inner

    logger.info(
        "%s gutter (continuity): x=%d, width=%dpx (%.1f%%), "
        "brightness=%.0f vs page=%.0f (drop=%.0f), frac@edge=%.2f",
        side.capitalize(), gutter_x, gutter_width,
        100.0 * gutter_width / w, gutter_brightness, page_median,
        brightness_drop, float(frac_smooth[gutter_inner]),
    )
    return gutter_x
|
|
|
|
|
|
def _detect_left_edge_shadow(
    gray: np.ndarray,
    binary: np.ndarray,
    w: int,
    h: int,
) -> int:
    """Detect the left content edge, accounting for book-spine shadow.

    Three strategies are attempted in order; the first that yields a
    coordinate wins:
    1. Scanner spine-shadow (dark gradient, range > 40)
    2. Camera gutter continuity (subtle shadow running top-to-bottom)
    3. Binary projection fallback (first ink column)
    """
    # Search only the leftmost quarter of the image.
    strip = gray[:, :max(1, w // 4)]
    for detect in (_detect_spine_shadow, _detect_gutter_continuity):
        edge = detect(gray, strip, 0, w, "left")
        if edge is not None:
            return edge
    return _detect_edge_projection(binary, axis=0, from_start=True, dim=w)
|
|
|
|
|
|
def _detect_right_edge_shadow(
    gray: np.ndarray,
    binary: np.ndarray,
    w: int,
    h: int,
) -> int:
    """Detect the right content edge, accounting for book-spine shadow.

    Three strategies are attempted in order; the first that yields a
    coordinate wins:
    1. Scanner spine-shadow (dark gradient, range > 40)
    2. Camera gutter continuity (subtle shadow running top-to-bottom)
    3. Binary projection fallback (last ink column)
    """
    # Search only the rightmost quarter of the image.
    x0 = w - max(1, w // 4)
    strip = gray[:, x0:]
    for detect in (_detect_spine_shadow, _detect_gutter_continuity):
        edge = detect(gray, strip, x0, w, "right")
        if edge is not None:
            return edge
    return _detect_edge_projection(binary, axis=0, from_start=False, dim=w)
|
|
|
|
|
|
def _detect_top_bottom_edges(binary: np.ndarray, w: int, h: int) -> Tuple[int, int]:
    """Locate the top and bottom content edges via binary row projection."""
    return (
        _detect_edge_projection(binary, axis=1, from_start=True, dim=h),
        _detect_edge_projection(binary, axis=1, from_start=False, dim=h),
    )
|
|
|
|
|
|
def _detect_edge_projection(
    binary: np.ndarray,
    axis: int,
    from_start: bool,
    dim: int,
) -> int:
    """Locate the first/last row or column whose ink density clears the threshold.

    axis=0 -> project vertically (column densities) -> returns x position
    axis=1 -> project horizontally (row densities) -> returns y position

    True-runs shorter than _MIN_RUN_FRAC of *dim* are dropped as noise
    before the edge is picked.
    """
    density = np.mean(binary, axis=axis) / 255.0
    inked = _filter_narrow_runs(
        density >= _INK_THRESHOLD,
        max(1, int(dim * _MIN_RUN_FRAC)),
    )
    hits = np.flatnonzero(inked)
    if hits.size == 0:
        # No content found: fall back to the full extent.
        return 0 if from_start else dim
    return int(hits[0] if from_start else hits[-1])
|
|
|
|
|
|
def _filter_narrow_runs(mask: np.ndarray, min_run: int) -> np.ndarray:
|
|
"""Remove True-runs shorter than min_run pixels."""
|
|
if min_run <= 1:
|
|
return mask
|
|
|
|
result = mask.copy()
|
|
n = len(result)
|
|
i = 0
|
|
while i < n:
|
|
if result[i]:
|
|
start = i
|
|
while i < n and result[i]:
|
|
i += 1
|
|
if i - start < min_run:
|
|
result[start:i] = False
|
|
else:
|
|
i += 1
|
|
return result
|