# breakpilot-lehrer/klausur-service/backend/page_crop_core.py

"""
Page Crop - Core Crop and Format Detection

Content-based crop for scanned pages and book scans.  Detects the content
boundary by analysing ink density projections and (for book scans) the
spine shadow gradient.

Extracted from page_crop.py to keep files under 500 LOC.
License: Apache 2.0
"""

import logging
from typing import Dict, Any, Tuple

import cv2
import numpy as np

from page_crop_edges import (
    _detect_left_edge_shadow,
    _detect_right_edge_shadow,
    _detect_top_bottom_edges,
)

logger = logging.getLogger(__name__)

# Long side / short side of each recognised paper format, i.e. height / width
# in portrait orientation.  Insertion order is significant: _detect_format
# scans the entries in this order and keeps the first of equally close matches.
PAPER_FORMATS = {
    name: long_side / short_side
    for name, short_side, long_side in (
        ("A4", 210.0, 297.0),       # mm  -> 1.4143
        ("A5", 148.0, 210.0),       # mm  -> 1.4189
        ("Letter", 8.5, 11.0),      # in  -> 1.2941
        ("Legal", 8.5, 14.0),       # in  -> 1.6471
        ("A3", 297.0, 420.0),       # mm  -> 1.4141
    )
}


def _find_dark_runs(is_dark: np.ndarray, min_len: int) -> list:
    """Locate contiguous runs of ``True`` in a 1-D boolean array.

    Returns ``(start, end)`` pairs (``end`` exclusive, plain Python ints) in
    left-to-right order, keeping only runs at least *min_len* elements long.
    """
    # Pad with False on both sides so that runs touching either border still
    # produce a rising (+1) and a falling (-1) step.
    padded = np.concatenate(([False], is_dark, [False])).astype(np.int8)
    steps = np.diff(padded)
    starts = np.flatnonzero(steps == 1)
    ends = np.flatnonzero(steps == -1)
    return [(int(s), int(e)) for s, e in zip(starts, ends) if e - s >= min_len]


def _spine_narrowness_bonus(width_frac: float) -> float:
    """Map a run width (fraction of the image width) to a 0..1 bonus.

    Runs up to 5% wide get the full bonus, the bonus falls linearly to zero
    at 15%, and anything wider gets none.
    """
    if width_frac <= 0.05:
        return 1.0
    if width_frac <= 0.15:
        return 1.0 - (width_frac - 0.05) / 0.10
    return 0.0


def detect_page_splits(
    img_bgr: np.ndarray,
) -> list:
    """Detect if the image is a multi-page spread and return split rectangles.

    Uses **brightness** (not ink density) to find the spine area:
    the scanner bed produces a characteristic gray strip where pages meet,
    which is darker than the white paper on either side.

    Args:
        img_bgr: BGR image.  A single-channel (2-D) image is also accepted
            and used directly as the grayscale input.

    Returns:
        A list of page dicts ``{x, y, width, height, page_index}`` (plain
        Python ints, left to right), or an empty list if only one page is
        detected.
    """
    h, w = img_bgr.shape[:2]

    # Only check landscape-ish images (width >= height * 1.15)
    if w < h * 1.15:
        return []

    # Smoothing window: heavy enough to ignore individual text lines, odd so
    # that it is centred on each column.
    kern = max(11, w // 50)
    if kern % 2 == 0:
        kern += 1

    # Degenerate input: with fewer columns than the smoothing window,
    # np.convolve(mode="same") returns a profile longer than the image and
    # the flank windows below can be empty (NaN mean), so nothing reliable
    # can be measured.
    if h == 0 or w < kern:
        return []

    if img_bgr.ndim == 2:
        gray = img_bgr  # already single-channel
    else:
        gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # Column-mean brightness (0-255) — the spine is darker (gray scanner bed)
    col_brightness = np.mean(gray, axis=0).astype(np.float64)
    brightness_smooth = np.convolve(col_brightness, np.ones(kern) / kern, mode="same")

    # Page paper is bright (typically > 200), spine/scanner bed is darker
    page_brightness = float(np.max(brightness_smooth))
    if page_brightness < 100:
        return []  # Very dark image, skip

    # Spine threshold: significantly darker than the page
    spine_thresh = page_brightness * 0.88

    # Search in center region (30-70% of width)
    center_lo = int(w * 0.30)
    center_hi = int(w * 0.70)

    # Find the darkest valley in the center region
    center_brightness = brightness_smooth[center_lo:center_hi]
    darkest_val = float(np.min(center_brightness))

    if darkest_val >= spine_thresh:
        logger.debug("No spine detected: min brightness %.0f >= threshold %.0f",
                      darkest_val, spine_thresh)
        return []

    # All contiguous dark runs in the center region, dropping runs that are
    # too narrow (< 1% of image width).  Indices are relative to center_lo.
    min_spine_px = int(w * 0.01)
    dark_runs = _find_dark_runs(center_brightness < spine_thresh, min_spine_px)

    if not dark_runs:
        logger.debug("No dark runs wider than %dpx in center region", min_spine_px)
        return []

    # Score each dark run: prefer centered, dark, narrow valleys
    center_region_len = center_hi - center_lo
    image_center_in_region = w * 0.5 - center_lo
    sigma = center_region_len * 0.15  # width of the Gaussian centering weight
    best_score = -1.0
    best_start, best_end = dark_runs[0]

    for run_lo, run_hi in dark_runs:
        run_width = run_hi - run_lo
        run_center = (run_lo + run_hi) / 2.0

        dist = abs(run_center - image_center_in_region)
        center_factor = float(np.exp(-0.5 * (dist / sigma) ** 2))

        run_brightness = float(np.mean(center_brightness[run_lo:run_hi]))
        darkness_factor = max(0.0, (spine_thresh - run_brightness) / spine_thresh)

        narrowness_bonus = _spine_narrowness_bonus(run_width / w)

        score = center_factor * darkness_factor * (0.3 + 0.7 * narrowness_bonus)

        logger.debug(
            "Dark run x=%d..%d (w=%d): center_f=%.3f dark_f=%.3f narrow_b=%.3f -> score=%.4f",
            center_lo + run_lo, center_lo + run_hi, run_width,
            center_factor, darkness_factor, narrowness_bonus, score,
        )

        # Strict '>' keeps the left-most run when scores tie
        if score > best_score:
            best_score = score
            best_start, best_end = run_lo, run_hi

    spine_w = best_end - best_start
    spine_x = center_lo + best_start
    spine_center = spine_x + spine_w // 2

    logger.debug(
        "Best spine candidate: x=%d..%d (w=%d), score=%.4f",
        spine_x, spine_x + spine_w, spine_w, best_score,
    )

    # Verify: must have bright (paper) content on BOTH sides.  Each flank is
    # a window of 10% of the image width directly next to the run.
    left_brightness = float(np.mean(brightness_smooth[max(0, spine_x - w // 10):spine_x]))
    right_end = center_lo + best_end
    right_brightness = float(np.mean(brightness_smooth[right_end:min(w, right_end + w // 10)]))

    if left_brightness < spine_thresh or right_brightness < spine_thresh:
        logger.debug("No bright paper flanking spine: left=%.0f right=%.0f thresh=%.0f",
                      left_brightness, right_brightness, spine_thresh)
        return []

    # Report the darkest value of the chosen run; the darkest column of the
    # whole center region may belong to a different, rejected run.
    spine_brightness = float(np.min(center_brightness[best_start:best_end]))
    logger.info(
        "Spine detected: x=%d..%d (w=%d), brightness=%.0f vs paper=%.0f, "
        "left_paper=%.0f, right_paper=%.0f",
        spine_x, right_end, spine_w, spine_brightness, page_brightness,
        left_brightness, right_brightness,
    )

    # Split at the spine center
    split_points = [spine_center]

    # Build page rectangles between consecutive boundaries
    bounds = [0, *split_points, w]
    pages = [
        {"x": x0, "y": 0, "width": x1 - x0, "height": h, "page_index": i}
        for i, (x0, x1) in enumerate(zip(bounds, bounds[1:]))
    ]

    # Filter out tiny pages (< 15% of total width)
    pages = [p for p in pages if p["width"] >= w * 0.15]
    if len(pages) < 2:
        return []

    # Re-index
    for i, p in enumerate(pages):
        p["page_index"] = i

    logger.info(
        "Page split detected: %d pages, spine_w=%d, split_points=%s",
        len(pages), spine_w, split_points,
    )
    return pages


def detect_and_crop_page(
    img_bgr: np.ndarray,
    margin_frac: float = 0.01,
) -> Tuple[np.ndarray, Dict[str, Any]]:
    """Detect content boundary and crop scanner/book borders.

    Algorithm (4-edge detection):
    1. Adaptive threshold -> binary (text=255, bg=0)
    2. Left edge: ``_detect_left_edge_shadow`` (given grayscale + binary image)
    3. Right edge: ``_detect_right_edge_shadow`` (given grayscale + binary image)
    4. Top/bottom edges: ``_detect_top_bottom_edges`` (given the binary image)
    5. Sanity checks, then crop with configurable margin

    The image is returned unchanged (``crop_applied`` stays ``False``) when
    it is empty, when every border is below 2%, or when the crop window is
    degenerate or covers less than 40% of the original area.

    Args:
        img_bgr: Input BGR image (should already be deskewed/dewarped).
            A single-channel (2-D) image is also accepted and used directly
            as the grayscale input.
        margin_frac: Extra margin around content (fraction of dimension, default 1%)

    Returns:
        Tuple of (cropped_image, result_dict).  ``result_dict`` holds
        ``crop_applied``, ``crop_rect`` (pixels), ``crop_rect_pct``,
        ``original_size``, ``cropped_size``, ``detected_format``,
        ``format_confidence``, ``aspect_ratio`` and ``border_fractions``.
    """
    h, w = img_bgr.shape[:2]
    total_area = h * w

    result: Dict[str, Any] = {
        "crop_applied": False,
        "crop_rect": None,
        "crop_rect_pct": None,
        "original_size": {"width": w, "height": h},
        "cropped_size": {"width": w, "height": h},
        "detected_format": None,
        "format_confidence": 0.0,
        "aspect_ratio": round(max(h, w) / max(min(h, w), 1), 4),
        "border_fractions": {"top": 0.0, "bottom": 0.0, "left": 0.0, "right": 0.0},
    }

    # An empty image has nothing to analyse, and the border fractions below
    # would divide by zero.
    if total_area == 0:
        return img_bgr, result

    if img_bgr.ndim == 2:
        gray = img_bgr  # already single-channel
    else:
        gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # --- Binarise with adaptive threshold ---
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, blockSize=51, C=15,
    )

    # --- Edge detection ---
    left_edge = _detect_left_edge_shadow(gray, binary, w, h)
    right_edge = _detect_right_edge_shadow(gray, binary, w, h)
    top_edge, bottom_edge = _detect_top_bottom_edges(binary, w, h)

    # Compute border fractions
    border_top = top_edge / h
    border_bottom = (h - bottom_edge) / h
    border_left = left_edge / w
    border_right = (w - right_edge) / w

    result["border_fractions"] = {
        "top": round(border_top, 4),
        "bottom": round(border_bottom, 4),
        "left": round(border_left, 4),
        "right": round(border_right, 4),
    }

    # Sanity: only crop if at least one edge has > 2% border
    min_border = 0.02
    if all(f < min_border for f in [border_top, border_bottom, border_left, border_right]):
        logger.info("All borders < %.0f%% — no crop needed", min_border * 100)
        result["detected_format"], result["format_confidence"] = _detect_format(w, h)
        return img_bgr, result

    # Add margin
    margin_x = int(w * margin_frac)
    margin_y = int(h * margin_frac)

    crop_x = max(0, left_edge - margin_x)
    crop_y = max(0, top_edge - margin_y)
    crop_x2 = min(w, right_edge + margin_x)
    crop_y2 = min(h, bottom_edge + margin_y)

    crop_w = crop_x2 - crop_x
    crop_h = crop_y2 - crop_y

    # Sanity: cropped area must be >= 40% of original.  Each dimension is
    # clamped at zero first: an inverted window (both dimensions negative)
    # would otherwise multiply to a positive "area", pass this check and
    # yield an empty crop.
    crop_area = max(0, crop_w) * max(0, crop_h)
    if crop_area < 0.40 * total_area:
        logger.warning("Cropped area too small (%.0f%%) — skipping crop",
                       100.0 * crop_area / total_area)
        result["detected_format"], result["format_confidence"] = _detect_format(w, h)
        return img_bgr, result

    # Copy so the result does not alias the caller's image buffer
    cropped = img_bgr[crop_y:crop_y2, crop_x:crop_x2].copy()

    detected_format, format_confidence = _detect_format(crop_w, crop_h)

    result["crop_applied"] = True
    result["crop_rect"] = {"x": crop_x, "y": crop_y, "width": crop_w, "height": crop_h}
    result["crop_rect_pct"] = {
        "x": round(100.0 * crop_x / w, 2),
        "y": round(100.0 * crop_y / h, 2),
        "width": round(100.0 * crop_w / w, 2),
        "height": round(100.0 * crop_h / h, 2),
    }
    result["cropped_size"] = {"width": crop_w, "height": crop_h}
    result["detected_format"] = detected_format
    result["format_confidence"] = format_confidence
    result["aspect_ratio"] = round(max(crop_w, crop_h) / max(min(crop_w, crop_h), 1), 4)

    logger.info(
        "Page cropped: %dx%d -> %dx%d, format=%s (%.0f%%), "
        "borders: T=%.1f%% B=%.1f%% L=%.1f%% R=%.1f%%",
        w, h, crop_w, crop_h, detected_format, format_confidence * 100,
        border_top * 100, border_bottom * 100,
        border_left * 100, border_right * 100,
    )

    return cropped, result


# ---------------------------------------------------------------------------
# Format detection (kept as optional metadata)
# ---------------------------------------------------------------------------

def _detect_format(width: int, height: int) -> Tuple[str, float]:
    """Guess the paper format whose aspect ratio is closest to the given size.

    The long/short side ratio is compared against every entry of
    ``PAPER_FORMATS``.  Confidence starts at 1.0 for an exact match and drops
    by 5 per unit of ratio difference.

    Returns:
        ``(format_name, confidence)`` with the confidence rounded to three
        decimals, or ``("unknown", 0.0)`` for non-positive dimensions or a
        confidence below 0.3.
    """
    if not (width > 0 and height > 0):
        return "unknown", 0.0

    long_side = max(width, height)
    short_side = min(width, height)
    aspect = long_side / short_side

    # min() returns the first of equally close candidates; the default
    # covers an empty format table and leads to the "unknown" result below.
    best_format, best_ratio = min(
        PAPER_FORMATS.items(),
        key=lambda entry: abs(aspect - entry[1]),
        default=("unknown", float("inf")),
    )

    confidence = max(0.0, 1.0 - abs(aspect - best_ratio) * 5.0)
    if confidence >= 0.3:
        return best_format, round(confidence, 3)
    return "unknown", 0.0