""" Page Crop - Automatic scanner border removal and page format detection. Detects the paper boundary in a scanned image and crops away scanner borders. Also identifies the paper format (A4, Letter, etc.) from the aspect ratio. License: Apache 2.0 """ import logging from typing import Dict, Any, Tuple import cv2 import numpy as np logger = logging.getLogger(__name__) # Known paper format aspect ratios (height / width, portrait orientation) PAPER_FORMATS = { "A4": 297.0 / 210.0, # 1.4143 "A5": 210.0 / 148.0, # 1.4189 "Letter": 11.0 / 8.5, # 1.2941 "Legal": 14.0 / 8.5, # 1.6471 "A3": 420.0 / 297.0, # 1.4141 } def detect_and_crop_page( img_bgr: np.ndarray, min_border_fraction: float = 0.01, ) -> Tuple[np.ndarray, Dict[str, Any]]: """Detect page boundary and crop scanner borders. Algorithm: 1. Grayscale + GaussianBlur to smooth out text 2. Otsu threshold (page=bright, scanner border=dark) 3. Morphological close to fill gaps 4. Find largest contour = page 5. If contour covers >95% of image area -> no crop needed 6. Get bounding rect, add safety margin 7. Match aspect ratio to known paper formats Args: img_bgr: Input BGR image min_border_fraction: Minimum border fraction to trigger crop (default 1%) Returns: Tuple of (cropped_image, result_dict) """ h, w = img_bgr.shape[:2] total_area = h * w result: Dict[str, Any] = { "crop_applied": False, "crop_rect": None, "crop_rect_pct": None, "original_size": {"width": w, "height": h}, "cropped_size": {"width": w, "height": h}, "detected_format": None, "format_confidence": 0.0, "aspect_ratio": round(max(h, w) / max(min(h, w), 1), 4), "border_fractions": {"top": 0.0, "bottom": 0.0, "left": 0.0, "right": 0.0}, } # 1. Grayscale + blur gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) blurred = cv2.GaussianBlur(gray, (21, 21), 0) # 2. Otsu threshold _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) # 3. Morphological close to fill text gaps kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 50)) closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel) # 4. Find contours contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) if not contours: logger.info("No contours found - returning original image") return img_bgr, result # Get the largest contour largest = max(contours, key=cv2.contourArea) contour_area = cv2.contourArea(largest) # 5. If contour covers >95% of image, no crop needed if contour_area > 0.95 * total_area: logger.info("Page covers >95%% of image - no crop needed") result["detected_format"], result["format_confidence"] = _detect_format(w, h) return img_bgr, result # 6. Get bounding rect rx, ry, rw, rh = cv2.boundingRect(largest) # Calculate border fractions border_top = ry / h border_bottom = (h - (ry + rh)) / h border_left = rx / w border_right = (w - (rx + rw)) / w result["border_fractions"] = { "top": round(border_top, 4), "bottom": round(border_bottom, 4), "left": round(border_left, 4), "right": round(border_right, 4), } # 7. Check if borders are significant enough to crop if all(f < min_border_fraction for f in [border_top, border_bottom, border_left, border_right]): logger.info("All borders < %.1f%% - no crop needed", min_border_fraction * 100) result["detected_format"], result["format_confidence"] = _detect_format(w, h) return img_bgr, result # 8. Add safety margin (0.5% of image dimensions) margin_x = int(w * 0.005) margin_y = int(h * 0.005) crop_x = max(0, rx - margin_x) crop_y = max(0, ry - margin_y) crop_x2 = min(w, rx + rw + margin_x) crop_y2 = min(h, ry + rh + margin_y) crop_w = crop_x2 - crop_x crop_h = crop_y2 - crop_y # Sanity check: cropped area should be at least 50% of original if crop_w * crop_h < 0.5 * total_area: logger.warning("Cropped area too small (%.0f%%) - skipping crop", 100.0 * crop_w * crop_h / total_area) result["detected_format"], result["format_confidence"] = _detect_format(w, h) return img_bgr, result # 9. Crop cropped = img_bgr[crop_y:crop_y2, crop_x:crop_x2].copy() # 10. Detect format from cropped dimensions detected_format, format_confidence = _detect_format(crop_w, crop_h) result["crop_applied"] = True result["crop_rect"] = {"x": crop_x, "y": crop_y, "width": crop_w, "height": crop_h} result["crop_rect_pct"] = { "x": round(100.0 * crop_x / w, 2), "y": round(100.0 * crop_y / h, 2), "width": round(100.0 * crop_w / w, 2), "height": round(100.0 * crop_h / h, 2), } result["cropped_size"] = {"width": crop_w, "height": crop_h} result["detected_format"] = detected_format result["format_confidence"] = format_confidence result["aspect_ratio"] = round(max(crop_w, crop_h) / max(min(crop_w, crop_h), 1), 4) logger.info("Page cropped: %dx%d -> %dx%d, format=%s (%.0f%%), borders: T=%.1f%% B=%.1f%% L=%.1f%% R=%.1f%%", w, h, crop_w, crop_h, detected_format, format_confidence * 100, border_top * 100, border_bottom * 100, border_left * 100, border_right * 100) return cropped, result def _detect_format(width: int, height: int) -> Tuple[str, float]: """Detect paper format from dimensions by comparing aspect ratios. Returns: (format_name, confidence) where confidence is 0.0-1.0 """ if width <= 0 or height <= 0: return "unknown", 0.0 # Use portrait aspect ratio (taller / shorter) aspect = max(width, height) / min(width, height) best_format = "unknown" best_diff = float("inf") for fmt, expected_ratio in PAPER_FORMATS.items(): diff = abs(aspect - expected_ratio) if diff < best_diff: best_diff = diff best_format = fmt # Confidence: 1.0 if exact match, decreasing with deviation # Threshold: if diff > 0.1, confidence drops below 0.5 confidence = max(0.0, 1.0 - best_diff * 5.0) if confidence < 0.3: return "unknown", 0.0 return best_format, round(confidence, 3)