fix(ocr-pipeline): clamp gap detection to img_h to avoid dewarp padding

The inverted image can be taller than img_h after dewarp shear correction,
causing footer_y to be detected outside the visible page. The horizontal
projection is now clamped to actual_h = min(inv.shape[0], img_h).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-02 17:06:58 +01:00
parent c8981423d4
commit f1fcc67357
1 changed files with 10 additions and 7 deletions
@@ -2545,12 +2545,15 @@ def _detect_header_footer_gaps(
    HEADER_FOOTER_ZONE = 0.20
    GAP_MULTIPLIER = 2.0
-    # Step 1: Horizontal projection over full image width
+    # Step 1: Horizontal projection — clamp to img_h to avoid dewarp padding
-    h_proj = np.sum(inv, axis=1).astype(float)
+    actual_h = min(inv.shape[0], img_h)
-    h_proj_norm = h_proj / (img_w * 255) if img_w > 0 else h_proj
+    roi = inv[:actual_h, :]
+    h_proj = np.sum(roi, axis=1).astype(float)
+    proj_w = roi.shape[1]
+    h_proj_norm = h_proj / (proj_w * 255) if proj_w > 0 else h_proj
    # Step 2: Smoothing
-    kernel_size = max(3, img_h // 200)
+    kernel_size = max(3, actual_h // 200)
    if kernel_size % 2 == 0:
        kernel_size += 1
    h_smooth = np.convolve(h_proj_norm, np.ones(kernel_size) / kernel_size, mode='same')
@@ -2561,7 +2564,7 @@ def _detect_header_footer_gaps(
    gap_threshold = max(median_density * 0.15, 0.003)
    in_gap = h_smooth < gap_threshold
-    MIN_GAP_HEIGHT = max(3, img_h // 500)
+    MIN_GAP_HEIGHT = max(3, actual_h // 500)
    # Step 4: Collect contiguous gaps
    raw_gaps: List[Tuple[int, int]] = []
@@ -2590,8 +2593,8 @@ def _detect_header_footer_gaps(
    large_gap_threshold = median_gap * GAP_MULTIPLIER
    # Step 6: Find largest qualifying gap in header / footer zones
-    header_zone_limit = int(img_h * HEADER_FOOTER_ZONE)
+    header_zone_limit = int(actual_h * HEADER_FOOTER_ZONE)
-    footer_zone_start = int(img_h * (1.0 - HEADER_FOOTER_ZONE))
+    footer_zone_start = int(actual_h * (1.0 - HEADER_FOOTER_ZONE))
    header_y: Optional[int] = None
    footer_y: Optional[int] = None