diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 52756aa..3a9dde9 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -2545,12 +2545,15 @@ def _detect_header_footer_gaps( HEADER_FOOTER_ZONE = 0.20 GAP_MULTIPLIER = 2.0 - # Step 1: Horizontal projection over full image width - h_proj = np.sum(inv, axis=1).astype(float) - h_proj_norm = h_proj / (img_w * 255) if img_w > 0 else h_proj + # Step 1: Horizontal projection — clamp to img_h to avoid dewarp padding + actual_h = min(inv.shape[0], img_h) + roi = inv[:actual_h, :] + h_proj = np.sum(roi, axis=1).astype(float) + proj_w = roi.shape[1] + h_proj_norm = h_proj / (proj_w * 255) if proj_w > 0 else h_proj # Step 2: Smoothing - kernel_size = max(3, img_h // 200) + kernel_size = max(3, actual_h // 200) if kernel_size % 2 == 0: kernel_size += 1 h_smooth = np.convolve(h_proj_norm, np.ones(kernel_size) / kernel_size, mode='same') @@ -2561,7 +2564,7 @@ def _detect_header_footer_gaps( gap_threshold = max(median_density * 0.15, 0.003) in_gap = h_smooth < gap_threshold - MIN_GAP_HEIGHT = max(3, img_h // 500) + MIN_GAP_HEIGHT = max(3, actual_h // 500) # Step 4: Collect contiguous gaps raw_gaps: List[Tuple[int, int]] = [] @@ -2590,8 +2593,8 @@ def _detect_header_footer_gaps( large_gap_threshold = median_gap * GAP_MULTIPLIER # Step 6: Find largest qualifying gap in header / footer zones - header_zone_limit = int(img_h * HEADER_FOOTER_ZONE) - footer_zone_start = int(img_h * (1.0 - HEADER_FOOTER_ZONE)) + header_zone_limit = int(actual_h * HEADER_FOOTER_ZONE) + footer_zone_start = int(actual_h * (1.0 - HEADER_FOOTER_ZONE)) header_y: Optional[int] = None footer_y: Optional[int] = None