diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 3a9dde9..2bd8bd1 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -2593,6 +2593,9 @@ def _detect_header_footer_gaps( large_gap_threshold = median_gap * GAP_MULTIPLIER # Step 6: Find largest qualifying gap in header / footer zones + # A separator gap must have content on BOTH sides — edge-touching gaps + # (e.g. dewarp padding at bottom) are not valid separators. + EDGE_MARGIN = max(5, actual_h // 400) header_zone_limit = int(actual_h * HEADER_FOOTER_ZONE) footer_zone_start = int(actual_h * (1.0 - HEADER_FOOTER_ZONE)) @@ -2601,6 +2604,8 @@ def _detect_header_footer_gaps( best_header_size = 0 for gs, ge in raw_gaps: + if gs <= EDGE_MARGIN: + continue # skip gaps touching the top edge gap_mid = (gs + ge) / 2 gap_size = ge - gs if gap_mid < header_zone_limit and gap_size > large_gap_threshold: @@ -2610,6 +2615,8 @@ def _detect_header_footer_gaps( best_footer_size = 0 for gs, ge in raw_gaps: + if ge >= actual_h - EDGE_MARGIN: + continue # skip gaps touching the bottom edge gap_mid = (gs + ge) / 2 gap_size = ge - gs if gap_mid > footer_zone_start and gap_size > large_gap_threshold: diff --git a/klausur-service/backend/tests/test_cv_vocab_pipeline.py b/klausur-service/backend/tests/test_cv_vocab_pipeline.py index 0ce25d7..4d764c7 100644 --- a/klausur-service/backend/tests/test_cv_vocab_pipeline.py +++ b/klausur-service/backend/tests/test_cv_vocab_pipeline.py @@ -1092,6 +1092,17 @@ class TestHeaderFooterGapDetection: assert header_y is None assert footer_y is None + def test_edge_gaps_ignored_dewarp_padding(self): + """Trailing gap at bottom edge (dewarp padding) should not be detected as footer.""" + h, w = 2000, 800 + # Body lines from 10 to 1700 + bands = self._make_body_with_lines(h, w, 10, 1700) + # Gap from 1700 to 2000 = bottom edge padding (no content after) + inv = self._make_inv(h, w, bands) + header_y, footer_y = _detect_header_footer_gaps(inv, w, h) + # The trailing gap touches the image edge → not a valid separator + assert footer_y is None + class TestRegionContentCheck: """Tests for _region_has_content() and _add_header_footer() type selection."""