From bc5ab29c067eaadb0eb9818ed6339a3209199aab Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Fri, 20 Mar 2026 08:06:05 +0100
Subject: [PATCH] Fix false positive: exclude first/last rows from single-cell heading detection

Page numbers like "two hundred and twelve" in the last row were falsely
detected as headings. Now first and last non-header rows are excluded.

Co-Authored-By: Claude Opus 4.6
---
 klausur-service/backend/grid_editor_api.py | 10 ++++++++++
 .../backend/tests/test_grid_editor_api.py  | 19 +++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py
index 35bc996..307ff69 100644
--- a/klausur-service/backend/grid_editor_api.py
+++ b/klausur-service/backend/grid_editor_api.py
@@ -706,11 +706,21 @@ def _detect_heading_rows_by_single_cell(
         if multi_col_rows < len(rows) * 0.4:
             continue
 
+        # Exclude first and last non-header rows — these are typically
+        # page numbers or footer text, not headings.
+        non_header_rows = [r for r in rows if not r.get("is_header")]
+        if len(non_header_rows) < 3:
+            continue
+        first_ri = non_header_rows[0]["index"]
+        last_ri = non_header_rows[-1]["index"]
+
         heading_row_indices = []
         for row in rows:
             if row.get("is_header"):
                 continue
             ri = row["index"]
+            if ri == first_ri or ri == last_ri:
+                continue
             row_cells = [c for c in cells if c.get("row_index") == ri]
             content_cells = [
                 c for c in row_cells
diff --git a/klausur-service/backend/tests/test_grid_editor_api.py b/klausur-service/backend/tests/test_grid_editor_api.py
index b0b0563..5e5f99b 100644
--- a/klausur-service/backend/tests/test_grid_editor_api.py
+++ b/klausur-service/backend/tests/test_grid_editor_api.py
@@ -675,3 +675,22 @@ class TestDetectHeadingRowsBySingleCell:
         assert len(heading) == 1
         assert heading[0]["col_type"] == "heading"
         assert heading[0]["col_index"] == 1  # Should start at column_2, not 0
+
+    def test_last_row_single_cell_not_heading(self):
+        """Single-cell in last row (e.g. page number '212') → NOT heading."""
+        zone = self._make_vocab_zone()
+        # Make row 7 (the last) have only 1 cell in column_2
+        zone["cells"] = [c for c in zone["cells"] if c["row_index"] != 7]
+        zone["cells"].append({
+            "cell_id": "Z0_R07_C1",
+            "zone_index": 0, "row_index": 7, "col_index": 1,
+            "col_type": "column_2", "text": "two hundred and twelve",
+            "word_boxes": [self._make_word_box("two", 130, 310, 30, 20)],
+        })
+        zones_data = [zone]
+        count = _detect_heading_rows_by_single_cell(zones_data, 800, 1000)
+        # Row 4 "Theme" = heading, but row 7 (last) should NOT be heading
+        assert count == 1
+        heading_cells = [c for c in zone["cells"]
+                         if c.get("col_type") == "heading"]
+        assert all(c["row_index"] != 7 for c in heading_cells)