From a666e883da524943897bc31ae823293ca2e2151c Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Sun, 1 Mar 2026 17:33:48 +0100
Subject: [PATCH] fix(ocr-pipeline): exclude header/footer/page_ref from cell grid columns

Co-Authored-By: Claude Opus 4.6
---
 klausur-service/backend/cv_vocab_pipeline.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py
index b25b94a..ddb570f 100644
--- a/klausur-service/backend/cv_vocab_pipeline.py
+++ b/klausur-service/backend/cv_vocab_pipeline.py
@@ -3057,8 +3057,9 @@ def build_cell_grid(
         logger.warning("build_cell_grid: no content rows found")
         return [], []
 
-    # Use all columns except column_ignore
-    relevant_cols = [c for c in column_regions if c.type != 'column_ignore']
+    # Use columns only — skip ignore, header, footer, page_ref
+    _skip_types = {'column_ignore', 'header', 'footer', 'page_ref'}
+    relevant_cols = [c for c in column_regions if c.type not in _skip_types]
     if not relevant_cols:
         logger.warning("build_cell_grid: no usable columns found")
         return [], []