fix(ocr-pipeline): exclude header/footer/page_ref from cell grid columns

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-01 17:33:48 +01:00
parent 27b895a848
commit a666e883da

View File

@@ -3057,8 +3057,9 @@ def build_cell_grid(
logger.warning("build_cell_grid: no content rows found")
return [], []
-# Use all columns except column_ignore
-relevant_cols = [c for c in column_regions if c.type != 'column_ignore']
+# Use columns only — skip ignore, header, footer, page_ref
+_skip_types = {'column_ignore', 'header', 'footer', 'page_ref'}
+relevant_cols = [c for c in column_regions if c.type not in _skip_types]
if not relevant_cols:
logger.warning("build_cell_grid: no usable columns found")
return [], []