fix(ocr-pipeline): exclude header/footer/page_ref from cell grid columns
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3057,8 +3057,9 @@ def build_cell_grid(
|
||||
logger.warning("build_cell_grid: no content rows found")
|
||||
return [], []
|
||||
|
||||
# Use all columns except column_ignore
|
||||
relevant_cols = [c for c in column_regions if c.type != 'column_ignore']
|
||||
# Use columns only — skip ignore, header, footer, page_ref
|
||||
_skip_types = {'column_ignore', 'header', 'footer', 'page_ref'}
|
||||
relevant_cols = [c for c in column_regions if c.type not in _skip_types]
|
||||
if not relevant_cols:
|
||||
logger.warning("build_cell_grid: no usable columns found")
|
||||
return [], []
|
||||
|
||||
Reference in New Issue
Block a user