fix(ocr-pipeline): exclude header/footer/page_ref from cell grid columns
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3057,8 +3057,9 @@ def build_cell_grid(
         logger.warning("build_cell_grid: no content rows found")
         return [], []
 
-    # Use all columns except column_ignore
-    relevant_cols = [c for c in column_regions if c.type != 'column_ignore']
+    # Use columns only — skip ignore, header, footer, page_ref
+    _skip_types = {'column_ignore', 'header', 'footer', 'page_ref'}
+    relevant_cols = [c for c in column_regions if c.type not in _skip_types]
     if not relevant_cols:
         logger.warning("build_cell_grid: no usable columns found")
         return [], []
Reference in New Issue
Block a user