From d6a8c1d8214b897a9f6973f07c762f5032540aea Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 3 Mar 2026 07:48:07 +0100 Subject: [PATCH] fix(streaming): include page_ref columns in SSE metadata The streaming word endpoint listed page_ref in _skip_types, so page_ref columns were left out of the column count, causing sub-column splits to be lost in the meta event and final grid_shape. Aligned _skip_types with build_cell_grid_streaming(). Co-Authored-By: Claude Sonnet 4.6 --- klausur-service/backend/ocr_pipeline_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/klausur-service/backend/ocr_pipeline_api.py b/klausur-service/backend/ocr_pipeline_api.py index ba8dcb8..2d92727 100644 --- a/klausur-service/backend/ocr_pipeline_api.py +++ b/klausur-service/backend/ocr_pipeline_api.py @@ -1237,7 +1237,7 @@ async def _word_stream_generator( # Compute grid shape upfront for the meta event n_content_rows = len([r for r in row_geoms if r.row_type == 'content']) - _skip_types = {'column_ignore', 'header', 'footer', 'margin_top', 'margin_bottom', 'page_ref'} + _skip_types = {'column_ignore', 'header', 'footer', 'margin_top', 'margin_bottom', 'margin_left', 'margin_right'} n_cols = len([c for c in col_regions if c.type not in _skip_types]) # Determine layout