From cf995f2d526c0768fe0ee20163369a9dfee8f039 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Mon, 16 Mar 2026 22:04:17 +0100 Subject: [PATCH] fix: global column detection across content zones in Kombi grid builder Content zones (above/between/below boxes) now share the same column structure: columns are detected once from ALL content-zone words, then applied to each content zone. Box zones still detect columns independently. This fixes the issue where narrow columns (page refs like p.55) were not detected in small content zones above boxes, even though the same column existed in the larger content zone below the box. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/grid_editor_api.py | 41 ++++++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 6cbfe5f..697f7d4 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -309,8 +309,16 @@ def _build_zone_grid( zone_index: int, img_w: int, img_h: int, + global_columns: Optional[List[Dict]] = None, ) -> Dict[str, Any]: - """Build columns, rows, cells for a single zone from its words.""" + """Build columns, rows, cells for a single zone from its words. + + Args: + global_columns: If provided, use these pre-computed column boundaries + instead of detecting columns per zone. Used for content zones so + that all content zones (above/between/below boxes) share the same + column structure. Box zones always detect columns independently. + """ if not zone_words: return { "columns": [], @@ -321,8 +329,8 @@ def _build_zone_grid( # Cluster rows first (needed for column alignment analysis) rows = _cluster_rows(zone_words) - # Cluster columns by left-edge alignment - columns = _cluster_columns_by_alignment(zone_words, zone_w, rows) + # Use global columns if provided, otherwise detect per zone + columns = global_columns if global_columns else _cluster_columns_by_alignment(zone_words, zone_w, rows) if not columns or not rows: return { @@ -476,13 +484,40 @@ async def build_grid(session_id: str): content_x, content_y, content_w, content_h, boxes ) + # --- Global column detection across ALL content zones --- + # Content zones share the same table structure (the table + # spans the full page, boxes are overlaid on top). Detect + # columns once from all content-zone words so that narrow + # columns (page refs, markers) visible in only one zone + # are applied consistently everywhere. + all_content_words: List[Dict] = [] + for pz in page_zones: + if pz.zone_type == "content": + all_content_words.extend( + _words_in_zone(all_words, pz.y, pz.height, pz.x, pz.width) + ) + + global_columns = None + if all_content_words: + global_rows = _cluster_rows(all_content_words) + global_columns = _cluster_columns_by_alignment( + all_content_words, content_w, global_rows, + ) + logger.info( + "build-grid session %s: global columns from %d content words → %d columns", + session_id, len(all_content_words), len(global_columns), + ) + for pz in page_zones: zone_words = _words_in_zone( all_words, pz.y, pz.height, pz.x, pz.width ) + # Content zones use global columns; box zones detect independently + cols_override = global_columns if pz.zone_type == "content" else None grid = _build_zone_grid( zone_words, pz.x, pz.y, pz.width, pz.height, pz.index, img_w, img_h, + global_columns=cols_override, ) zone_entry: Dict[str, Any] = {