diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 6cbfe5f..697f7d4 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -309,8 +309,16 @@ def _build_zone_grid( zone_index: int, img_w: int, img_h: int, + global_columns: Optional[List[Dict]] = None, ) -> Dict[str, Any]: - """Build columns, rows, cells for a single zone from its words.""" + """Build columns, rows, cells for a single zone from its words. + + Args: + global_columns: If provided, use these pre-computed column boundaries + instead of detecting columns per zone. Used for content zones so + that all content zones (above/between/below boxes) share the same + column structure. Box zones always detect columns independently. + """ if not zone_words: return { "columns": [], @@ -321,8 +329,8 @@ def _build_zone_grid( # Cluster rows first (needed for column alignment analysis) rows = _cluster_rows(zone_words) - # Cluster columns by left-edge alignment - columns = _cluster_columns_by_alignment(zone_words, zone_w, rows) + # Use global columns if provided, otherwise detect per zone + columns = global_columns if global_columns else _cluster_columns_by_alignment(zone_words, zone_w, rows) if not columns or not rows: return { @@ -476,13 +484,40 @@ async def build_grid(session_id: str): content_x, content_y, content_w, content_h, boxes ) + # --- Global column detection across ALL content zones --- + # Content zones share the same table structure (the table + # spans the full page, boxes are overlaid on top). Detect + # columns once from all content-zone words so that narrow + # columns (page refs, markers) visible in only one zone + # are applied consistently everywhere. + all_content_words: List[Dict] = [] + for pz in page_zones: + if pz.zone_type == "content": + all_content_words.extend( + _words_in_zone(all_words, pz.y, pz.height, pz.x, pz.width) + ) + + global_columns = None + if all_content_words: + global_rows = _cluster_rows(all_content_words) + global_columns = _cluster_columns_by_alignment( + all_content_words, content_w, global_rows, + ) + logger.info( + "build-grid session %s: global columns from %d content words → %d columns", + session_id, len(all_content_words), len(global_columns), + ) + for pz in page_zones: zone_words = _words_in_zone( all_words, pz.y, pz.height, pz.x, pz.width ) + # Content zones use global columns; box zones detect independently + cols_override = global_columns if pz.zone_type == "content" else None grid = _build_zone_grid( zone_words, pz.x, pz.y, pz.width, pz.height, pz.index, img_w, img_h, + global_columns=cols_override, ) zone_entry: Dict[str, Any] = {