Generic colspan detection for merged cells in grids and boxes

New _detect_colspan_cells() in grid_editor_helpers.py:
- Runs after _build_cells() for every zone (content + box)
- Detects word-blocks that extend across column boundaries
- Merges affected cells into spanning_header with colspan=N
- Uses column midpoints to determine which columns are covered
- Works for full-page scans and box zones equally

Also fixes box flowing/bullet_list row height fields (y_min_px/y_max_px).

Removed duplicate spanning logic from cv_box_layout.py — now uses the generic _detect_colspan_cells from grid_editor_helpers.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 11:38:03 +02:00
parent 5d91698c3b
commit c62ff7cd31
2 changed files with 144 additions and 43 deletions
--- a/klausur-service/backend/cv_box_layout.py
+++ b/klausur-service/backend/cv_box_layout.py
@@ -257,49 +257,8 @@ def build_box_zone_grid(
        global_columns=None,  # detect columns independently
    )

-    # --- Detect spanning cells ---
-    # If a row has fewer word-blocks than columns, some cells span multiple
-    # columns.  Detect this and mark them as spanning_header so the frontend
-    # renders them correctly (single cell across the row).
-    columns = result.get("columns", [])
-    cells = result.get("cells", [])
-    rows = result.get("rows", [])
-
-    if len(columns) >= 2:
-        # Group original words by row
-        from grid_editor_helpers import _cluster_rows as _cr
-        row_data = _cr(zone_words)
-        row_word_map: Dict[int, List[Dict]] = {}
-        for w in zone_words:
-            yc = w["top"] + w["height"] / 2
-            best = min(row_data, key=lambda r: abs(r["y_center"] - yc))
-            row_word_map.setdefault(best["index"], []).append(w)
-
-        for row in rows:
-            ri = row.get("index", row.get("row_index", -1))
-            rw = row_word_map.get(ri, [])
-            row_cells = [c for c in cells if c.get("row_index") == ri]
-
-            # If this row has fewer word-blocks than columns, it's a spanning row
-            if 0 < len(rw) < len(columns):
-                # Merge all cell texts and mark as spanning
-                merged_text = " ".join(
-                    c.get("text", "") for c in sorted(row_cells, key=lambda c: c.get("col_index", 0))
-                ).strip()
-                # Remove existing cells for this row
-                cells = [c for c in cells if c.get("row_index") != ri]
-                # Add single spanning cell
-                cells.append({
-                    "cell_id": f"Z{zone_index}_R{ri:02d}_C0",
-                    "row_index": ri,
-                    "col_index": 0,
-                    "col_type": "spanning_header",
-                    "text": merged_text,
-                    "word_boxes": rw,
-                })
-                logger.info("Box zone %d row %d: merged %d cells into spanning cell", zone_index, ri, len(row_cells))
-
-        result["cells"] = cells
+    # Colspan detection is now handled generically by _detect_colspan_cells
+    # in grid_editor_helpers.py (called inside _build_zone_grid).

    result["box_layout_type"] = layout_type
    result["box_grid_reviewed"] = False