Fix box grid: row height fields + spanning cell detection

Box 3 empty rows: flowing/bullet_list rows were missing the y_min_px/y_max_px fields that GridTable uses for row height calculation. Added _px and _pct variants. Box 2 spanning cells: rows with fewer word-blocks than columns (e.g., "In Britain..." spanning 2 columns) are now detected and merged into a single spanning_header cell per row. GridTable already renders spanning_header cells across the full row width. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 09:46:43 +02:00
parent 5fa5767c9a
commit 5d91698c3b
1 changed file with 50 additions and 0 deletions
--- a/klausur-service/backend/cv_box_layout.py
+++ b/klausur-service/backend/cv_box_layout.py
@@ -211,6 +211,11 @@ def build_box_zone_grid(
                "y_min": y_min,
                "y_max": y_max,
                "y_center": y_center,
+                # GridTable expects _px and _pct variants
+                "y_min_px": y_min,
+                "y_max_px": y_max,
+                "y_min_pct": round(y_min / img_h * 100, 2) if img_h else 0,
+                "y_max_pct": round(y_max / img_h * 100, 2) if img_h else 0,
            }
            rows.append(row)

@@ -251,6 +256,51 @@ def build_box_zone_grid(
        zone_index, img_w, img_h,
        global_columns=None,  # detect columns independently
    )
+
+    # --- Detect spanning cells ---
+    # If a row has fewer word-blocks than columns, some cells span multiple
+    # columns.  Detect this and mark them as spanning_header so the frontend
+    # renders them correctly (single cell across the row).
+    columns = result.get("columns", [])
+    cells = result.get("cells", [])
+    rows = result.get("rows", [])
+
+    if len(columns) >= 2:
+        # Group original words by row
+        from grid_editor_helpers import _cluster_rows as _cr
+        row_data = _cr(zone_words)
+        row_word_map: Dict[int, List[Dict]] = {}
+        for w in zone_words:
+            yc = w["top"] + w["height"] / 2
+            best = min(row_data, key=lambda r: abs(r["y_center"] - yc))
+            row_word_map.setdefault(best["index"], []).append(w)
+
+        for row in rows:
+            ri = row.get("index", row.get("row_index", -1))
+            rw = row_word_map.get(ri, [])
+            row_cells = [c for c in cells if c.get("row_index") == ri]
+
+            # If this row has fewer word-blocks than columns, it's a spanning row
+            if 0 < len(rw) < len(columns):
+                # Merge all cell texts and mark as spanning
+                merged_text = " ".join(
+                    c.get("text", "") for c in sorted(row_cells, key=lambda c: c.get("col_index", 0))
+                ).strip()
+                # Remove existing cells for this row
+                cells = [c for c in cells if c.get("row_index") != ri]
+                # Add single spanning cell
+                cells.append({
+                    "cell_id": f"Z{zone_index}_R{ri:02d}_C0",
+                    "row_index": ri,
+                    "col_index": 0,
+                    "col_type": "spanning_header",
+                    "text": merged_text,
+                    "word_boxes": rw,
+                })
+                logger.info("Box zone %d row %d: merged %d cells into spanning cell", zone_index, ri, len(row_cells))
+
+        result["cells"] = cells
+
    result["box_layout_type"] = layout_type
    result["box_grid_reviewed"] = False
    return result