Fix colspan text + box row fields for GridTable compatibility

Colspan: use the original word-block text instead of joining the split cell texts. This prevents broken output such as "euros a nd cents", which is caused by split_cross_column_words. Box rows: add the is_header field (it was undefined, causing GridTable rendering issues). Add y_min_px/y_max_px and y_min_pct/y_max_pct to header_only rows. These missing fields caused empty rows with only row numbers visible. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 12:08:49 +02:00
parent dc25f243a4
commit 868f99f109
2 changed files with 12 additions and 18 deletions
--- a/klausur-service/backend/cv_box_layout.py
+++ b/klausur-service/backend/cv_box_layout.py
@@ -177,7 +177,12 @@ def build_box_zone_grid(
        ).strip()
        return {
            "columns": [{"col_index": 0, "index": 0, "label": "column_text", "col_type": "column_1"}],
-            "rows": [{"index": 0, "row_index": 0, "y_min": box_y, "y_max": box_y + box_h, "y_center": box_y + box_h / 2}],
+            "rows": [{"index": 0, "row_index": 0,
+                       "y_min": box_y, "y_max": box_y + box_h, "y_center": box_y + box_h / 2,
+                       "y_min_px": box_y, "y_max_px": box_y + box_h,
+                       "y_min_pct": round(box_y / img_h * 100, 2) if img_h else 0,
+                       "y_max_pct": round((box_y + box_h) / img_h * 100, 2) if img_h else 0,
+                       "is_header": True}],
            "cells": [{
                "cell_id": f"Z{zone_index}_R0C0",
                "row_index": 0,
@@ -211,11 +216,11 @@ def build_box_zone_grid(
                "y_min": y_min,
                "y_max": y_max,
                "y_center": y_center,
-                # GridTable expects _px and _pct variants
                "y_min_px": y_min,
                "y_max_px": y_max,
                "y_min_pct": round(y_min / img_h * 100, 2) if img_h else 0,
                "y_max_pct": round(y_max / img_h * 100, 2) if img_h else 0,
+                "is_header": False,
            }
            rows.append(row)

--- a/klausur-service/backend/grid_editor_helpers.py
+++ b/klausur-service/backend/grid_editor_helpers.py
@@ -1296,22 +1296,11 @@ def _detect_colspan_cells(
                is_part_of_span = True
                # Only emit the merged cell for the FIRST column in the span
                if ci == span["cols"][0]:
-                    # Collect all cells in this span
-                    span_cells = [c for c in cells
-                                  if c.get("row_index") == ri
-                                  and c.get("col_index") in span["cols"]]
-                    # Merge texts (skip if same text repeated)
-                    texts = []
-                    for sc in sorted(span_cells, key=lambda c: c.get("col_index", 0)):
-                        t = sc.get("text", "").strip()
-                        if t and t not in texts:
-                            texts.append(t)
-                    merged_text = " ".join(texts)
-
-                    # Collect all word_boxes
-                    all_wb = []
-                    for sc in span_cells:
-                        all_wb.extend(sc.get("word_boxes", []))
+                    # Use the ORIGINAL word-block text (not the split cell texts
+                    # which may have broken words like "euros a" + "nd cents")
+                    orig_word = span["word"]
+                    merged_text = orig_word.get("text", "").strip()
+                    all_wb = [orig_word]

                    # Compute merged bbox
                    if all_wb: