Fix box grid: row height fields + spanning cell detection
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 46s
CI / test-go-edu-search (push) Successful in 43s
CI / test-python-klausur (push) Failing after 2m36s
CI / test-python-agent-core (push) Successful in 33s
CI / test-nodejs-website (push) Successful in 37s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 46s
CI / test-go-edu-search (push) Successful in 43s
CI / test-python-klausur (push) Failing after 2m36s
CI / test-python-agent-core (push) Successful in 33s
CI / test-nodejs-website (push) Successful in 37s
Box 3 empty rows: flowing/bullet_list rows were missing the y_min_px / y_max_px fields that GridTable uses for row height calculation. Added the _px and _pct variants. Box 2 spanning cells: rows with fewer word-blocks than columns (e.g., "In Britain..." spanning 2 columns) are now detected and merged into a single spanning_header cell. GridTable already renders spanning_header cells across the full row width. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -211,6 +211,11 @@ def build_box_zone_grid(
|
|||||||
"y_min": y_min,
|
"y_min": y_min,
|
||||||
"y_max": y_max,
|
"y_max": y_max,
|
||||||
"y_center": y_center,
|
"y_center": y_center,
|
||||||
|
# GridTable expects _px and _pct variants
|
||||||
|
"y_min_px": y_min,
|
||||||
|
"y_max_px": y_max,
|
||||||
|
"y_min_pct": round(y_min / img_h * 100, 2) if img_h else 0,
|
||||||
|
"y_max_pct": round(y_max / img_h * 100, 2) if img_h else 0,
|
||||||
}
|
}
|
||||||
rows.append(row)
|
rows.append(row)
|
||||||
|
|
||||||
@@ -251,6 +256,51 @@ def build_box_zone_grid(
|
|||||||
zone_index, img_w, img_h,
|
zone_index, img_w, img_h,
|
||||||
global_columns=None, # detect columns independently
|
global_columns=None, # detect columns independently
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# --- Detect spanning cells ---
|
||||||
|
# If a row has fewer word-blocks than columns, some cells span multiple
|
||||||
|
# columns. Detect this and mark them as spanning_header so the frontend
|
||||||
|
# renders them correctly (single cell across the row).
|
||||||
|
columns = result.get("columns", [])
|
||||||
|
cells = result.get("cells", [])
|
||||||
|
rows = result.get("rows", [])
|
||||||
|
|
||||||
|
if len(columns) >= 2:
|
||||||
|
# Group original words by row
|
||||||
|
from grid_editor_helpers import _cluster_rows as _cr
|
||||||
|
row_data = _cr(zone_words)
|
||||||
|
row_word_map: Dict[int, List[Dict]] = {}
|
||||||
|
for w in zone_words:
|
||||||
|
yc = w["top"] + w["height"] / 2
|
||||||
|
best = min(row_data, key=lambda r: abs(r["y_center"] - yc))
|
||||||
|
row_word_map.setdefault(best["index"], []).append(w)
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
ri = row.get("index", row.get("row_index", -1))
|
||||||
|
rw = row_word_map.get(ri, [])
|
||||||
|
row_cells = [c for c in cells if c.get("row_index") == ri]
|
||||||
|
|
||||||
|
# If this row has fewer word-blocks than columns, it's a spanning row
|
||||||
|
if 0 < len(rw) < len(columns):
|
||||||
|
# Merge all cell texts and mark as spanning
|
||||||
|
merged_text = " ".join(
|
||||||
|
c.get("text", "") for c in sorted(row_cells, key=lambda c: c.get("col_index", 0))
|
||||||
|
).strip()
|
||||||
|
# Remove existing cells for this row
|
||||||
|
cells = [c for c in cells if c.get("row_index") != ri]
|
||||||
|
# Add single spanning cell
|
||||||
|
cells.append({
|
||||||
|
"cell_id": f"Z{zone_index}_R{ri:02d}_C0",
|
||||||
|
"row_index": ri,
|
||||||
|
"col_index": 0,
|
||||||
|
"col_type": "spanning_header",
|
||||||
|
"text": merged_text,
|
||||||
|
"word_boxes": rw,
|
||||||
|
})
|
||||||
|
logger.info("Box zone %d row %d: merged %d cells into spanning cell", zone_index, ri, len(row_cells))
|
||||||
|
|
||||||
|
result["cells"] = cells
|
||||||
|
|
||||||
result["box_layout_type"] = layout_type
|
result["box_layout_type"] = layout_type
|
||||||
result["box_grid_reviewed"] = False
|
result["box_grid_reviewed"] = False
|
||||||
return result
|
return result
|
||||||
|
|||||||
Reference in New Issue
Block a user