diff --git a/klausur-service/backend/grid_build_core.py b/klausur-service/backend/grid_build_core.py index d18e36b..5d47861 100644 --- a/klausur-service/backend/grid_build_core.py +++ b/klausur-service/backend/grid_build_core.py @@ -1638,6 +1638,89 @@ async def _build_grid_core( if wb_reordered: logger.info("Step 5j: re-ordered word_boxes in %d cells to reading order", wb_reordered) + # 5k. Enforce max_columns by merging narrowest columns + if max_columns and max_columns > 0: + for z in zones_data: + if z.get("zone_type") != "content": + continue + cols = z.get("columns", []) + cells = z.get("cells", []) + if len(cols) <= max_columns: + continue + + logger.info( + "max_columns=%d: zone %s has %d columns → merging", + max_columns, z.get("zone_index"), len(cols), + ) + + # Sort columns by width (ascending) — merge narrowest first + cols_by_width = sorted(cols, key=lambda c: (c.get("x_max_px", 0) - c.get("x_min_px", 0))) + + while len(cols) > max_columns: + # Find the narrowest column + narrowest = cols_by_width.pop(0) + ni = narrowest["index"] + + # Find its nearest neighbor (by x-position) + sorted_by_x = sorted(cols, key=lambda c: c.get("x_min_px", c.get("x_min_pct", 0))) + pos = next(i for i, c in enumerate(sorted_by_x) if c["index"] == ni) + # Merge into right neighbor if possible, else left + if pos + 1 < len(sorted_by_x): + merge_target = sorted_by_x[pos + 1] + elif pos > 0: + merge_target = sorted_by_x[pos - 1] + else: + break + + ti = merge_target["index"] + + # Expand target column bounds + merge_target["x_min_px"] = min( + merge_target.get("x_min_px", merge_target.get("x_min_pct", 0)), + narrowest.get("x_min_px", narrowest.get("x_min_pct", 0)), + ) + merge_target["x_max_px"] = max( + merge_target.get("x_max_px", merge_target.get("x_max_pct", 100)), + narrowest.get("x_max_px", narrowest.get("x_max_pct", 100)), + ) + if "x_min_pct" in merge_target and "x_min_pct" in narrowest: + merge_target["x_min_pct"] = min(merge_target["x_min_pct"], narrowest["x_min_pct"]) + merge_target["x_max_pct"] = max(merge_target["x_max_pct"], narrowest["x_max_pct"]) + + # Reassign cells from narrowest → target + for cell in cells: + if cell.get("col_index") == ni: + cell["col_index"] = ti + # Append text to existing cell in same row if it exists + existing = next( + (c for c in cells if c["col_index"] == ti + and c["row_index"] == cell["row_index"] + and c is not cell), + None, + ) + if existing: + existing["text"] = ( + (existing.get("text", "") + " " + cell.get("text", "")).strip() + ) + existing["word_boxes"] = existing.get("word_boxes", []) + cell.get("word_boxes", []) + cell["_merged"] = True + + # Remove merged cells and column + z["cells"] = [c for c in cells if not c.get("_merged")] + cells = z["cells"] + cols.remove(narrowest) + cols_by_width = [c for c in cols_by_width if c["index"] != ni] + + # Re-index columns 0..N-1 + for new_idx, col in enumerate(sorted(cols, key=lambda c: c.get("x_min_px", c.get("x_min_pct", 0)))): + old_idx = col["index"] + col["index"] = new_idx + for cell in cells: + if cell.get("col_index") == old_idx: + cell["col_index"] = new_idx + + logger.info("max_columns: zone %s now has %d columns", z.get("zone_index"), len(cols)) + duration = time.time() - t0 # 6. Build result