From 43dec5dd91aeeac4c3d25389da06269efdca5b90 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 17 Mar 2026 09:45:29 +0100 Subject: [PATCH] diag: add row-clustering logging for small/box zones Logs word positions, median height, Y tolerance, and resulting rows for zones with <= 30 words to diagnose row merging issues. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/grid_editor_api.py | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 3139795..2c1a665 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -365,6 +365,29 @@ def _build_zone_grid( # Cluster rows first (needed for column alignment analysis) rows = _cluster_rows(zone_words) + + # Diagnostic logging for small zones (box zones typically) + if len(zone_words) <= 30: + import statistics as _st + _heights = [w['height'] for w in zone_words if w.get('height', 0) > 0] + _med_h = _st.median(_heights) if _heights else 20 + _y_tol = max(_med_h * 0.5, 5) + logger.info( + "zone %d row-clustering: %d words, median_h=%.0f, y_tol=%.1f → %d rows", + zone_index, len(zone_words), _med_h, _y_tol, len(rows), + ) + for w in sorted(zone_words, key=lambda ww: (ww['top'], ww['left'])): + logger.info( + " zone %d word: y=%d x=%d h=%d w=%d '%s'", + zone_index, w['top'], w['left'], w['height'], w['width'], + w.get('text', '')[:40], + ) + for r in rows: + logger.info( + " zone %d row %d: y_min=%d y_max=%d y_center=%.0f", + zone_index, r['index'], r['y_min'], r['y_max'], r['y_center'], + ) + # Use global columns if provided, otherwise detect per zone columns = global_columns if global_columns else _cluster_columns_by_alignment(zone_words, zone_w, rows)