diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 3139795..2c1a665 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -365,6 +365,29 @@ def _build_zone_grid( # Cluster rows first (needed for column alignment analysis) rows = _cluster_rows(zone_words) + + # Diagnostic logging for small zones (box zones typically) + if len(zone_words) <= 30: + import statistics as _st + _heights = [w['height'] for w in zone_words if w.get('height', 0) > 0] + _med_h = _st.median(_heights) if _heights else 20 + _y_tol = max(_med_h * 0.5, 5) + logger.info( + "zone %d row-clustering: %d words, median_h=%.0f, y_tol=%.1f → %d rows", + zone_index, len(zone_words), _med_h, _y_tol, len(rows), + ) + for w in sorted(zone_words, key=lambda ww: (ww['top'], ww['left'])): + logger.info( + " zone %d word: y=%d x=%d h=%d w=%d '%s'", + zone_index, w['top'], w['left'], w['height'], w['width'], + w.get('text', '')[:40], + ) + for r in rows: + logger.info( + " zone %d row %d: y_min=%d y_max=%d y_center=%.0f", + zone_index, r['index'], r['y_min'], r['y_max'], r['y_center'], + ) + # Use global columns if provided, otherwise detect per zone columns = global_columns if global_columns else _cluster_columns_by_alignment(zone_words, zone_w, rows)