diff --git a/klausur-service/backend/grid_editor_helpers.py b/klausur-service/backend/grid_editor_helpers.py index a025fac..8100dc2 100644 --- a/klausur-service/backend/grid_editor_helpers.py +++ b/klausur-service/backend/grid_editor_helpers.py @@ -375,13 +375,17 @@ def _cluster_columns_by_alignment( used_ids = {id(c) for c in primary} | {id(c) for c in secondary} sig_xs = [c["mean_x"] for c in primary + secondary] - MIN_DISTINCT_ROWS_TERTIARY = max(MIN_DISTINCT_ROWS + 1, 4) - MIN_COVERAGE_TERTIARY = 0.05 # at least 5% of rows + # Tertiary: clusters that are clearly to the LEFT of the first + # significant column (or RIGHT of the last). If words consistently + # start at a position left of the established first column boundary, + # they MUST be a separate column — regardless of how few rows they + # cover. The only requirement is a clear spatial gap. + MIN_COVERAGE_TERTIARY = 0.02 # at least 1 row effectively tertiary = [] for c in clusters: if id(c) in used_ids: continue - if c["distinct_rows"] < MIN_DISTINCT_ROWS_TERTIARY: + if c["distinct_rows"] < 1: continue if c["row_coverage"] < MIN_COVERAGE_TERTIARY: continue