diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 4069a1e..b1820bd 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -1010,8 +1010,11 @@ def detect_column_geometry(ocr_img: np.ndarray, dewarped_bgr: np.ndarray) -> Opt logger.info("ColumnGeometry: < 3 clusters after verticality filter, signaling fallback") return None - # --- Merge clusters that are very close (3% of content width) --- - merge_distance = max(20, int(content_w * 0.03)) + # --- Merge clusters that are very close --- + # 6% of content width: on a typical 5-col vocab page (~1500px wide), + # this is ~90px, which merges sub-alignments within a single column + # while keeping real column boundaries (~300px apart) separate. + merge_distance = max(30, int(content_w * 0.06)) merged = [significant[0].copy()] for s in significant[1:]: if s['mean_x'] - merged[-1]['mean_x'] < merge_distance: