diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 1c4961d..c2a01d1 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -2097,6 +2097,14 @@ def _split_broad_columns( logger.info(f"SplitBroadCols: input {len(geometries)} cols: " f"{[(g.index, g.x, g.width, g.word_count, round(g.width_ratio, 3)) for g in geometries]}") + # Count how many broad content columns exist. If there are already 2+, + # the layout is likely already correctly split into EN / DE — skip. + broad_count = sum(1 for g in geometries + if g.width_ratio > _broad_threshold and len(g.words) >= 10) + if broad_count >= 2: + logger.info(f"SplitBroadCols: {broad_count} broad cols already → skip") + return geometries + for geo in geometries: if geo.width_ratio <= _broad_threshold or len(geo.words) < 10: result.append(geo)