From ae1f9f7494d8109acd1c8eccedadd7dad05a1b42 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 4 Mar 2026 10:49:10 +0100 Subject: [PATCH] fix: expand narrow columns into neighbor space, not just gaps Sub-column splits create adjacent columns with 0px gap between them. The previous expansion only worked with explicit gaps. Now it looks at where the neighbor's actual words are and claims unused space up to _MIN_WORD_MARGIN (4px) from the nearest word, even if there's no gap in the column boundaries. If the neighbor contains no words, the narrow column may extend to within _MIN_WORD_MARGIN of the neighbor's far edge. Also added INFO-level logging of the expansion input for debugging. Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/cv_vocab_pipeline.py | 60 +++++++++++--------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 722cb67..33a5a3a 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -1912,12 +1912,16 @@ def expand_narrow_columns( (which create the narrowest columns) have already happened. 
""" _NARROW_THRESHOLD_PCT = 10.0 - _GAP_CLAIM_RATIO = 0.40 _MIN_WORD_MARGIN = 4 if len(geometries) < 2: return geometries + logger.info("ExpandNarrowCols: input %d cols: %s", + len(geometries), + [(i, g.x, g.width, round(g.width / content_w * 100, 1)) + for i, g in enumerate(geometries)]) + for i, g in enumerate(geometries): col_pct = g.width / content_w * 100 if content_w > 0 else 100 if col_pct >= _NARROW_THRESHOLD_PCT: @@ -1929,37 +1933,37 @@ def expand_narrow_columns( # --- try expanding to the LEFT --- if i > 0: left_nb = geometries[i - 1] - gap_left = g.x - (left_nb.x + left_nb.width) - if gap_left > _MIN_WORD_MARGIN * 2: - nb_words_right = [wd['left'] + wd.get('width', 0) - for wd in left_nb.words] - if nb_words_right: - safe_left_abs = left_x + max(nb_words_right) + _MIN_WORD_MARGIN - else: - safe_left_abs = left_nb.x + left_nb.width + _MIN_WORD_MARGIN - max_expand = int(gap_left * _GAP_CLAIM_RATIO) - new_x = max(safe_left_abs, g.x - max_expand) - if new_x < g.x: - delta = g.x - new_x - g.width += delta - g.x = new_x - expanded = True + # Gap can be 0 if sub-column split created adjacent columns. + # In that case, look at where the neighbor's rightmost words + # actually are — there may be unused space we can claim. 
+ nb_words_right = [wd['left'] + wd.get('width', 0) + for wd in left_nb.words] + if nb_words_right: + rightmost_word_abs = left_x + max(nb_words_right) + safe_left_abs = rightmost_word_abs + _MIN_WORD_MARGIN + else: + # No words in neighbor → we can take up to neighbor's start + safe_left_abs = left_nb.x + _MIN_WORD_MARGIN + + if safe_left_abs < g.x: + g.width += (g.x - safe_left_abs) + g.x = safe_left_abs + expanded = True # --- try expanding to the RIGHT --- if i + 1 < len(geometries): right_nb = geometries[i + 1] - gap_right = right_nb.x - (g.x + g.width) - if gap_right > _MIN_WORD_MARGIN * 2: - nb_words_left = [wd['left'] for wd in right_nb.words] - if nb_words_left: - safe_right_abs = left_x + min(nb_words_left) - _MIN_WORD_MARGIN - else: - safe_right_abs = right_nb.x - _MIN_WORD_MARGIN - max_expand = int(gap_right * _GAP_CLAIM_RATIO) - new_right = min(safe_right_abs, g.x + g.width + max_expand) - if new_right > g.x + g.width: - g.width = new_right - g.x - expanded = True + nb_words_left = [wd['left'] for wd in right_nb.words] + if nb_words_left: + leftmost_word_abs = left_x + min(nb_words_left) + safe_right_abs = leftmost_word_abs - _MIN_WORD_MARGIN + else: + safe_right_abs = right_nb.x + right_nb.width - _MIN_WORD_MARGIN + + cur_right = g.x + g.width + if safe_right_abs > cur_right: + g.width = safe_right_abs - g.x + expanded = True if expanded: col_left_rel = g.x - left_x