fix: expand narrow columns into neighbor space, not just gaps
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m48s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s

Sub-column splits create adjacent columns with a 0px gap between them.
The previous expansion only worked when there was an explicit gap between
the column boundaries. Now it looks at where the neighbor's actual words
are and claims the unused space up to within _MIN_WORD_MARGIN (4px) of the
nearest word, even if there is no gap between the column boundaries.
The expansion is no longer capped at a fraction of the gap width.

Also added logging (at INFO level) of the input column geometries
(index, x, width, width as % of content width) before expansion.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-04 10:49:10 +01:00
parent e4aff2b27e
commit ae1f9f7494

View File

@@ -1912,12 +1912,16 @@ def expand_narrow_columns(
(which create the narrowest columns) have already happened. (which create the narrowest columns) have already happened.
""" """
_NARROW_THRESHOLD_PCT = 10.0 _NARROW_THRESHOLD_PCT = 10.0
_GAP_CLAIM_RATIO = 0.40
_MIN_WORD_MARGIN = 4 _MIN_WORD_MARGIN = 4
if len(geometries) < 2: if len(geometries) < 2:
return geometries return geometries
logger.info("ExpandNarrowCols: input %d cols: %s",
len(geometries),
[(i, g.x, g.width, round(g.width / content_w * 100, 1))
for i, g in enumerate(geometries)])
for i, g in enumerate(geometries): for i, g in enumerate(geometries):
col_pct = g.width / content_w * 100 if content_w > 0 else 100 col_pct = g.width / content_w * 100 if content_w > 0 else 100
if col_pct >= _NARROW_THRESHOLD_PCT: if col_pct >= _NARROW_THRESHOLD_PCT:
@@ -1929,37 +1933,37 @@ def expand_narrow_columns(
# --- try expanding to the LEFT --- # --- try expanding to the LEFT ---
if i > 0: if i > 0:
left_nb = geometries[i - 1] left_nb = geometries[i - 1]
gap_left = g.x - (left_nb.x + left_nb.width) # Gap can be 0 if sub-column split created adjacent columns.
if gap_left > _MIN_WORD_MARGIN * 2: # In that case, look at where the neighbor's rightmost words
nb_words_right = [wd['left'] + wd.get('width', 0) # actually are — there may be unused space we can claim.
for wd in left_nb.words] nb_words_right = [wd['left'] + wd.get('width', 0)
if nb_words_right: for wd in left_nb.words]
safe_left_abs = left_x + max(nb_words_right) + _MIN_WORD_MARGIN if nb_words_right:
else: rightmost_word_abs = left_x + max(nb_words_right)
safe_left_abs = left_nb.x + left_nb.width + _MIN_WORD_MARGIN safe_left_abs = rightmost_word_abs + _MIN_WORD_MARGIN
max_expand = int(gap_left * _GAP_CLAIM_RATIO) else:
new_x = max(safe_left_abs, g.x - max_expand) # No words in neighbor → we can take up to neighbor's start
if new_x < g.x: safe_left_abs = left_nb.x + _MIN_WORD_MARGIN
delta = g.x - new_x
g.width += delta if safe_left_abs < g.x:
g.x = new_x g.width += (g.x - safe_left_abs)
expanded = True g.x = safe_left_abs
expanded = True
# --- try expanding to the RIGHT --- # --- try expanding to the RIGHT ---
if i + 1 < len(geometries): if i + 1 < len(geometries):
right_nb = geometries[i + 1] right_nb = geometries[i + 1]
gap_right = right_nb.x - (g.x + g.width) nb_words_left = [wd['left'] for wd in right_nb.words]
if gap_right > _MIN_WORD_MARGIN * 2: if nb_words_left:
nb_words_left = [wd['left'] for wd in right_nb.words] leftmost_word_abs = left_x + min(nb_words_left)
if nb_words_left: safe_right_abs = leftmost_word_abs - _MIN_WORD_MARGIN
safe_right_abs = left_x + min(nb_words_left) - _MIN_WORD_MARGIN else:
else: safe_right_abs = right_nb.x + right_nb.width - _MIN_WORD_MARGIN
safe_right_abs = right_nb.x - _MIN_WORD_MARGIN
max_expand = int(gap_right * _GAP_CLAIM_RATIO) cur_right = g.x + g.width
new_right = min(safe_right_abs, g.x + g.width + max_expand) if safe_right_abs > cur_right:
if new_right > g.x + g.width: g.width = safe_right_abs - g.x
g.width = new_right - g.x expanded = True
expanded = True
if expanded: if expanded:
col_left_rel = g.x - left_x col_left_rel = g.x - left_x