fix: expand narrow columns into neighbor space, not just gaps
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 27s
CI / test-go-edu-search (push) Successful in 25s
CI / test-python-klausur (push) Failing after 1m48s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s

Sub-column splits create adjacent columns with a 0px gap between them.
The previous expansion only worked when there was an explicit gap between
the column boundaries. The expansion now looks at where the neighbor's
actual words are and claims unused space up to within _MIN_WORD_MARGIN
(4px) of the nearest word, even if there is no gap between the column
boundaries.

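Also added INFO-level logging of the expansion input (per column: index,
x, width, and width as a percentage of the content width) to aid debugging.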

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-04 10:49:10 +01:00
parent e4aff2b27e
commit ae1f9f7494

View File

@@ -1912,12 +1912,16 @@ def expand_narrow_columns(
(which create the narrowest columns) have already happened.
"""
_NARROW_THRESHOLD_PCT = 10.0
_GAP_CLAIM_RATIO = 0.40
_MIN_WORD_MARGIN = 4
if len(geometries) < 2:
return geometries
logger.info("ExpandNarrowCols: input %d cols: %s",
len(geometries),
[(i, g.x, g.width, round(g.width / content_w * 100, 1))
for i, g in enumerate(geometries)])
for i, g in enumerate(geometries):
col_pct = g.width / content_w * 100 if content_w > 0 else 100
if col_pct >= _NARROW_THRESHOLD_PCT:
@@ -1929,37 +1933,37 @@ def expand_narrow_columns(
# --- try expanding to the LEFT ---
if i > 0:
left_nb = geometries[i - 1]
gap_left = g.x - (left_nb.x + left_nb.width)
if gap_left > _MIN_WORD_MARGIN * 2:
nb_words_right = [wd['left'] + wd.get('width', 0)
for wd in left_nb.words]
if nb_words_right:
safe_left_abs = left_x + max(nb_words_right) + _MIN_WORD_MARGIN
else:
safe_left_abs = left_nb.x + left_nb.width + _MIN_WORD_MARGIN
max_expand = int(gap_left * _GAP_CLAIM_RATIO)
new_x = max(safe_left_abs, g.x - max_expand)
if new_x < g.x:
delta = g.x - new_x
g.width += delta
g.x = new_x
expanded = True
# Gap can be 0 if sub-column split created adjacent columns.
# In that case, look at where the neighbor's rightmost words
# actually are — there may be unused space we can claim.
nb_words_right = [wd['left'] + wd.get('width', 0)
for wd in left_nb.words]
if nb_words_right:
rightmost_word_abs = left_x + max(nb_words_right)
safe_left_abs = rightmost_word_abs + _MIN_WORD_MARGIN
else:
# No words in neighbor → we can take up to neighbor's start
safe_left_abs = left_nb.x + _MIN_WORD_MARGIN
if safe_left_abs < g.x:
g.width += (g.x - safe_left_abs)
g.x = safe_left_abs
expanded = True
# --- try expanding to the RIGHT ---
if i + 1 < len(geometries):
right_nb = geometries[i + 1]
gap_right = right_nb.x - (g.x + g.width)
if gap_right > _MIN_WORD_MARGIN * 2:
nb_words_left = [wd['left'] for wd in right_nb.words]
if nb_words_left:
safe_right_abs = left_x + min(nb_words_left) - _MIN_WORD_MARGIN
else:
safe_right_abs = right_nb.x - _MIN_WORD_MARGIN
max_expand = int(gap_right * _GAP_CLAIM_RATIO)
new_right = min(safe_right_abs, g.x + g.width + max_expand)
if new_right > g.x + g.width:
g.width = new_right - g.x
expanded = True
nb_words_left = [wd['left'] for wd in right_nb.words]
if nb_words_left:
leftmost_word_abs = left_x + min(nb_words_left)
safe_right_abs = leftmost_word_abs - _MIN_WORD_MARGIN
else:
safe_right_abs = right_nb.x + right_nb.width - _MIN_WORD_MARGIN
cur_right = g.x + g.width
if safe_right_abs > cur_right:
g.width = safe_right_abs - g.x
expanded = True
if expanded:
col_left_rel = g.x - left_x