Fix hyphen-join: preserve next row + skip valid hyphenations
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 41s
CI / test-go-edu-search (push) Successful in 40s
CI / test-python-klausur (push) Failing after 2m26s
CI / test-python-agent-core (push) Successful in 27s
CI / test-nodejs-website (push) Successful in 31s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 41s
CI / test-go-edu-search (push) Successful in 40s
CI / test-python-klausur (push) Failing after 2m26s
CI / test-python-agent-core (push) Successful in 27s
CI / test-nodejs-website (push) Successful in 31s
Two bugs fixed:
- Apply (apply_gutter_suggestions) no longer removes the continuation word from the next row.
"künden" stays in row 31 — only the current row is repaired
("ve" → "ver-"). The original line-break layout is preserved.
- Analysis (analyse_grid_for_gutter_repair) now skips words that already end with "-" when joining
them directly with the first word of the next row yields a known word (valid hyphenation, not an error).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -400,8 +400,17 @@ def analyse_grid_for_gutter_repair(
|
||||
next_words = next_text.split()
|
||||
if next_words:
|
||||
first_next = next_words[0]
|
||||
first_next_clean = _TRAILING_PUNCT_RE.sub('', first_next)
|
||||
first_alpha = next((c for c in first_next if c.isalpha()), "")
|
||||
|
||||
# If the word already ends with "-" and the direct join
|
||||
# (no missing chars) is a known word, this is a VALID
|
||||
# hyphenation — not a gutter error. Skip it.
|
||||
if ends_with_hyphen and first_next_clean:
|
||||
direct = last_word_clean.rstrip("-") + first_next_clean
|
||||
if _is_known(direct):
|
||||
continue
|
||||
|
||||
# Continuation likely if:
|
||||
# - explicit hyphen, OR
|
||||
# - next row starts lowercase (= not a new entry)
|
||||
@@ -557,13 +566,16 @@ def apply_gutter_suggestions(
|
||||
# The first display part is what goes in the current row
|
||||
first_part = display_parts[0] if display_parts else ""
|
||||
|
||||
# Replace the last word in current cell
|
||||
# Replace the last word in current cell with the restored form.
|
||||
# The next row is NOT modified — "künden" stays in its row
|
||||
# because the original book layout has it there. We only fix
|
||||
# the truncated word in the current row (e.g. "ve" → "ver-").
|
||||
idx = old_text.rfind(original_word)
|
||||
if idx >= 0:
|
||||
new_text = old_text[:idx] + first_part + old_text[idx + len(original_word):]
|
||||
target_cell["text"] = new_text
|
||||
changes.append({
|
||||
"type": "hyphen_join_current",
|
||||
"type": "hyphen_join",
|
||||
"zone_index": zi,
|
||||
"row_index": ri,
|
||||
"col_index": ci,
|
||||
@@ -573,30 +585,6 @@ def apply_gutter_suggestions(
|
||||
"joined_word": joined,
|
||||
})
|
||||
|
||||
# Next row: remove the first word (it's now joined into current row)
|
||||
if next_ri >= 0:
|
||||
next_cell = None
|
||||
for cell in zone_cells:
|
||||
if cell.get("row_index") == next_ri and cell.get("col_index") == ci:
|
||||
next_cell = cell
|
||||
break
|
||||
|
||||
if next_cell:
|
||||
next_old = next_cell.get("text", "")
|
||||
next_words = next_old.split()
|
||||
if next_words:
|
||||
next_new = " ".join(next_words[1:])
|
||||
next_cell["text"] = next_new
|
||||
changes.append({
|
||||
"type": "hyphen_join_next",
|
||||
"zone_index": zi,
|
||||
"row_index": next_ri,
|
||||
"col_index": ci,
|
||||
"cell_id": next_cell.get("cell_id", ""),
|
||||
"old_text": next_old,
|
||||
"new_text": next_new,
|
||||
})
|
||||
|
||||
logger.info("Gutter repair applied: %d/%d suggestions", len(changes), len(accepted_suggestions))
|
||||
|
||||
return {
|
||||
|
||||
@@ -310,8 +310,8 @@ class TestApplySuggestions:
|
||||
assert result["applied_count"] == 1
|
||||
# Current row: "ve" replaced with "ver-"
|
||||
assert grid["zones"][0]["cells"][0]["text"] == "ver-"
|
||||
# Next row: "künden" removed, "und" remains
|
||||
assert grid["zones"][0]["cells"][1]["text"] == "und"
|
||||
# Next row: UNCHANGED — "künden" stays in its original row
|
||||
assert grid["zones"][0]["cells"][1]["text"] == "künden und"
|
||||
|
||||
def test_apply_nothing_when_no_accepted(self):
|
||||
grid = _make_grid([])
|
||||
|
||||
Reference in New Issue
Block a user