Fix hyphen-join: strip trailing punctuation from continuation word
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 50s
CI / test-go-edu-search (push) Successful in 47s
CI / test-python-klausur (push) Failing after 2m35s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 34s

The next-row word "künden," had a trailing comma, causing the dictionary
lookup to fail for "verkünden,". The continuation word now has trailing
punctuation (any run of . , ; : ! ? ) ]) stripped before joining.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-10 19:25:28 +02:00
parent d1e7dd1c4a
commit aabd849e35
2 changed files with 24 additions and 1 deletions

View File

@@ -161,6 +161,9 @@ class GutterSuggestion:
# Core repair logic # Core repair logic
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_TRAILING_PUNCT_RE = re.compile(r'[.,;:!?\)\]]+$')
def _try_hyphen_join( def _try_hyphen_join(
word_text: str, word_text: str,
next_word_text: str, next_word_text: str,
@@ -168,10 +171,15 @@ def _try_hyphen_join(
) -> Optional[Tuple[str, str, float]]: ) -> Optional[Tuple[str, str, float]]:
"""Try joining two fragments with 0..max_missing interpolated chars. """Try joining two fragments with 0..max_missing interpolated chars.
Strips trailing punctuation from the continuation word before testing
(e.g. "künden," → "künden") so dictionary lookup succeeds.
Returns (joined_word, missing_chars, confidence) or None. Returns (joined_word, missing_chars, confidence) or None.
""" """
base = word_text.rstrip("-").rstrip() base = word_text.rstrip("-").rstrip()
continuation = next_word_text.lstrip() # Strip trailing punctuation from continuation (commas, periods, etc.)
raw_continuation = next_word_text.lstrip()
continuation = _TRAILING_PUNCT_RE.sub('', raw_continuation)
if not base or not continuation: if not base or not continuation:
return None return None

View File

@@ -126,6 +126,21 @@ class TestTryHyphenJoin:
assert _try_hyphen_join("", "word") is None assert _try_hyphen_join("", "word") is None
assert _try_hyphen_join("word", "") is None assert _try_hyphen_join("word", "") is None
def test_join_strips_trailing_punctuation(self):
    """A trailing comma on the continuation fragment must not block the join.

    "ver-" + "künden," is expected to be joined to "verkünden", i.e. the
    comma is dropped before the joined word is produced.
    """
    result = _try_hyphen_join("ver-", "künden,")
    assert result is not None
    # Only the joined word matters here; the other two fields are ignored.
    joined, _missing, _conf = result
    assert joined == "verkünden"
def test_join_with_missing_chars_and_punctuation(self):
    """Join with an interpolated character while punctuation is stripped.

    "ve" + "künden," needs an "r" inserted between the fragments and the
    trailing comma removed to produce "verkünden".
    """
    result = _try_hyphen_join("ve", "künden,", max_missing=2)
    assert result is not None
    # The confidence value is not checked by this test.
    joined, missing, _conf = result
    assert joined == "verkünden"
    assert "r" in missing
@needs_spellchecker @needs_spellchecker
class TestTrySpellFix: class TestTrySpellFix: