Fix hyphen-join: strip trailing punctuation from continuation word
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 50s
CI / test-go-edu-search (push) Successful in 47s
CI / test-python-klausur (push) Failing after 2m35s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 34s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 50s
CI / test-go-edu-search (push) Successful in 47s
CI / test-python-klausur (push) Failing after 2m35s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 34s
The next-row word "künden," had a trailing comma, causing the dictionary lookup to fail for "verkünden,". The continuation word now has trailing punctuation (any run of . , ; : ! ? and closing ) or ]) stripped before joining.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -161,6 +161,9 @@ class GutterSuggestion:
|
||||
# Core repair logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_TRAILING_PUNCT_RE = re.compile(r'[.,;:!?\)\]]+$')
|
||||
|
||||
|
||||
def _try_hyphen_join(
|
||||
word_text: str,
|
||||
next_word_text: str,
|
||||
@@ -168,10 +171,15 @@ def _try_hyphen_join(
|
||||
) -> Optional[Tuple[str, str, float]]:
|
||||
"""Try joining two fragments with 0..max_missing interpolated chars.
|
||||
|
||||
Strips trailing punctuation from the continuation word before testing
|
||||
(e.g. "künden," → "künden") so dictionary lookup succeeds.
|
||||
|
||||
Returns (joined_word, missing_chars, confidence) or None.
|
||||
"""
|
||||
base = word_text.rstrip("-").rstrip()
|
||||
continuation = next_word_text.lstrip()
|
||||
# Strip trailing punctuation from continuation (commas, periods, etc.)
|
||||
raw_continuation = next_word_text.lstrip()
|
||||
continuation = _TRAILING_PUNCT_RE.sub('', raw_continuation)
|
||||
|
||||
if not base or not continuation:
|
||||
return None
|
||||
|
||||
@@ -126,6 +126,21 @@ class TestTryHyphenJoin:
|
||||
assert _try_hyphen_join("", "word") is None
|
||||
assert _try_hyphen_join("word", "") is None
|
||||
|
||||
def test_join_strips_trailing_punctuation(self):
|
||||
# "ver" + "künden," → should still find "verkünden" despite comma
|
||||
result = _try_hyphen_join("ver-", "künden,")
|
||||
assert result is not None
|
||||
joined, missing, conf = result
|
||||
assert joined == "verkünden"
|
||||
|
||||
def test_join_with_missing_chars_and_punctuation(self):
|
||||
# "ve" + "künden," → needs "r" in between, comma must be stripped
|
||||
result = _try_hyphen_join("ve", "künden,", max_missing=2)
|
||||
assert result is not None
|
||||
joined, missing, conf = result
|
||||
assert joined == "verkünden"
|
||||
assert "r" in missing
|
||||
|
||||
|
||||
@needs_spellchecker
|
||||
class TestTrySpellFix:
|
||||
|
||||
Reference in New Issue
Block a user