Fix hyphen-join: strip trailing punctuation from continuation word
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 50s
CI / test-go-edu-search (push) Successful in 47s
CI / test-python-klausur (push) Failing after 2m35s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 34s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 50s
CI / test-go-edu-search (push) Successful in 47s
CI / test-python-klausur (push) Failing after 2m35s
CI / test-python-agent-core (push) Successful in 31s
CI / test-nodejs-website (push) Successful in 34s
The next-row word "künden," had a trailing comma, so the joined candidate "verkünden," failed the dictionary lookup. The continuation word now has trailing punctuation (any run of . , ; : ! ? and closing ) or ]) stripped before joining. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -161,6 +161,9 @@ class GutterSuggestion:
|
|||||||
# Core repair logic
|
# Core repair logic
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_TRAILING_PUNCT_RE = re.compile(r'[.,;:!?\)\]]+$')
|
||||||
|
|
||||||
|
|
||||||
def _try_hyphen_join(
|
def _try_hyphen_join(
|
||||||
word_text: str,
|
word_text: str,
|
||||||
next_word_text: str,
|
next_word_text: str,
|
||||||
@@ -168,10 +171,15 @@ def _try_hyphen_join(
|
|||||||
) -> Optional[Tuple[str, str, float]]:
|
) -> Optional[Tuple[str, str, float]]:
|
||||||
"""Try joining two fragments with 0..max_missing interpolated chars.
|
"""Try joining two fragments with 0..max_missing interpolated chars.
|
||||||
|
|
||||||
|
Strips trailing punctuation from the continuation word before testing
|
||||||
|
(e.g. "künden," → "künden") so dictionary lookup succeeds.
|
||||||
|
|
||||||
Returns (joined_word, missing_chars, confidence) or None.
|
Returns (joined_word, missing_chars, confidence) or None.
|
||||||
"""
|
"""
|
||||||
base = word_text.rstrip("-").rstrip()
|
base = word_text.rstrip("-").rstrip()
|
||||||
continuation = next_word_text.lstrip()
|
# Strip trailing punctuation from continuation (commas, periods, etc.)
|
||||||
|
raw_continuation = next_word_text.lstrip()
|
||||||
|
continuation = _TRAILING_PUNCT_RE.sub('', raw_continuation)
|
||||||
|
|
||||||
if not base or not continuation:
|
if not base or not continuation:
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -126,6 +126,21 @@ class TestTryHyphenJoin:
|
|||||||
assert _try_hyphen_join("", "word") is None
|
assert _try_hyphen_join("", "word") is None
|
||||||
assert _try_hyphen_join("word", "") is None
|
assert _try_hyphen_join("word", "") is None
|
||||||
|
|
||||||
|
def test_join_strips_trailing_punctuation(self):
|
||||||
|
# "ver" + "künden," → should still find "verkünden" despite comma
|
||||||
|
result = _try_hyphen_join("ver-", "künden,")
|
||||||
|
assert result is not None
|
||||||
|
joined, missing, conf = result
|
||||||
|
assert joined == "verkünden"
|
||||||
|
|
||||||
|
def test_join_with_missing_chars_and_punctuation(self):
|
||||||
|
# "ve" + "künden," → needs "r" in between, comma must be stripped
|
||||||
|
result = _try_hyphen_join("ve", "künden,", max_missing=2)
|
||||||
|
assert result is not None
|
||||||
|
joined, missing, conf = result
|
||||||
|
assert joined == "verkünden"
|
||||||
|
assert "r" in missing
|
||||||
|
|
||||||
|
|
||||||
@needs_spellchecker
|
@needs_spellchecker
|
||||||
class TestTrySpellFix:
|
class TestTrySpellFix:
|
||||||
|
|||||||
Reference in New Issue
Block a user