diff --git a/klausur-service/backend/cv_gutter_repair.py b/klausur-service/backend/cv_gutter_repair.py index c9d4fea..2eead8e 100644 --- a/klausur-service/backend/cv_gutter_repair.py +++ b/klausur-service/backend/cv_gutter_repair.py @@ -161,6 +161,9 @@ class GutterSuggestion: # Core repair logic # --------------------------------------------------------------------------- +_TRAILING_PUNCT_RE = re.compile(r'[.,;:!?\)\]]+$') + + def _try_hyphen_join( word_text: str, next_word_text: str, @@ -168,10 +171,15 @@ def _try_hyphen_join( ) -> Optional[Tuple[str, str, float]]: """Try joining two fragments with 0..max_missing interpolated chars. + Strips trailing punctuation from the continuation word before testing + (e.g. "künden," → "künden") so dictionary lookup succeeds. + Returns (joined_word, missing_chars, confidence) or None. """ base = word_text.rstrip("-").rstrip() - continuation = next_word_text.lstrip() + # Strip trailing punctuation from continuation (commas, periods, etc.) + raw_continuation = next_word_text.lstrip() + continuation = _TRAILING_PUNCT_RE.sub('', raw_continuation) if not base or not continuation: return None diff --git a/klausur-service/backend/tests/test_gutter_repair.py b/klausur-service/backend/tests/test_gutter_repair.py index b56aee9..353f8d1 100644 --- a/klausur-service/backend/tests/test_gutter_repair.py +++ b/klausur-service/backend/tests/test_gutter_repair.py @@ -126,6 +126,21 @@ class TestTryHyphenJoin: assert _try_hyphen_join("", "word") is None assert _try_hyphen_join("word", "") is None + def test_join_strips_trailing_punctuation(self): + # "ver" + "künden," → should still find "verkünden" despite comma + result = _try_hyphen_join("ver-", "künden,") + assert result is not None + joined, missing, conf = result + assert joined == "verkünden" + + def test_join_with_missing_chars_and_punctuation(self): + # "ve" + "künden," → needs "r" in between, comma must be stripped + result = _try_hyphen_join("ve", "künden,", max_missing=2) + assert result is not None + joined, missing, conf = result + assert joined == "verkünden" + assert "r" in missing + @needs_spellchecker class TestTrySpellFix: