diff --git a/klausur-service/backend/cv_gutter_repair.py b/klausur-service/backend/cv_gutter_repair.py index 5c64cd5..03c7bd1 100644 --- a/klausur-service/backend/cv_gutter_repair.py +++ b/klausur-service/backend/cv_gutter_repair.py @@ -217,6 +217,13 @@ def _try_spell_fix( if len(word_text) < _MIN_WORD_LEN_SPELL: return None + # Strip trailing/leading parentheses and check if the bare word is valid. + # Words like "probieren)" or "(Englisch" are valid words with punctuation, + # not OCR errors. Don't suggest corrections for them. + stripped = word_text.strip("()") + if stripped and _is_known(stripped): + return None + # Determine language priority from column type if "en" in col_type: lang = "en" @@ -362,7 +369,7 @@ def analyse_grid_for_gutter_repair( if last_word.lower().rstrip(".,;:!?-") in _STOPWORDS: continue - last_word_clean = last_word.rstrip(".,;:!?") + last_word_clean = last_word.rstrip(".,;:!?)(") if len(last_word_clean) < _MIN_WORD_LEN_HYPHEN: continue