From 50bfd6e902c1307481e9b7ea4d8cd7998172a3b9 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 15 Apr 2026 22:38:22 +0200 Subject: [PATCH] Fix gutter repair: don't suggest corrections for words with parentheses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Words like "probieren)" or "Englisch)" were incorrectly flagged as gutter OCR errors because the closing parenthesis wasn't stripped before dictionary lookup. The spellchecker then suggested "probierend" (replacing ) with d, edit distance 1). Two fixes: 1. Strip trailing/leading parentheses in _try_spell_fix before checking if the bare word is valid — skip correction if it is 2. Add )( to the rstrip characters in the analysis phase so "probieren)" becomes "probieren" for the known-word check Co-Authored-By: Claude Opus 4.6 (1M context) --- klausur-service/backend/cv_gutter_repair.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/klausur-service/backend/cv_gutter_repair.py b/klausur-service/backend/cv_gutter_repair.py index 5c64cd5..03c7bd1 100644 --- a/klausur-service/backend/cv_gutter_repair.py +++ b/klausur-service/backend/cv_gutter_repair.py @@ -217,6 +217,13 @@ def _try_spell_fix( if len(word_text) < _MIN_WORD_LEN_SPELL: return None + # Strip trailing/leading parentheses and check if the bare word is valid. + # Words like "probieren)" or "(Englisch" are valid words with punctuation, + # not OCR errors. Don't suggest corrections for them. + stripped = word_text.strip("()") + if stripped and _is_known(stripped): + return None + # Determine language priority from column type if "en" in col_type: lang = "en" @@ -362,7 +369,7 @@ def analyse_grid_for_gutter_repair( if last_word.lower().rstrip(".,;:!?-") in _STOPWORDS: continue - last_word_clean = last_word.rstrip(".,;:!?") + last_word_clean = last_word.rstrip(".,;:!?)(") if len(last_word_clean) < _MIN_WORD_LEN_HYPHEN: continue