From 4561320e0d30591f2a32545d1d3b8cbc06db043c Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Wed, 15 Apr 2026 23:41:33 +0200
Subject: [PATCH] Fix SmartSpellChecker: preserve leading non-alpha text like (=

The tokenizer regex only matches alphabetic characters, so text before
the first word match (like "(= " in "(= I won...") was silently dropped
when reassembling the corrected text. Now preserves
text[:first_match_start] as a leading prefix.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 klausur-service/backend/smart_spell.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/klausur-service/backend/smart_spell.py b/klausur-service/backend/smart_spell.py
index 77fe7e8..e400474 100644
--- a/klausur-service/backend/smart_spell.py
+++ b/klausur-service/backend/smart_spell.py
@@ -534,6 +534,13 @@ class SmartSpellChecker:
 
         # --- Pass 3: Per-word correction ---
         parts: List[str] = []
+
+        # Preserve any leading text before the first token match
+        # (e.g., "(= " before "I won and he lost.")
+        first_start = tokens[0].start() if tokens else 0
+        if first_start > 0:
+            parts.append(text[:first_start])
+
         for i, (word, sep) in enumerate(token_list):
             # Skip words inside IPA brackets (brackets land in separators)
             prev_sep = token_list[i - 1][1] if i > 0 else ""