From 4561320e0d30591f2a32545d1d3b8cbc06db043c Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBook-Pro.local>
Date: Wed, 15 Apr 2026 23:41:33 +0200
Subject: [PATCH] Fix SmartSpellChecker: preserve leading non-alpha text like
 (=

The tokenizer regex only matches alphabetic characters, so text
before the first word match (like "(= " in "(= I won...") was
silently dropped when reassembling the corrected text.

Now preserves text[:first_start] (everything before the first token
match) as a leading prefix.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 klausur-service/backend/smart_spell.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/klausur-service/backend/smart_spell.py b/klausur-service/backend/smart_spell.py
index 77fe7e8..e400474 100644
--- a/klausur-service/backend/smart_spell.py
+++ b/klausur-service/backend/smart_spell.py
@@ -534,6 +534,13 @@ class SmartSpellChecker:
 
         # --- Pass 3: Per-word correction ---
         parts: List[str] = []
+
+        # Preserve any leading text before the first token match
+        # (e.g., "(= " before "I won and he lost.")
+        first_start = tokens[0].start() if tokens else 0
+        if first_start > 0:
+            parts.append(text[:first_start])
+
         for i, (word, sep) in enumerate(token_list):
             # Skip words inside IPA brackets (brackets land in separators)
             prev_sep = token_list[i - 1][1] if i > 0 else ""