From a6c5f56003cef5591cb380187c336dda5f77ba45 Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBook-Pro.local>
Date: Sun, 12 Apr 2026 09:53:16 +0200
Subject: [PATCH] Fix IPA strip: match all square brackets, not just Unicode
 IPA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OCR text contains ASCII approximations of IPA such as [kompa'tifn]
instead of Unicode IPA such as [kˈɒmpətɪʃən]. The old strip regex only
matched brackets containing at least one Unicode IPA character, so it
missed the ASCII ones. The regex now strips all non-empty [bracket]
content from excluded columns, on the assumption that square brackets
in vocab columns are always IPA.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 klausur-service/backend/grid_editor_api.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py
index 308a2b6..6748d68 100644
--- a/klausur-service/backend/grid_editor_api.py
+++ b/klausur-service/backend/grid_editor_api.py
@@ -1005,7 +1005,9 @@ async def _build_grid_core(
         # --- Strip IPA from columns NOT in the target set ---
         # When user selects "nur DE", English IPA from the OCR scan must
         # be removed.  When "none", all IPA is removed.
-        _IPA_BRACKET_STRIP_RE = re.compile(r'\s*\[[^\]]*[ˈˌːɑɒæɛəɜɪɔʊʌðŋθʃʒɹɡɾʔɐ][^\]]*\]')
+        # In vocab columns, square brackets [...] are always IPA (both
+        # Unicode like [ˈgrænˌdæd] and ASCII OCR like [kompa'tifn]).
+        _SQUARE_BRACKET_RE = re.compile(r'\s*\[[^\]]+\]')
         strip_en_ipa = en_col_type and en_col_type not in en_ipa_target_cols
         if strip_en_ipa or ipa_mode == "none":
             strip_cols = {en_col_type} if strip_en_ipa and ipa_mode != "none" else all_content_cols
@@ -1015,7 +1017,7 @@ async def _build_grid_core(
                     continue
                 text = cell.get("text", "")
                 if "[" in text:
-                    stripped = _IPA_BRACKET_STRIP_RE.sub("", text)
+                    stripped = _SQUARE_BRACKET_RE.sub("", text)
                     if stripped != text:
                         cell["text"] = stripped.strip()
                         cell["_ipa_corrected"] = True