From 584e07eb216e21fc676354f090d7e439695c8c57 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sun, 12 Apr 2026 09:49:22 +0200 Subject: [PATCH] Strip English IPA when mode excludes EN (nur DE / Aus) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit English IPA from the original OCR scan (e.g. [ˈgrænˌdæd]) was always shown because fix_cell_phonetics only ADDS/CORRECTS but never removes. Now strips IPA brackets containing Unicode IPA chars from the EN column when ipa_mode is "de" or "none". Co-Authored-By: Claude Opus 4.6 (1M context) --- klausur-service/backend/grid_editor_api.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 844e179..308a2b6 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -1002,6 +1002,24 @@ async def _build_grid_core( en_ipa_target_cols.add(en_col_type) de_ipa_target_cols = all_content_cols - en_ipa_target_cols + # --- Strip IPA from columns NOT in the target set --- + # When user selects "nur DE", English IPA from the OCR scan must + # be removed. When "none", all IPA is removed. + _IPA_BRACKET_STRIP_RE = re.compile(r'\s*\[[^\]]*[ˈˌːɑɒæɛəɜɪɔʊʌðŋθʃʒɹɡɾʔɐ][^\]]*\]') + strip_en_ipa = en_col_type and en_col_type not in en_ipa_target_cols + if strip_en_ipa or ipa_mode == "none": + strip_cols = {en_col_type} if strip_en_ipa and ipa_mode != "none" else all_content_cols + for cell in all_cells: + ct = cell.get("col_type", "") + if ct not in strip_cols: + continue + text = cell.get("text", "") + if "[" in text: + stripped = _IPA_BRACKET_STRIP_RE.sub("", text) + if stripped != text: + cell["text"] = stripped.strip() + cell["_ipa_corrected"] = True + # --- English IPA (Britfone + eng_to_ipa) --- if en_ipa_target_cols: for cell in all_cells: