diff --git a/klausur-service/backend/grid_editor_api.py b/klausur-service/backend/grid_editor_api.py index 94282e4..36ef379 100644 --- a/klausur-service/backend/grid_editor_api.py +++ b/klausur-service/backend/grid_editor_api.py @@ -12,6 +12,7 @@ DATENSCHUTZ: Alle Verarbeitung erfolgt lokal. """ import logging +import re import time from typing import Any, Dict, List, Optional @@ -1221,9 +1222,24 @@ async def build_grid(session_id: str): ] if en_cells and not other_cells: en_text = en_cells[0].get("text", "") - # No IPA brackets → phonetics not recognized → - # this is a garbled IPA continuation row - if "[" not in en_text: + # Strip any IPA brackets that fix_cell_phonetics + # may have added for short dictionary matches + # (e.g. "si" → "[si]") to check underlying text. + text_bare = re.sub(r'\[[^\]]*\]', '', en_text).strip() + # Garbled IPA typically contains ':' (length mark) + # or starts with ' (stress mark), and has no word + # with ≥3 letters that could be a real headword. + has_headword = any( + len(re.sub(r'[^a-zA-Z]', '', w)) >= 3 + for w in text_bare.split() + ) if text_bare else False + looks_phonetic = ( + ':' in text_bare + or text_bare.startswith("'") + or text_bare.startswith("\u2019") + or not has_headword + ) + if looks_phonetic: ipa_cont_rows.add(ri) if ipa_cont_rows: for z in zones_data: