From 0599c72cc154f5b536dfb4da469d6d691333f99f Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 15 Apr 2026 22:28:58 +0200 Subject: [PATCH] Fix IPA continuation: don't replace normal text with IPA Text like "Betonung auf der 1. Silbe: profit ['profit]" was incorrectly detected as garbled IPA and replaced with generated IPA transcription of the previous row's example sentence. Added guard: if the cell text contains >=3 recognizable words (3+ letter alpha tokens), it's normal text, not garbled IPA. Garbled IPA is typically short and has no real dictionary words. Fixes: Row 13 C3 showing IPA instead of pronunciation hint text. Co-Authored-By: Claude Opus 4.6 (1M context) --- klausur-service/backend/grid_build_core.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/klausur-service/backend/grid_build_core.py b/klausur-service/backend/grid_build_core.py index cab5277..037c578 100644 --- a/klausur-service/backend/grid_build_core.py +++ b/klausur-service/backend/grid_build_core.py @@ -1112,6 +1112,13 @@ async def _build_grid_core( # Has real IPA symbols → already fixed or valid if any(c in _REAL_IPA_CHARS for c in cell_text): continue + # Guard: if text contains multiple real words, it's + # normal text (e.g. "Betonung auf der 1. Silbe: + # profit"), not garbled IPA. Garbled IPA is + # typically short and has no recognizable words. + _words_in_text = re.findall(r'[A-Za-zÄÖÜäöüß]{3,}', cell_text) + if len(_words_in_text) >= 3: + continue # Find headword in previous row, same column prev_ri = rows_sorted[idx - 1]["index"]