From 525de557911adca4ab5e95d738d35c2c0ee753c1 Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Thu, 26 Mar 2026 00:03:10 +0100
Subject: [PATCH] Fix syllable+IPA combination: strip bracket content before IPA guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The _IPA_RE check in _syllabify_text() skipped entire cells containing
any IPA character. After German IPA insertion adds [bɪltʃøn], the check
blocked syllabification entirely. Now strips bracket content before
checking, so programmatically inserted IPA doesn't prevent syllable
divider insertion on the surrounding text.

Co-Authored-By: Claude Opus 4.6
---
 klausur-service/backend/cv_syllable_detect.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/klausur-service/backend/cv_syllable_detect.py b/klausur-service/backend/cv_syllable_detect.py
index cc43cf9..c1e1acc 100644
--- a/klausur-service/backend/cv_syllable_detect.py
+++ b/klausur-service/backend/cv_syllable_detect.py
@@ -150,8 +150,11 @@ def _syllabify_text(text: str, hyph_de, hyph_en) -> str:
     if not text:
         return text

-    # Skip cells that contain IPA transcription characters
-    if _IPA_RE.search(text):
+    # Skip cells that contain IPA transcription characters outside brackets.
+    # Bracket content like [bɪltʃøn] is programmatically inserted and should
+    # not block syllabification of the surrounding text.
+    text_no_brackets = re.sub(r'\[[^\]]*\]', '', text)
+    if _IPA_RE.search(text_no_brackets):
         return text

     # Phase 1: strip existing pipe dividers for clean normalization