From 525de557911adca4ab5e95d738d35c2c0ee753c1 Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBookPro.fritz.box>
Date: Thu, 26 Mar 2026 00:03:10 +0100
Subject: [PATCH] Fix syllable+IPA combination: strip bracket content before
 IPA guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The _IPA_RE check in _syllabify_text() skipped any cell containing an
IPA character. Once German IPA insertion had added a transcription such
as [bɪltʃøn], that check blocked syllabification of the whole cell. The
check now runs on a copy of the text with bracketed content stripped, so
programmatically inserted IPA no longer prevents syllable divider
insertion in the surrounding text.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 klausur-service/backend/cv_syllable_detect.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/klausur-service/backend/cv_syllable_detect.py b/klausur-service/backend/cv_syllable_detect.py
index cc43cf9..c1e1acc 100644
--- a/klausur-service/backend/cv_syllable_detect.py
+++ b/klausur-service/backend/cv_syllable_detect.py
@@ -150,8 +150,11 @@ def _syllabify_text(text: str, hyph_de, hyph_en) -> str:
     if not text:
         return text
 
-    # Skip cells that contain IPA transcription characters
-    if _IPA_RE.search(text):
+    # Skip cells that contain IPA transcription characters outside brackets.
+    # Bracket content like [bɪltʃøn] is programmatically inserted and should
+    # not block syllabification of the surrounding text.
+    text_no_brackets = re.sub(r'\[[^\]]*\]', '', text)
+    if _IPA_RE.search(text_no_brackets):
         return text
 
     # Phase 1: strip existing pipe dividers for clean normalization