From 2e6ab3a6460abb778ef3387b8d3a6cb0c6a2d92c Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 19 Mar 2026 10:57:15 +0100 Subject: [PATCH] Fix IPA marker split: walk back max 3 chars for onset cluster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The walk-back was going 4 chars, eating the last letter of the headword: "schoolbag" → "schoolba". Limiting to 3 gives correct split: "schoolbag" + "[sku:lbæg]". Co-Authored-By: Claude Opus 4.6 --- klausur-service/backend/cv_ocr_engines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/klausur-service/backend/cv_ocr_engines.py b/klausur-service/backend/cv_ocr_engines.py index 9c127e2..2111893 100644 --- a/klausur-service/backend/cv_ocr_engines.py +++ b/klausur-service/backend/cv_ocr_engines.py @@ -1073,7 +1073,7 @@ def _insert_missing_ipa(text: str, pronunciation: str = 'british') -> str: if first_marker >= 3: split = first_marker while (split > 0 - and split > first_marker - 4 + and split > first_marker - 3 and w[split - 1].isalpha() and w[split - 1].islower()): split -= 1