From b495e63e6f952ada3d9ea729c12a455c59d5ecfa Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Mon, 27 Apr 2026 00:07:46 +0200
Subject: [PATCH] =?UTF-8?q?Add=20TTS=20abbreviation=20expansion=20(sth?=
 =?UTF-8?q?=E2=86=92something,=20sb=E2=86=92somebody,=20etc.)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Text is preprocessed before TTS to expand abbreviations like
sth., sb., etc., z.B., usw. so the speaker says the full word.
32 abbreviations covered (EN + DE). Applied to all languages.

Audio cache cleared to regenerate with correct pronunciation.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 backend-lehrer/services/audio.py | 68 +++++++++++++++++++++++++++++++-
 1 file changed, 67 insertions(+), 1 deletion(-)

diff --git a/backend-lehrer/services/audio.py b/backend-lehrer/services/audio.py
index a3f1e1b..d27f33d 100644
--- a/backend-lehrer/services/audio.py
+++ b/backend-lehrer/services/audio.py
@@ -23,6 +23,69 @@ TTS_SERVICE_URL = os.getenv("TTS_SERVICE_URL", "http://bp-compliance-tts:8095")
 # Local cache directory for generated audio
 AUDIO_CACHE_DIR = os.path.expanduser("~/Arbeitsblaetter/audio-cache")
 
+# Abbreviations expanded before TTS (so the speaker says the full word)
+_TTS_EXPANSIONS = {
+    "sth.": "something",
+    "sth": "something",
+    "sb.": "somebody",
+    "sb": "somebody",
+    "smth.": "something",
+    "smb.": "somebody",
+    "sbd.": "somebody",
+    "etc.": "etcetera",
+    "e.g.": "for example",
+    "i.e.": "that is",
+    "esp.": "especially",
+    "approx.": "approximately",
+    "vs.": "versus",
+    "nr.": "number",
+    "no.": "number",
+    "p.": "page",
+    "adj.": "adjective",
+    "adv.": "adverb",
+    "prep.": "preposition",
+    "pron.": "pronoun",
+    "pl.": "plural",
+    "sg.": "singular",
+    "syn.": "synonym",
+    "ant.": "antonym",
+    # DE
+    "usw.": "und so weiter",
+    "bzw.": "beziehungsweise",
+    "z.B.": "zum Beispiel",
+    "d.h.": "das heisst",
+    "vgl.": "vergleiche",
+    "ca.": "circa",
+    "evtl.": "eventuell",
+    "ggf.": "gegebenenfalls",
+}
+
+
+def _expand_abbreviations(text: str) -> str:
+    """Expand abbreviations so TTS speaks the full word.
+
+    Matching is case-insensitive and done in one pass, longest
+    abbreviation first, so "sbd." is not half-consumed by the shorter
+    "sb" rule and already-expanded text is never rescanned. A match must
+    start at a word boundary; dot-less forms ("sth", "sb") must also end
+    at one, so words that merely begin with those letters stay intact.
+    """
+    import re
+
+    lookup = {abbr.lower(): full for abbr, full in _TTS_EXPANSIONS.items()}
+    alternatives = "|".join(
+        # A trailing \b is only valid after a letter; after "." it would
+        # demand a following word character and miss "sth. else".
+        re.escape(abbr) + (r"\b" if abbr[-1].isalnum() else "")
+        for abbr in sorted(lookup, key=len, reverse=True)
+    )
+    pattern = re.compile(rf"\b(?:{alternatives})", re.IGNORECASE)
+    # Fall back to the matched text if Unicode case folding matched a
+    # spelling that .lower() does not map onto a table key.
+    return pattern.sub(
+        lambda m: lookup.get(m.group(0).lower(), m.group(0)), text
+    )
+
 
 def _ensure_cache_dir():
     os.makedirs(AUDIO_CACHE_DIR, exist_ok=True)
@@ -56,13 +119,16 @@ async def synthesize_word(
     if os.path.exists(cached):
         return cached
 
+    # Expand abbreviations before speaking
+    speak_text = _expand_abbreviations(text)
+
     # Call Piper TTS service via /synthesize-direct (returns MP3, selects language correctly)
     try:
         async with httpx.AsyncClient(timeout=30.0) as client:
             resp = await client.post(
                 f"{TTS_SERVICE_URL}/synthesize-direct",
                 json={
-                    "text": text,
+                    "text": speak_text,
                     "language": language,
                 },
             )