diff --git a/backend-lehrer/services/audio.py b/backend-lehrer/services/audio.py index a3f1e1b..d27f33d 100644 --- a/backend-lehrer/services/audio.py +++ b/backend-lehrer/services/audio.py @@ -23,6 +23,53 @@ TTS_SERVICE_URL = os.getenv("TTS_SERVICE_URL", "http://bp-compliance-tts:8095") # Local cache directory for generated audio AUDIO_CACHE_DIR = os.path.expanduser("~/Arbeitsblaetter/audio-cache") +# Abbreviations expanded before TTS (so the speaker says the full word) +_TTS_EXPANSIONS = { + "sth.": "something", + "sth": "something", + "sb.": "somebody", + "sb": "somebody", + "smth.": "something", + "smb.": "somebody", + "sbd.": "somebody", + "etc.": "etcetera", + "e.g.": "for example", + "i.e.": "that is", + "esp.": "especially", + "approx.": "approximately", + "vs.": "versus", + "nr.": "number", + "no.": "number", + "p.": "page", + "adj.": "adjective", + "adv.": "adverb", + "prep.": "preposition", + "pron.": "pronoun", + "pl.": "plural", + "sg.": "singular", + "syn.": "synonym", + "ant.": "antonym", + # DE + "usw.": "und so weiter", + "bzw.": "beziehungsweise", + "z.B.": "zum Beispiel", + "d.h.": "das heisst", + "vgl.": "vergleiche", + "ca.": "circa", + "evtl.": "eventuell", + "ggf.": "gegebenenfalls", +} + + +def _expand_abbreviations(text: str) -> str: + """Expand abbreviations so TTS speaks the full word.""" + import re + for abbr, full in _TTS_EXPANSIONS.items(): + # Word-boundary aware replacement (case-insensitive) + pattern = re.escape(abbr) + text = re.sub(rf'\b{pattern}', full, text, flags=re.IGNORECASE) + return text + def _ensure_cache_dir(): os.makedirs(AUDIO_CACHE_DIR, exist_ok=True) @@ -56,13 +103,16 @@ async def synthesize_word( if os.path.exists(cached): return cached + # Expand abbreviations before speaking + speak_text = _expand_abbreviations(text) + # Call Piper TTS service via /synthesize-direct (returns MP3, selects language correctly) try: async with httpx.AsyncClient(timeout=30.0) as client: resp = await client.post( f"{TTS_SERVICE_URL}/synthesize-direct", json={ - "text": text, + "text": speak_text, "language": language, }, )