"""
Audio Service — Generates TTS audio for vocabulary words.

Uses the Piper TTS service (compliance-tts-service, MIT license)
for high-quality German (Thorsten) and English (Lessac) voices.
Returns None when the TTS service is unavailable so callers can fall back.

Audio files are cached — generated once, served forever.
"""

import contextlib
import hashlib
import logging
import os
import re
import uuid
from typing import Optional

import httpx

logger = logging.getLogger(__name__)

# Piper TTS service (runs in compliance stack)
TTS_SERVICE_URL = os.getenv("TTS_SERVICE_URL", "http://bp-compliance-tts:8095")

# Local cache directory for generated audio
AUDIO_CACHE_DIR = os.path.expanduser("~/Arbeitsblaetter/audio-cache")

# Abbreviations expanded before TTS (so the speaker says the full word)
_TTS_EXPANSIONS = {
    # EN
    "sth.": "something",
    "sth": "something",
    "sb.": "somebody",
    "sb": "somebody",
    "smth.": "something",
    "smb.": "somebody",
    "sbd.": "somebody",
    "etc.": "etcetera",
    "e.g.": "for example",
    "i.e.": "that is",
    "esp.": "especially",
    "approx.": "approximately",
    "vs.": "versus",
    "nr.": "number",
    "no.": "number",
    "p.": "page",
    "adj.": "adjective",
    "adv.": "adverb",
    "prep.": "preposition",
    "pron.": "pronoun",
    "pl.": "plural",
    "sg.": "singular",
    "syn.": "synonym",
    "ant.": "antonym",
    # DE
    "usw.": "und so weiter",
    "bzw.": "beziehungsweise",
    "z.B.": "zum Beispiel",
    "d.h.": "das heisst",
    "vgl.": "vergleiche",
    "ca.": "circa",
    "evtl.": "eventuell",
    "ggf.": "gegebenenfalls",
}


def _abbreviation_pattern(abbr: str) -> str:
    """Return the regex fragment that matches one abbreviation literally."""
    escaped = re.escape(abbr)
    # Dotted abbreviations are terminated by their own dot. Bare ones ("sth",
    # "sb") need a guard so they do not fire on the prefix of a longer word
    # or of a longer abbreviation such as "sbd.".
    return escaped + r"(?!\w)" if abbr[-1].isalnum() else escaped


# Case-folded lookup table used to resolve a case-insensitive match.
_TTS_EXPANSIONS_FOLDED = {
    abbr.casefold(): full for abbr, full in _TTS_EXPANSIONS.items()
}

# One alternation, longest abbreviation first, so "sth." wins over "sth".
_ABBREVIATION_RE = re.compile(
    r"\b(?:"
    + "|".join(
        _abbreviation_pattern(abbr)
        for abbr in sorted(_TTS_EXPANSIONS, key=len, reverse=True)
    )
    + r")",
    re.IGNORECASE,
)

# Characters allowed to pass from `language` into a cache file name.
_UNSAFE_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9_-]")


def _expand_abbreviations(text: str) -> str:
    """Expand abbreviations so TTS speaks the full word.

    Matching is case-insensitive and anchored at a word boundary. The text
    is scanned once, so an expansion is never re-scanned for abbreviations.
    """

    def _replacement(match: "re.Match[str]") -> str:
        found = match.group(0)
        # Fall back to the matched text if case folding yields no known key.
        return _TTS_EXPANSIONS_FOLDED.get(found.casefold(), found)

    return _ABBREVIATION_RE.sub(_replacement, text)


def _ensure_cache_dir():
    """Create the audio cache directory if it does not exist yet."""
    os.makedirs(AUDIO_CACHE_DIR, exist_ok=True)


def _cache_key(text: str, language: str) -> str:
    """Generate a deterministic cache key for text + language.

    The key is ``<language>_<first 16 hex chars of sha256("language:text")>``.
    Characters in ``language`` other than letters, digits, ``_`` and ``-`` are
    replaced with ``_`` so the key is always a plain file name; ordinary
    codes such as ``de`` or ``en`` are left untouched.
    """
    digest = hashlib.sha256(f"{language}:{text}".encode()).hexdigest()[:16]
    safe_language = _UNSAFE_FILENAME_CHARS.sub("_", language)
    return f"{safe_language}_{digest}"


def _cache_path(text: str, language: str) -> str:
    """Full path to cached MP3 file (creates the cache directory if needed)."""
    _ensure_cache_dir()
    return os.path.join(AUDIO_CACHE_DIR, f"{_cache_key(text, language)}.mp3")


def _write_atomically(path: str, data: bytes) -> None:
    """Write ``data`` to ``path`` without ever exposing a partial file."""
    # The temp file lives next to the target so os.replace stays on one
    # filesystem; its ".tmp" suffix keeps it out of the ".mp3" cache lookups.
    tmp_path = f"{path}.{uuid.uuid4().hex}.tmp"
    try:
        with open(tmp_path, "wb") as tmp:
            tmp.write(data)
        os.replace(tmp_path, path)
    except BaseException:
        with contextlib.suppress(OSError):
            os.remove(tmp_path)
        raise


async def synthesize_word(
    text: str,
    language: str = "de",
    word_id: str = "",
) -> Optional[str]:
    """
    Generate TTS audio for a word or short phrase.

    Returns the file path to the cached MP3, or None on error.
    Uses Piper TTS service (compliance-tts-service).

    Args:
        text: Word or phrase to speak. The cache key uses this original
            text; abbreviations are expanded only in what is sent to TTS.
        language: Language code forwarded to the TTS service.
        word_id: Currently unused; kept for interface compatibility.
    """
    # Nothing to speak: avoid a pointless request and a useless cache entry.
    if not text or not text.strip():
        return None

    # Check cache first
    cached = _cache_path(text, language)
    if os.path.exists(cached):
        return cached

    # Expand abbreviations before speaking
    speak_text = _expand_abbreviations(text)

    # Call Piper TTS service via /synthesize-direct (returns MP3, selects language correctly)
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{TTS_SERVICE_URL}/synthesize-direct",
                json={
                    "text": speak_text,
                    "language": language,
                },
            )

        content_type = resp.headers.get("content-type", "")
        if resp.status_code != 200 or not content_type.startswith("audio"):
            logger.warning(
                "TTS service returned no audio for %r (%s): status=%s, content-type=%r",
                text,
                language,
                resp.status_code,
                content_type,
            )
            return None

        audio = resp.content
        if not audio:
            # An empty body would otherwise be cached and served forever.
            logger.warning("TTS service returned empty audio for %r (%s)", text, language)
            return None

        _write_atomically(cached, audio)
        logger.info("TTS cached (direct): '%s' (%s) → %s", text, language, cached)
        return cached
    except Exception as e:
        # Best-effort boundary: callers treat None as "no audio available".
        logger.warning("TTS synthesis failed for %r (%s): %s", text, language, e)

    return None


async def get_or_generate_audio(
    text: str,
    language: str = "de",
    word_id: str = "",
) -> Optional[bytes]:
    """
    Get audio bytes for a word. Returns MP3 bytes or None.

    Generates via TTS if not cached.
    """
    path = await synthesize_word(text, language, word_id)
    if not path:
        return None

    # Read directly instead of checking existence first: the file could be
    # removed between the check and the open.
    try:
        with open(path, "rb") as f:
            return f.read()
    except OSError as e:
        logger.warning("Could not read cached audio %s: %s", path, e)
        return None