""" Audio Service — Generates TTS audio for vocabulary words. Uses the Piper TTS service (compliance-tts-service, MIT license) for high-quality German (Thorsten) and English (Lessac) voices. Falls back to a placeholder response if TTS service is unavailable. Audio files are cached — generated once, served forever. """ import hashlib import logging import os from typing import Optional import httpx logger = logging.getLogger(__name__) # Piper TTS service (runs in compliance stack) TTS_SERVICE_URL = os.getenv("TTS_SERVICE_URL", "http://bp-compliance-tts:8095") # Local cache directory for generated audio AUDIO_CACHE_DIR = os.path.expanduser("~/Arbeitsblaetter/audio-cache") def _ensure_cache_dir(): os.makedirs(AUDIO_CACHE_DIR, exist_ok=True) def _cache_key(text: str, language: str) -> str: """Generate a deterministic cache key for text + language.""" h = hashlib.sha256(f"{language}:{text}".encode()).hexdigest()[:16] return f"{language}_{h}" def _cache_path(text: str, language: str) -> str: """Full path to cached MP3 file.""" _ensure_cache_dir() return os.path.join(AUDIO_CACHE_DIR, f"{_cache_key(text, language)}.mp3") async def synthesize_word( text: str, language: str = "de", word_id: str = "", ) -> Optional[str]: """ Generate TTS audio for a word or short phrase. Returns the file path to the cached MP3, or None on error. Uses Piper TTS service (compliance-tts-service). """ # Check cache first cached = _cache_path(text, language) if os.path.exists(cached): return cached # Call Piper TTS service try: async with httpx.AsyncClient(timeout=30.0) as client: resp = await client.post( f"{TTS_SERVICE_URL}/synthesize", json={ "text": text, "language": language, "voice": "thorsten-high" if language == "de" else "lessac-high", "module_id": "vocabulary", "content_id": word_id or _cache_key(text, language), }, ) if resp.status_code != 200: logger.warning(f"TTS service returned {resp.status_code} for '{text}'") return None data = resp.json() audio_url = data.get("audio_url") or data.get("presigned_url") if audio_url: # Download the audio file audio_resp = await client.get(audio_url) if audio_resp.status_code == 200: with open(cached, "wb") as f: f.write(audio_resp.content) logger.info(f"TTS cached: '{text}' ({language}) → {cached}") return cached except Exception as e: logger.warning(f"TTS service unavailable: {e}") # Fallback: try direct MP3 endpoint try: async with httpx.AsyncClient(timeout=30.0) as client: resp = await client.post( f"{TTS_SERVICE_URL}/synthesize/mp3", json={ "text": text, "language": language, "voice": "thorsten-high" if language == "de" else "lessac-high", "module_id": "vocabulary", }, ) if resp.status_code == 200 and resp.headers.get("content-type", "").startswith("audio"): with open(cached, "wb") as f: f.write(resp.content) logger.info(f"TTS cached (direct): '{text}' ({language}) → {cached}") return cached except Exception as e: logger.debug(f"TTS direct fallback also failed: {e}") return None async def get_or_generate_audio( text: str, language: str = "de", word_id: str = "", ) -> Optional[bytes]: """ Get audio bytes for a word. Returns MP3 bytes or None. Generates via TTS if not cached. """ path = await synthesize_word(text, language, word_id) if path and os.path.exists(path): with open(path, "rb") as f: return f.read() return None