Files
breakpilot-lehrer/backend-lehrer/services/audio.py
Benjamin Admin 198a0b2a0d Fix: Use /synthesize-direct for correct language selection
/synthesize always used the German model. /synthesize-direct uses
Edge TTS (with language-aware voice selection) and falls back to
Piper with the correct model (Thorsten DE / Lessac EN).

Also cleared audio cache to purge wrongly-generated files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-26 23:46:40 +02:00

92 lines
2.7 KiB
Python

"""
Audio Service — Generates TTS audio for vocabulary words.
Uses the Piper TTS service (compliance-tts-service, MIT license)
for high-quality German (Thorsten) and English (Lessac) voices.
Returns None if the TTS service is unavailable or does not answer with audio;
serving any placeholder response is left to the caller.
Audio files are cached — generated once, served forever.
"""
import hashlib
import logging
import os
from typing import Optional
import httpx
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Base URL of the Piper TTS service (runs in compliance stack).
# Read from the TTS_SERVICE_URL environment variable; the default below
# is used when that variable is not set.
TTS_SERVICE_URL = os.getenv("TTS_SERVICE_URL", "http://bp-compliance-tts:8095")
# Local cache directory for generated audio; "~" is expanded to the
# current user's home directory once, at import time.
AUDIO_CACHE_DIR = os.path.expanduser("~/Arbeitsblaetter/audio-cache")
def _ensure_cache_dir():
    """Create the audio cache directory, including missing parents, if absent."""
    # exist_ok=True turns repeated calls into a no-op once the directory exists.
    os.makedirs(name=AUDIO_CACHE_DIR, exist_ok=True)
def _cache_key(text: str, language: str) -> str:
    """Build a stable cache key from the language and the text to speak.

    The key is the language, an underscore, and the first 16 hex digits of
    the SHA-256 digest of "<language>:<text>", so identical input always
    maps to the same key.
    """
    payload = "{}:{}".format(language, text).encode()
    digest = hashlib.sha256(payload).hexdigest()
    return "{}_{}".format(language, digest[:16])
def _cache_path(text: str, language: str) -> str:
    """Return the path of the cached MP3 for this text + language.

    The cache directory is created first if needed, so the returned path
    can be written to immediately. The file itself may not exist yet.
    """
    _ensure_cache_dir()
    filename = _cache_key(text, language) + ".mp3"
    return os.path.join(AUDIO_CACHE_DIR, filename)
async def synthesize_word(
    text: str,
    language: str = "de",
    word_id: str = "",
) -> Optional[str]:
    """
    Generate TTS audio for a word or short phrase.

    Looks the audio up in the local cache first. On a miss, POSTs the text to
    the TTS service's /synthesize-direct endpoint and stores the returned
    audio under the cache path.

    Args:
        text: Word or phrase to speak. Blank text is rejected without
            contacting the service.
        language: Language code sent to the service and used in the cache key.
        word_id: Accepted for interface compatibility; not used here.

    Returns:
        Path to the cached MP3 file, or None if the text is blank, the
        request fails, the service does not answer with non-empty audio,
        or the file cannot be written.
    """
    # Nothing to speak: skip the request and avoid a junk cache entry.
    if not text or not text.strip():
        return None

    cached = _cache_path(text, language)

    # Cache hit. A zero-byte file is treated as a miss: it can only be the
    # leftover of an interrupted write and must not be served as audio.
    try:
        if os.path.getsize(cached) > 0:
            return cached
    except OSError:
        pass  # Not cached yet.

    # Call the TTS service via /synthesize-direct (returns MP3, selects
    # the voice according to the requested language).
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{TTS_SERVICE_URL}/synthesize-direct",
                json={
                    "text": text,
                    "language": language,
                },
            )
    except Exception as e:
        # Best effort at a service boundary: any failure means "no audio",
        # but it is logged visibly instead of being hidden at debug level.
        logger.warning("TTS request failed for '%s' (%s): %s", text, language, e)
        return None

    content_type = resp.headers.get("content-type", "")
    if resp.status_code != 200 or not content_type.startswith("audio") or not resp.content:
        logger.warning(
            "TTS service returned no audio for '%s' (%s): status=%s content-type=%r",
            text, language, resp.status_code, content_type,
        )
        return None

    # Write to a temporary sibling and rename it into place, so a crash or a
    # concurrent reader never sees a partially written cache file.
    tmp_path = f"{cached}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            f.write(resp.content)
        os.replace(tmp_path, cached)
    except OSError as e:
        logger.warning("Could not write TTS cache file %s: %s", cached, e)
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # Temp file was never created or is already gone.
        return None

    logger.info("TTS cached (direct): '%s' (%s) → %s", text, language, cached)
    return cached
async def get_or_generate_audio(
    text: str, language: str = "de", word_id: str = "",
) -> Optional[bytes]:
    """
    Get audio bytes for a word, generating them via TTS if not cached.

    Args:
        text: Word or phrase to speak.
        language: Language code passed through to synthesize_word.
        word_id: Passed through to synthesize_word unchanged.

    Returns:
        The MP3 file contents, or None if no audio could be produced or the
        cached file could not be read.
    """
    path = await synthesize_word(text, language, word_id)
    if not path:
        return None
    # Open directly instead of checking existence first: the cache file can
    # disappear between a check and the read (e.g. when the cache is purged),
    # and the contract is to return None rather than raise in that case.
    try:
        with open(path, "rb") as f:
            return f.read()
    except OSError as e:
        logger.warning("Cached audio could not be read from %s: %s", path, e)
        return None