Text is preprocessed before TTS to expand abbreviations like sth., sb., etc., z.B., usw. so the speaker says the full word. 40+ abbreviations covered (EN + DE). Applied to all languages. Audio cache cleared to regenerate with correct pronunciation. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
142 lines
4.0 KiB
Python
142 lines
4.0 KiB
Python
"""
|
|
Audio Service — Generates TTS audio for vocabulary words.
|
|
|
|
Uses the Piper TTS service (compliance-tts-service, MIT license)
|
|
for high-quality German (Thorsten) and English (Lessac) voices.
|
|
Returns None if the TTS service is unavailable (no placeholder audio is produced here).
|
|
|
|
Audio files are cached — generated once, served forever.
|
|
"""
|
|
|
|
import hashlib
import logging
import os
import re
from typing import Optional

import httpx
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Base URL of the Piper TTS service (runs in the compliance stack).
# Can be overridden through the TTS_SERVICE_URL environment variable.
TTS_SERVICE_URL = os.environ.get("TTS_SERVICE_URL", "http://bp-compliance-tts:8095")

# Directory below the current user's home in which generated audio is cached.
AUDIO_CACHE_DIR = os.path.expanduser("~/Arbeitsblaetter/audio-cache")
|
|
|
|
# Abbreviations expanded before TTS (so the speaker says the full word).
# Keys are matched case-insensitively and only as stand-alone tokens.
_TTS_EXPANSIONS = {
    # EN
    "sth.": "something",
    "sth": "something",
    "sb.": "somebody",
    "sb": "somebody",
    "smth.": "something",
    "smb.": "somebody",
    "sbd.": "somebody",
    "etc.": "etcetera",
    "e.g.": "for example",
    "i.e.": "that is",
    "esp.": "especially",
    "approx.": "approximately",
    "vs.": "versus",
    "nr.": "number",
    "no.": "number",
    "p.": "page",
    "adj.": "adjective",
    "adv.": "adverb",
    "prep.": "preposition",
    "pron.": "pronoun",
    "pl.": "plural",
    "sg.": "singular",
    "syn.": "synonym",
    "ant.": "antonym",
    # DE
    "usw.": "und so weiter",
    "bzw.": "beziehungsweise",
    "z.B.": "zum Beispiel",
    "d.h.": "das heisst",
    "vgl.": "vergleiche",
    "ca.": "circa",
    "evtl.": "eventuell",
    "ggf.": "gegebenenfalls",
}

# Lower-cased lookup table: matching is case-insensitive, so "Z.B." in the
# input must resolve to the "z.B." entry.
_TTS_EXPANSIONS_LOWER = {abbr.lower(): full for abbr, full in _TTS_EXPANSIONS.items()}

# A single alternation over all abbreviations, longest first, so that "sbd."
# and "sth." win over their prefixes "sb" and "sth".
#   (?<!\w)        - the match must not start inside a word
#   (?![^\W\d_])   - the match must not be followed by a letter, so "p." is
#                    not expanded inside "p.m." and "sth" not inside
#                    "sthenic"; a following digit ("p.12") is still allowed
_TTS_ABBREVIATION_RE = re.compile(
    r"(?<!\w)(?:%s)(?![^\W\d_])"
    % "|".join(
        re.escape(abbr)
        for abbr in sorted(_TTS_EXPANSIONS, key=len, reverse=True)
    ),
    re.IGNORECASE,
)


def _expand_abbreviations(text: str) -> str:
    """Expand known abbreviations so TTS speaks the full word.

    All entries of ``_TTS_EXPANSIONS`` are replaced in one pass, so text that
    was produced by an expansion is never scanned again. Matching is
    case-insensitive and limited to stand-alone tokens.

    Args:
        text: The word or phrase about to be sent to the TTS service.

    Returns:
        ``text`` with every recognised abbreviation replaced by its spoken
        form. Empty input is returned unchanged.

    Note:
        Because matching ignores case and has no sentence context, a
        sentence-final "no." is expanded to "number" as well.
    """
    if not text:
        return text

    def _replacement(match):
        token = match.group(0)
        # Case-insensitive matching can accept characters (e.g. U+017F) whose
        # lower-case form is not a table key; leave such tokens untouched.
        return _TTS_EXPANSIONS_LOWER.get(token.lower(), token)

    return _TTS_ABBREVIATION_RE.sub(_replacement, text)
|
|
|
|
|
|
def _ensure_cache_dir():
    """Create the audio cache directory (including parents) if it is missing."""
    # exist_ok=True makes repeated calls harmless.
    os.makedirs(name=AUDIO_CACHE_DIR, exist_ok=True)
|
|
|
|
|
|
def _cache_key(text: str, language: str) -> str:
    """Build the deterministic cache key for a text/language pair.

    The key is the language, an underscore, and the first 16 hex digits of
    the SHA-256 digest of ``"<language>:<text>"``.
    """
    payload = f"{language}:{text}".encode()
    digest = hashlib.sha256(payload).hexdigest()
    short_digest = digest[:16]
    return f"{language}_{short_digest}"
|
|
|
|
|
|
def _cache_path(text: str, language: str) -> str:
    """Return the full path of the cached MP3 for a text/language pair.

    The cache directory is created first if necessary; the returned file
    itself may not exist yet.
    """
    _ensure_cache_dir()
    filename = _cache_key(text, language) + ".mp3"
    return os.path.join(AUDIO_CACHE_DIR, filename)
|
|
|
|
|
|
def _write_file_atomically(path: str, data: bytes) -> None:
    """Write *data* to *path* via a temporary sibling file and ``os.replace``.

    Readers that test for the existence of *path* therefore never observe a
    partially written file. The temporary file is removed if writing fails.

    Raises:
        OSError: If the data cannot be written or moved into place.
    """
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            f.write(data)
        os.replace(tmp_path, path)
    except OSError:
        # Best-effort cleanup; the original error is what matters.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise


async def synthesize_word(
    text: str,
    language: str = "de",
    word_id: str = "",
) -> Optional[str]:
    """
    Generate TTS audio for a word or short phrase.

    Uses the Piper TTS service (compliance-tts-service) through its
    /synthesize-direct endpoint and stores the returned audio in the local
    cache. The cache is keyed on the original ``text`` and ``language``.

    Args:
        text: Word or phrase to speak. Abbreviations are expanded before the
            text is sent to the service.
        language: Language code passed to the TTS service.
        word_id: Accepted for interface compatibility; not used here.

    Returns:
        The file path of the cached MP3, or None if the text is blank, the
        request fails, the service does not answer with non-empty audio, or
        the cache file cannot be written.
    """
    # Nothing to speak: do not bother the service or create a cache entry.
    if not text or not text.strip():
        return None

    # Check cache first
    cached = _cache_path(text, language)
    if os.path.exists(cached):
        return cached

    # Expand abbreviations before speaking
    speak_text = _expand_abbreviations(text)

    # Call Piper TTS service via /synthesize-direct (returns MP3, selects language correctly)
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{TTS_SERVICE_URL}/synthesize-direct",
                json={
                    "text": speak_text,
                    "language": language,
                },
            )
    except Exception as e:
        # Best-effort boundary: any request failure means "no audio".
        logger.warning("TTS request failed for '%s' (%s): %s", text, language, e)
        return None

    content_type = resp.headers.get("content-type", "")
    if resp.status_code != 200 or not content_type.startswith("audio"):
        logger.warning(
            "TTS service returned no audio for '%s' (%s): status=%s, content-type=%r",
            text, language, resp.status_code, content_type,
        )
        return None

    audio = resp.content
    if not audio:
        # An empty file would otherwise be cached and served from then on.
        logger.warning("TTS service returned empty audio for '%s' (%s)", text, language)
        return None

    try:
        _write_file_atomically(cached, audio)
    except OSError as e:
        logger.warning("Could not write TTS cache file %s: %s", cached, e)
        return None

    logger.info("TTS cached (direct): '%s' (%s) → %s", text, language, cached)
    return cached
|
|
|
|
|
|
async def get_or_generate_audio(
    text: str, language: str = "de", word_id: str = "",
) -> Optional[bytes]:
    """
    Get audio bytes for a word, generating them via TTS if not cached.

    Args:
        text: Word or phrase to speak.
        language: Language code passed on to ``synthesize_word``.
        word_id: Passed on to ``synthesize_word`` unchanged.

    Returns:
        The MP3 bytes, or None if no audio could be produced or the cached
        file is no longer present.
    """
    path = await synthesize_word(text, language, word_id)
    if not path:
        return None

    # Open directly instead of testing for existence first: the file may be
    # removed (e.g. cache cleared) between a check and the read.
    try:
        with open(path, "rb") as f:
            return f.read()
    except FileNotFoundError:
        return None
|