Add Piper TTS audio integration for vocabulary words

audio_service.py: Connects to compliance-tts-service (Piper TTS, MIT license) for high-quality German (Thorsten) and English (Lessac) voices. Audio is cached as MP3 on first request.

vocabulary_api.py: New endpoints:
- GET /vocabulary/word/{id}/audio/{lang} — word pronunciation
- GET /vocabulary/word/{id}/audio-syllables/{lang} — slow syllable-by-syllable pronunciation

Anton App analysis: identified 5 features to adopt (star system, games as rewards, progress bars, listening exercises, matching exercises).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 15:40:01 +02:00
parent 0ff5399a62
commit dc60233262
2 changed files with 187 additions and 0 deletions
@@ -0,0 +1,125 @@
+"""
+Audio Service — Generates TTS audio for vocabulary words.
+
+Uses the Piper TTS service (compliance-tts-service, MIT license)
+for high-quality German (Thorsten) and English (Lessac) voices.
+Falls back to the service's direct MP3 endpoint; returns None if no audio can be produced.
+
+Audio files are cached — generated once, served forever.
+"""
+
+import hashlib
+import logging
+import os
+from typing import Optional
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# Base URL of the Piper TTS service (runs in compliance stack).
+# Can be overridden with the TTS_SERVICE_URL environment variable.
+TTS_SERVICE_URL = os.getenv("TTS_SERVICE_URL", "http://bp-compliance-tts:8095")
+
+# Local cache directory for generated audio, located under the home
+# directory of the user running the process ("~" is expanded at import time).
+AUDIO_CACHE_DIR = os.path.expanduser("~/Arbeitsblaetter/audio-cache")
+
+
+def _ensure_cache_dir():
+    """Create the audio cache directory (including parents) if it is missing."""
+    os.makedirs(name=AUDIO_CACHE_DIR, exist_ok=True)
+
+
+def _cache_key(text: str, language: str) -> str:
+    """Return a deterministic key: the language plus a short digest of the text."""
+    # The language is part of the hashed input, so the same spelling in two
+    # languages yields two different digests.
+    hasher = hashlib.sha256()
+    hasher.update(f"{language}:{text}".encode())
+    short_digest = hasher.hexdigest()[:16]
+    return f"{language}_{short_digest}"
+
+
+def _cache_path(text: str, language: str) -> str:
+    """Return the absolute location of the cached MP3 for text + language.
+
+    Side effect: makes sure the cache directory exists.
+    """
+    _ensure_cache_dir()
+    filename = _cache_key(text, language) + ".mp3"
+    return os.path.join(AUDIO_CACHE_DIR, filename)
+
+
+async def synthesize_word(
+    text: str,
+    language: str = "de",
+    word_id: str = "",
+) -> Optional[str]:
+    """
+    Generate TTS audio for a word or short phrase.
+
+    Uses Piper TTS service (compliance-tts-service). The ``/synthesize``
+    endpoint is tried first; it answers with a URL from which the audio is
+    downloaded. If that attempt raises, yields no URL, or the download
+    fails, the ``/synthesize/mp3`` endpoint (which answers with the audio
+    bytes directly) is tried. A non-200 answer from ``/synthesize`` ends the
+    call without trying the fallback.
+
+    Args:
+        text: Word or phrase to speak.
+        language: Language code. "de" selects the "thorsten-high" voice,
+            every other value selects "lessac-high".
+        word_id: Optional identifier sent as ``content_id``; when empty the
+            cache key is sent instead.
+
+    Returns:
+        The file path to the cached MP3, or None on error.
+    """
+    cached = _cache_path(text, language)
+
+    # Check cache first. A zero-byte file is the residue of a failed write,
+    # not audio: treating it as a hit would make the word fail forever.
+    try:
+        if os.path.getsize(cached) > 0:
+            return cached
+    except OSError:
+        pass  # not cached yet
+
+    voice = "thorsten-high" if language == "de" else "lessac-high"
+    payload = {
+        "text": text,
+        "language": language,
+        "voice": voice,
+        "module_id": "vocabulary",
+    }
+
+    # Call Piper TTS service
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            resp = await client.post(
+                f"{TTS_SERVICE_URL}/synthesize",
+                json={
+                    **payload,
+                    "content_id": word_id or _cache_key(text, language),
+                },
+            )
+            if resp.status_code != 200:
+                logger.warning(
+                    "TTS service returned %s for '%s'", resp.status_code, text
+                )
+                return None
+
+            data = resp.json()
+            audio_url = data.get("audio_url") or data.get("presigned_url")
+
+            if audio_url:
+                # Download the audio file
+                audio_resp = await client.get(audio_url)
+                if audio_resp.status_code == 200 and _store_audio(
+                    cached, audio_resp.content
+                ):
+                    logger.info("TTS cached: '%s' (%s) → %s", text, language, cached)
+                    return cached
+
+    except Exception as e:
+        # Best effort: any failure here just moves on to the fallback below.
+        logger.warning("TTS service unavailable: %s", e)
+
+    # Fallback: try direct MP3 endpoint
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            resp = await client.post(
+                f"{TTS_SERVICE_URL}/synthesize/mp3",
+                json=payload,
+            )
+            is_audio = resp.headers.get("content-type", "").startswith("audio")
+            if resp.status_code == 200 and is_audio and _store_audio(
+                cached, resp.content
+            ):
+                logger.info(
+                    "TTS cached (direct): '%s' (%s) → %s", text, language, cached
+                )
+                return cached
+    except Exception as e:
+        logger.debug("TTS direct fallback also failed: %s", e)
+
+    return None
+
+
+def _store_audio(path: str, content: bytes) -> bool:
+    """Atomically write audio bytes to *path*; return False for empty audio.
+
+    The data goes to a uniquely named temporary file next to the target and
+    is then renamed over it, so a crash or a concurrent request never leaves
+    a truncated file at the final cache path.
+    """
+    if not content:
+        return False
+
+    tmp_path = f"{path}.{os.urandom(8).hex()}.tmp"
+    try:
+        with open(tmp_path, "wb") as f:
+            f.write(content)
+        os.replace(tmp_path, path)
+    except OSError:
+        # Clean up the partial temp file, then let the caller handle the error.
+        try:
+            os.remove(tmp_path)
+        except OSError:
+            pass
+        raise
+    return True
+
+
+async def get_or_generate_audio(
+    text: str, language: str = "de", word_id: str = "",
+) -> Optional[bytes]:
+    """
+    Return the MP3 bytes for a word, synthesizing them via TTS when needed.
+
+    Yields None when synthesis produced no file or the file is not on disk.
+    """
+    audio_file = await synthesize_word(text, language, word_id)
+    if not audio_file or not os.path.exists(audio_file):
+        return None
+
+    with open(audio_file, "rb") as handle:
+        audio = handle.read()
+    return audio
@@ -99,6 +99,68 @@ async def api_get_filters():
    }


+# ---------------------------------------------------------------------------
+# Audio TTS for Words
+# ---------------------------------------------------------------------------
+
+
+@router.get("/word/{word_id}/audio/{lang}")
+async def api_get_word_audio(word_id: str, lang: str = "en"):
+    """Get or generate TTS audio for a vocabulary word.
+
+    Returns MP3 audio. Generated on first request, cached after.
+    Uses Piper TTS (MIT license) with Thorsten (DE) and Lessac (EN) voices.
+
+    Raises:
+        HTTPException 400: unsupported language, or the word has no text
+            in the requested language.
+        HTTPException 404: unknown word id.
+        HTTPException 503: no audio could be generated.
+    """
+    from fastapi.responses import Response as FastAPIResponse
+
+    # Only "en" and "de" are meaningful: any other value would pick the
+    # German text but the English voice, and would create a separate cache
+    # entry per arbitrary string.
+    if lang not in ("en", "de"):
+        raise HTTPException(
+            status_code=400, detail=f"Sprache '{lang}' nicht unterstuetzt"
+        )
+
+    word = await get_word(word_id)
+    if not word:
+        raise HTTPException(status_code=404, detail="Wort nicht gefunden")
+
+    text = word.english if lang == "en" else word.german
+    if not text:
+        raise HTTPException(status_code=400, detail=f"Kein Text fuer Sprache '{lang}'")
+
+    from audio_service import get_or_generate_audio
+    audio_bytes = await get_or_generate_audio(text, language=lang, word_id=word_id)
+
+    if not audio_bytes:
+        raise HTTPException(status_code=503, detail="TTS Service nicht verfuegbar")
+
+    return FastAPIResponse(content=audio_bytes, media_type="audio/mpeg")
+
+
+@router.get("/word/{word_id}/audio-syllables/{lang}")
+async def api_get_syllable_audio(word_id: str, lang: str = "en"):
+    """Get TTS audio with slow syllable pronunciation.
+
+    Generates audio like "ap ... ple" with pauses between syllables.
+    Falls back to the whole word when no syllables are stored for it.
+
+    Raises:
+        HTTPException 400: unsupported language, or the word has neither
+            syllables nor text in the requested language.
+        HTTPException 404: unknown word id.
+        HTTPException 503: no audio could be generated.
+    """
+    from fastapi.responses import Response as FastAPIResponse
+
+    # Only "en" and "de" are meaningful: any other value would pick the
+    # German syllables but the English voice.
+    if lang not in ("en", "de"):
+        raise HTTPException(
+            status_code=400, detail=f"Sprache '{lang}' nicht unterstuetzt"
+        )
+
+    word = await get_word(word_id)
+    if not word:
+        raise HTTPException(status_code=404, detail="Wort nicht gefunden")
+
+    syllables = word.syllables_en if lang == "en" else word.syllables_de
+    if not syllables:
+        # Fallback to full word
+        text = word.english if lang == "en" else word.german
+        if not text:
+            # Without this guard the join below would fail on a missing text.
+            raise HTTPException(
+                status_code=400, detail=f"Kein Text fuer Sprache '{lang}'"
+            )
+        syllables = [text]
+
+    # Join syllables with pauses (Piper handles "..." as pause)
+    slow_text = " ... ".join(syllables)
+
+    from audio_service import get_or_generate_audio
+    # Distinct content id so the slow variant is not confused with the
+    # normal pronunciation of the same word.
+    cache_key = f"{word_id}_syl_{lang}"
+    audio_bytes = await get_or_generate_audio(slow_text, language=lang, word_id=cache_key)
+
+    if not audio_bytes:
+        raise HTTPException(status_code=503, detail="TTS Service nicht verfuegbar")
+
+    return FastAPIResponse(content=audio_bytes, media_type="audio/mpeg")
+
+
 # ---------------------------------------------------------------------------
 # Learning Unit Creation from Word Selection
 # ---------------------------------------------------------------------------