Add Piper TTS audio integration for vocabulary words
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 32s
CI / test-python-klausur (push) Failing after 2m24s
CI / test-python-agent-core (push) Successful in 24s
CI / test-nodejs-website (push) Successful in 26s

audio_service.py: Connects to compliance-tts-service (Piper TTS,
MIT license) for high-quality German (Thorsten) and English (Lessac)
voices. Audio cached as MP3 on first request.

vocabulary_api.py: New endpoints:
- GET /vocabulary/word/{id}/audio/{lang} — word pronunciation
- GET /vocabulary/word/{id}/audio-syllables/{lang} — slow, syllable-by-syllable pronunciation

Anton App analysis: identified 5 features to adopt (star system,
games as rewards, progress bars, listening exercises, matching exercises).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 15:40:01 +02:00
parent 0ff5399a62
commit dc60233262
2 changed files with 187 additions and 0 deletions

View File

@@ -0,0 +1,125 @@
"""
Audio Service — Generates TTS audio for vocabulary words.
Uses the Piper TTS service (compliance-tts-service, MIT license)
for high-quality German (Thorsten) and English (Lessac) voices.
Returns no audio (None) if the TTS service is unavailable.
Audio files are cached — generated once, served forever.
"""
import hashlib
import logging
import os
from typing import Optional
import httpx
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Base URL of the Piper TTS service (runs in compliance stack).
# Can be overridden through the TTS_SERVICE_URL environment variable.
TTS_SERVICE_URL = os.getenv("TTS_SERVICE_URL", "http://bp-compliance-tts:8095")
# Local cache directory for generated audio, resolved below the current
# user's home directory.
AUDIO_CACHE_DIR = os.path.expanduser("~/Arbeitsblaetter/audio-cache")
def _ensure_cache_dir():
    """Create the audio cache directory (including parents) if it is missing."""
    cache_dir = AUDIO_CACHE_DIR
    # exist_ok=True makes repeated calls harmless.
    os.makedirs(cache_dir, exist_ok=True)
def _cache_key(text: str, language: str) -> str:
    """Build a deterministic cache key from language and text.

    The key is the language followed by an underscore and the first 16 hex
    characters of the SHA-256 digest of ``"<language>:<text>"``.
    """
    payload = "{}:{}".format(language, text).encode()
    digest = hashlib.sha256(payload).hexdigest()
    short_digest = digest[:16]
    return "{}_{}".format(language, short_digest)
def _cache_path(text: str, language: str) -> str:
    """Return the full path of the cached MP3 file for text + language.

    The cache directory is created first if it does not exist.
    """
    _ensure_cache_dir()
    filename = f"{_cache_key(text, language)}.mp3"
    return os.path.join(AUDIO_CACHE_DIR, filename)
def _has_cached_audio(path: str) -> bool:
    """Return True if *path* exists and is non-empty (i.e. a usable cache hit)."""
    try:
        return os.path.getsize(path) > 0
    except OSError:
        # Missing or unreadable file: treat as a cache miss.
        return False


def _store_audio(path: str, payload: bytes) -> bool:
    """Atomically write *payload* to *path*; return False for an empty payload."""
    if not payload:
        return False
    # Write next to the target and rename, so readers never see a partial file.
    # The PID suffix keeps separate worker processes from sharing a temp file.
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            f.write(payload)
        os.replace(tmp_path, path)
    except OSError:
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return True


async def synthesize_word(
    text: str,
    language: str = "de",
    word_id: str = "",
) -> Optional[str]:
    """
    Generate TTS audio for a word or short phrase.

    Looks for a cached MP3 first. On a miss it posts to the TTS service's
    ``/synthesize`` endpoint and downloads the audio from the returned
    ``audio_url`` / ``presigned_url``. If that raises, or yields no usable
    audio, it posts to ``/synthesize/mp3`` and stores the response body when
    it has an ``audio*`` content type. A non-200 answer from ``/synthesize``
    ends the attempt without trying the fallback.

    Args:
        text: Word or phrase to synthesize.
        language: Language code; "de" selects the "thorsten-high" voice,
            any other value selects "lessac-high".
        word_id: Sent as ``content_id``; the cache key is used when empty.

    Returns:
        The file path of the cached MP3, or None on error.
    """
    # Check cache first. Empty files are leftovers of a failed write and are
    # regenerated instead of being served.
    cached = _cache_path(text, language)
    if _has_cached_audio(cached):
        return cached

    voice = "thorsten-high" if language == "de" else "lessac-high"

    # Call Piper TTS service
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{TTS_SERVICE_URL}/synthesize",
                json={
                    "text": text,
                    "language": language,
                    "voice": voice,
                    "module_id": "vocabulary",
                    "content_id": word_id or _cache_key(text, language),
                },
            )
            if resp.status_code != 200:
                logger.warning("TTS service returned %s for '%s'", resp.status_code, text)
                return None
            data = resp.json()
            audio_url = data.get("audio_url") or data.get("presigned_url")
            if audio_url:
                # Download the audio file
                audio_resp = await client.get(audio_url)
                if audio_resp.status_code == 200 and _store_audio(cached, audio_resp.content):
                    logger.info("TTS cached: '%s' (%s) → %s", text, language, cached)
                    return cached
    except Exception as e:
        # Best effort: any failure here falls through to the direct endpoint.
        logger.warning("TTS service unavailable: %s", e)

    # Fallback: try direct MP3 endpoint
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{TTS_SERVICE_URL}/synthesize/mp3",
                json={
                    "text": text,
                    "language": language,
                    "voice": voice,
                    "module_id": "vocabulary",
                },
            )
            is_audio = resp.headers.get("content-type", "").startswith("audio")
            if resp.status_code == 200 and is_audio and _store_audio(cached, resp.content):
                logger.info("TTS cached (direct): '%s' (%s) → %s", text, language, cached)
                return cached
    except Exception as e:
        logger.debug("TTS direct fallback also failed: %s", e)
    return None
async def get_or_generate_audio(
    text: str,
    language: str = "de",
    word_id: str = "",
) -> Optional[bytes]:
    """
    Return the MP3 bytes for a word, or None if no audio is available.

    Delegates to synthesize_word, which generates the audio via TTS when it
    is not cached, and then reads the resulting file.
    """
    mp3_path = await synthesize_word(text, language, word_id)
    if not mp3_path or not os.path.exists(mp3_path):
        return None
    with open(mp3_path, "rb") as audio_file:
        return audio_file.read()

View File

@@ -99,6 +99,68 @@ async def api_get_filters():
}
# ---------------------------------------------------------------------------
# Audio TTS for Words
# ---------------------------------------------------------------------------
@router.get("/word/{word_id}/audio/{lang}")
async def api_get_word_audio(word_id: str, lang: str = "en"):
    """Get or generate TTS audio for a vocabulary word.

    Looks up the word, picks its English or German text according to
    ``lang`` and returns the audio from ``audio_service.get_or_generate_audio``
    as an ``audio/mpeg`` response.

    Raises:
        HTTPException 400: ``lang`` is not "en" or "de", or the word has no
            text for that language.
        HTTPException 404: the word does not exist.
        HTTPException 503: no audio could be produced.
    """
    from fastapi.responses import Response as FastAPIResponse

    # lang comes straight from the URL. Without this check any value other
    # than "en" would silently select the German text.
    if lang not in ("en", "de"):
        raise HTTPException(status_code=400, detail=f"Sprache '{lang}' wird nicht unterstuetzt")

    word = await get_word(word_id)
    if not word:
        raise HTTPException(status_code=404, detail="Wort nicht gefunden")

    text = word.english if lang == "en" else word.german
    if not text:
        raise HTTPException(status_code=400, detail=f"Kein Text fuer Sprache '{lang}'")

    from audio_service import get_or_generate_audio

    audio_bytes = await get_or_generate_audio(text, language=lang, word_id=word_id)
    if not audio_bytes:
        raise HTTPException(status_code=503, detail="TTS Service nicht verfuegbar")
    return FastAPIResponse(content=audio_bytes, media_type="audio/mpeg")
@router.get("/word/{word_id}/audio-syllables/{lang}")
async def api_get_syllable_audio(word_id: str, lang: str = "en"):
    """Get TTS audio with slow syllable pronunciation.

    Joins the word's syllables with " ... " (e.g. "ap ... ple") and returns
    the audio from ``audio_service.get_or_generate_audio`` as an
    ``audio/mpeg`` response. Falls back to the full word when the word has no
    syllables for the requested language.

    Raises:
        HTTPException 400: ``lang`` is not "en" or "de", or the word has
            neither syllables nor text for that language.
        HTTPException 404: the word does not exist.
        HTTPException 503: no audio could be produced.
    """
    from fastapi.responses import Response as FastAPIResponse

    # lang comes straight from the URL. Without this check any value other
    # than "en" would silently select the German data.
    if lang not in ("en", "de"):
        raise HTTPException(status_code=400, detail=f"Sprache '{lang}' wird nicht unterstuetzt")

    word = await get_word(word_id)
    if not word:
        raise HTTPException(status_code=404, detail="Wort nicht gefunden")

    # NOTE(review): assumes syllables_en / syllables_de are lists of strings — confirm.
    syllables = word.syllables_en if lang == "en" else word.syllables_de
    if not syllables:
        # Fallback to full word
        text = word.english if lang == "en" else word.german
        if not text:
            # Same answer as the word-audio endpoint; joining [None] would
            # otherwise raise a TypeError.
            raise HTTPException(status_code=400, detail=f"Kein Text fuer Sprache '{lang}'")
        syllables = [text]

    # Join syllables with pauses (the original author notes that Piper
    # handles "..." as a pause)
    slow_text = " ... ".join(syllables)

    from audio_service import get_or_generate_audio

    # Distinct id so the syllable variant is not confused with the plain word.
    cache_key = f"{word_id}_syl_{lang}"
    audio_bytes = await get_or_generate_audio(slow_text, language=lang, word_id=cache_key)
    if not audio_bytes:
        raise HTTPException(status_code=503, detail="TTS Service nicht verfuegbar")
    return FastAPIResponse(content=audio_bytes, media_type="audio/mpeg")
# ---------------------------------------------------------------------------
# Learning Unit Creation from Word Selection
# ---------------------------------------------------------------------------