Add Piper TTS audio integration for vocabulary words
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 32s
CI / test-python-klausur (push) Failing after 2m24s
CI / test-python-agent-core (push) Successful in 24s
CI / test-nodejs-website (push) Successful in 26s
Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 32s
CI / test-python-klausur (push) Failing after 2m24s
CI / test-python-agent-core (push) Successful in 24s
CI / test-nodejs-website (push) Successful in 26s
audio_service.py: Connects to compliance-tts-service (Piper TTS,
MIT license) for high-quality German (Thorsten) and English (Lessac)
voices. Audio cached as MP3 on first request.
vocabulary_api.py: New endpoints:
- GET /vocabulary/word/{id}/audio/{lang} — word pronunciation
- GET /vocabulary/word/{id}/audio-syllables/{lang} — slow syllable-by-syllable pronunciation
Anton App analysis: identified 5 features to adopt (star system,
games as rewards, progress bars, listening exercises, matching exercises).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
125
backend-lehrer/audio_service.py
Normal file
125
backend-lehrer/audio_service.py
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
"""
|
||||||
|
Audio Service — Generates TTS audio for vocabulary words.
|
||||||
|
|
||||||
|
Uses the Piper TTS service (compliance-tts-service, MIT license)
|
||||||
|
for high-quality German (Thorsten) and English (Lessac) voices.
|
||||||
|
Tries the direct MP3 endpoint as a fallback; returns None if the TTS service is unavailable.
|
||||||
|
|
||||||
|
Audio files are cached — generated once, served forever.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Base URL of the Piper TTS service (runs in the compliance stack).
# Overridable through the TTS_SERVICE_URL environment variable.
TTS_SERVICE_URL = os.getenv("TTS_SERVICE_URL", "http://bp-compliance-tts:8095")

# Local cache directory for generated audio.
# Overridable through the AUDIO_CACHE_DIR environment variable so deployments
# can point the cache at a persistent volume; the default is unchanged.
# A leading "~" is expanded in either case.
AUDIO_CACHE_DIR = os.path.expanduser(
    os.getenv("AUDIO_CACHE_DIR", "~/Arbeitsblaetter/audio-cache")
)
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_cache_dir():
    """Create the audio cache directory (including parents) if it is missing."""
    # exist_ok=True turns repeated calls into a no-op instead of an error.
    os.makedirs(name=AUDIO_CACHE_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _cache_key(text: str, language: str) -> str:
    """Generate a deterministic, filename-safe cache key for text + language.

    The key is ``<language>_<first 16 hex chars of sha256("language:text")>``.
    Because the key is used as a file name, every character of the language
    prefix that is not alphanumeric, ``-`` or ``_`` is replaced by ``_`` so a
    crafted language value (e.g. ``"../../x"``) cannot point outside the cache
    directory. The hash is still computed over the raw language string, so
    two languages that sanitise to the same prefix still get different keys.
    Keys for plain codes such as ``"de"`` and ``"en"`` are unchanged.
    """
    digest = hashlib.sha256(f"{language}:{text}".encode()).hexdigest()[:16]
    safe_language = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in language
    )
    return f"{safe_language}_{digest}"
|
||||||
|
|
||||||
|
|
||||||
|
def _cache_path(text: str, language: str) -> str:
    """Return the full path of the cached MP3 file for *text* in *language*.

    As a side effect the cache directory is created if it does not exist,
    so the returned path can be written to right away.
    """
    _ensure_cache_dir()
    filename = _cache_key(text, language) + ".mp3"
    return os.path.join(AUDIO_CACHE_DIR, filename)
|
||||||
|
|
||||||
|
|
||||||
|
def _has_cached_audio(path: str) -> bool:
    """Return True if *path* exists and holds at least one byte of audio."""
    try:
        return os.path.getsize(path) > 0
    except OSError:
        # Missing or unreadable file: treat as a cache miss.
        return False


def _store_audio(path: str, audio: bytes) -> bool:
    """Atomically write *audio* to *path*; return False if *audio* is empty.

    The data is written to a temporary sibling file and moved into place with
    ``os.replace`` so readers never observe a partially written cache entry.
    """
    if not audio:
        return False
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            f.write(audio)
        os.replace(tmp_path, path)
    except OSError:
        # Best-effort cleanup of the temp file; the original error is re-raised.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return True


async def synthesize_word(
    text: str,
    language: str = "de",
    word_id: str = "",
) -> Optional[str]:
    """
    Generate TTS audio for a word or short phrase.

    Args:
        text: The word or phrase to synthesize. Blank text yields None.
        language: Language code; "de" selects the "thorsten-high" voice,
            anything else selects "lessac-high".
        word_id: Optional identifier sent to the TTS service as ``content_id``;
            the cache key is used when it is empty.

    Returns:
        The file path of the cached MP3, or None if no audio could be
        obtained. Errors are logged, never raised.

    The primary path posts to ``/synthesize`` and downloads the audio from
    the URL in the response. If that raises, returns no URL, or the download
    fails, the ``/synthesize/mp3`` endpoint is tried as a fallback. A non-200
    answer from ``/synthesize`` returns None without trying the fallback.
    """
    if not text or not text.strip():
        return None

    # Check cache first. A zero-byte file is not valid audio and is regenerated.
    cached = _cache_path(text, language)
    if _has_cached_audio(cached):
        return cached

    payload = {
        "text": text,
        "language": language,
        "voice": "thorsten-high" if language == "de" else "lessac-high",
        "module_id": "vocabulary",
    }

    # Call Piper TTS service
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{TTS_SERVICE_URL}/synthesize",
                json={
                    **payload,
                    "content_id": word_id or _cache_key(text, language),
                },
            )
            if resp.status_code != 200:
                logger.warning(
                    "TTS service returned %s for '%s'", resp.status_code, text
                )
                return None

            data = resp.json()
            audio_url = data.get("audio_url") or data.get("presigned_url")

            if audio_url:
                # Download the audio file
                audio_resp = await client.get(audio_url)
                if audio_resp.status_code == 200 and _store_audio(
                    cached, audio_resp.content
                ):
                    logger.info("TTS cached: '%s' (%s) → %s", text, language, cached)
                    return cached

    except Exception as e:
        # Deliberately broad: any failure here just moves on to the fallback.
        logger.warning("TTS service unavailable: %s", e)

    # Fallback: try direct MP3 endpoint
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{TTS_SERVICE_URL}/synthesize/mp3",
                json=payload,
            )
            is_audio = resp.headers.get("content-type", "").startswith("audio")
            if resp.status_code == 200 and is_audio and _store_audio(
                cached, resp.content
            ):
                logger.info(
                    "TTS cached (direct): '%s' (%s) → %s", text, language, cached
                )
                return cached
    except Exception as e:
        logger.debug("TTS direct fallback also failed: %s", e)

    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def get_or_generate_audio(
    text: str, language: str = "de", word_id: str = "",
) -> Optional[bytes]:
    """
    Get audio bytes for a word, generating them via TTS if not cached.

    Args:
        text: The word or phrase to speak.
        language: Language code passed through to ``synthesize_word``.
        word_id: Optional identifier passed through to ``synthesize_word``.

    Returns:
        The MP3 bytes, or None if synthesis failed or the cached file is
        missing, unreadable or empty.
    """
    path = await synthesize_word(text, language, word_id)
    if not path:
        return None

    # Open directly instead of checking existence first: the file may vanish
    # between a check and the read.
    try:
        with open(path, "rb") as f:
            audio = f.read()
    except OSError as e:
        logger.warning("Cached audio file unreadable: %s", e)
        return None

    # Normalise an empty file to None so callers only see real audio or None.
    return audio or None
|
||||||
@@ -99,6 +99,68 @@ async def api_get_filters():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Audio TTS for Words
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/word/{word_id}/audio/{lang}")
async def api_get_word_audio(word_id: str, lang: str = "en"):
    """Get or generate TTS audio for a vocabulary word.

    Returns MP3 audio (``audio/mpeg``). Generated on first request, cached after.
    Uses Piper TTS (MIT license) with Thorsten (DE) and Lessac (EN) voices.

    Raises:
        HTTPException 400: ``lang`` is not "en" or "de", or the word has no
            text in the requested language.
        HTTPException 404: no word with ``word_id`` exists.
        HTTPException 503: the TTS service produced no audio.
    """
    from fastapi.responses import Response as FastAPIResponse

    # Only these two languages have both a text field and a matching voice.
    # Any other value used to select the German text with the English voice.
    if lang not in ("en", "de"):
        raise HTTPException(
            status_code=400, detail=f"Sprache '{lang}' nicht unterstuetzt"
        )

    word = await get_word(word_id)
    if not word:
        raise HTTPException(status_code=404, detail="Wort nicht gefunden")

    text = word.english if lang == "en" else word.german
    if not text:
        raise HTTPException(status_code=400, detail=f"Kein Text fuer Sprache '{lang}'")

    from audio_service import get_or_generate_audio
    audio_bytes = await get_or_generate_audio(text, language=lang, word_id=word_id)

    if not audio_bytes:
        raise HTTPException(status_code=503, detail="TTS Service nicht verfuegbar")

    return FastAPIResponse(content=audio_bytes, media_type="audio/mpeg")
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/word/{word_id}/audio-syllables/{lang}")
async def api_get_syllable_audio(word_id: str, lang: str = "en"):
    """Get TTS audio with slow syllable pronunciation.

    Generates audio like "ap ... ple" with pauses between syllables. If the
    word has no syllable data for the language, the full word is spoken.

    Raises:
        HTTPException 400: ``lang`` is not "en" or "de", or the word has
            neither syllables nor text in the requested language.
        HTTPException 404: no word with ``word_id`` exists.
        HTTPException 503: the TTS service produced no audio.
    """
    from fastapi.responses import Response as FastAPIResponse

    # Only these two languages have syllable/text fields and a matching voice.
    # Any other value used to select the German data with the English voice.
    if lang not in ("en", "de"):
        raise HTTPException(
            status_code=400, detail=f"Sprache '{lang}' nicht unterstuetzt"
        )

    word = await get_word(word_id)
    if not word:
        raise HTTPException(status_code=404, detail="Wort nicht gefunden")

    # NOTE(review): assumes the syllables_* fields are lists of strings — confirm
    # against the word model.
    syllables = word.syllables_en if lang == "en" else word.syllables_de
    if not syllables:
        # Fallback to full word
        text = word.english if lang == "en" else word.german
        if not text:
            # Without this check join() would fail on a None entry (HTTP 500).
            raise HTTPException(
                status_code=400, detail=f"Kein Text fuer Sprache '{lang}'"
            )
        syllables = [text]

    # Join syllables with "..." (per the original author, Piper treats it as a pause)
    slow_text = " ... ".join(syllables)

    from audio_service import get_or_generate_audio
    # Passed on as word_id so the syllable variant gets its own identifier.
    cache_key = f"{word_id}_syl_{lang}"
    audio_bytes = await get_or_generate_audio(slow_text, language=lang, word_id=cache_key)

    if not audio_bytes:
        raise HTTPException(status_code=503, detail="TTS Service nicht verfuegbar")

    return FastAPIResponse(content=audio_bytes, media_type="audio/mpeg")
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Learning Unit Creation from Word Selection
|
# Learning Unit Creation from Word Selection
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user