Fix: Use /synthesize-direct for correct language selection

Previously, /synthesize always used the German model, regardless of the requested language. /synthesize-direct uses Edge TTS (with language-aware voice selection) and falls back to Piper with the correct model (Thorsten for DE, Lessac for EN). The audio cache was also cleared to purge incorrectly generated files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-26 23:46:40 +02:00
parent 6b3bff48f0
commit 198a0b2a0d
1 changed files with 2 additions and 44 deletions
@@ -56,56 +56,14 @@ async def synthesize_word(
    if os.path.exists(cached):
        return cached
-    # Call Piper TTS service
+    # Call Piper TTS service via /synthesize-direct (returns MP3, selects language correctly)
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
-                f"{TTS_SERVICE_URL}/synthesize",
+                f"{TTS_SERVICE_URL}/synthesize-direct",
                json={
                    "text": text,
                    "language": language,
                    "voice": "thorsten-high" if language == "de" else "lessac-high",
                    "module_id": "vocabulary",
                    "content_id": word_id or _cache_key(text, language),
                },
            )
            if resp.status_code != 200:
                logger.warning(f"TTS service returned {resp.status_code} for '{text}'")
                return None
            data = resp.json()
            bucket = data.get("bucket")
            object_key = data.get("object_key")
            if bucket and object_key:
                # Get presigned URL to download the audio
                url_resp = await client.post(
                    f"{TTS_SERVICE_URL}/presigned-url",
                    json={"bucket": bucket, "object_key": object_key, "expires": 300},
                )
                if url_resp.status_code == 200:
                    audio_url = url_resp.json().get("url")
                    if audio_url:
                        audio_resp = await client.get(audio_url)
                        if audio_resp.status_code == 200:
                            with open(cached, "wb") as f:
                                f.write(audio_resp.content)
                            logger.info(f"TTS cached: '{text}' ({language}) → {cached}")
                            return cached
    except Exception as e:
        logger.warning(f"TTS service unavailable: {e}")
    # Fallback: try direct MP3 endpoint
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{TTS_SERVICE_URL}/synthesize/mp3",
                json={
                    "text": text,
                    "language": language,
                    "voice": "thorsten-high" if language == "de" else "lessac-high",
                    "module_id": "vocabulary",
                },
            )
            if resp.status_code == 200 and resp.headers.get("content-type", "").startswith("audio"):