feat(tts): add /synthesize-direct endpoint for real-time audio streaming

- Returns MP3 audio directly in response body (no MinIO upload)
- Disk cache (/tmp/tts-cache) avoids re-synthesis of identical text
- Used by pitch-deck presenter for real-time TTS playback

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-20 12:25:25 +01:00
parent 95c371e9a5
commit 5ea31a3236
1 changed files with 52 additions and 0 deletions
@@ -1,10 +1,12 @@
 """Compliance TTS Service — Piper TTS + FFmpeg Audio/Video Pipeline."""
+import hashlib
 import logging
 import os
 import tempfile
 import uuid

 from fastapi import FastAPI, HTTPException
+from fastapi.responses import FileResponse, Response
 from pydantic import BaseModel

 from storage import StorageClient
@@ -116,6 +118,56 @@ async def list_voices():
    }


+class SynthesizeDirectRequest(BaseModel):
+    """Request body for the ``/synthesize-direct`` endpoint."""
+
+    # Text to synthesize; the handler rejects empty or whitespace-only text.
+    text: str
+    # Language code, defaulting to "de".
+    # NOTE(review): the visible handler never reads this field (it is not
+    # passed to the synthesizer and is not part of the cache key) — confirm
+    # whether it is meant to select a voice.
+    language: str = "de"
+
+
+# Disk cache for synthesized audio, so identical text is not re-synthesized.
+# The /synthesize-direct handler stores one "<text-hash>.mp3" file per text.
+# NOTE(review): nothing in the visible code evicts or expires entries, so the
+# directory grows with every distinct text — confirm that is acceptable.
+TTS_CACHE_DIR = "/tmp/tts-cache"
+# Created at import time so the handler can write cache files right away.
+os.makedirs(TTS_CACHE_DIR, exist_ok=True)
+
+
+@app.post("/synthesize-direct")
+async def synthesize_direct(req: SynthesizeDirectRequest):
+    """Synthesize text and return the MP3 audio directly (no MinIO upload).
+
+    Used by the pitch-deck presenter for real-time TTS playback. Results are
+    cached on disk under ``TTS_CACHE_DIR`` so identical text is synthesized
+    only once.
+
+    Args:
+        req: Request body. Only ``req.text`` is used here; ``req.language``
+            is neither passed to the synthesizer nor part of the cache key.
+
+    Returns:
+        A ``FileResponse`` (``audio/mpeg``) serving the cached MP3 file. The
+        ``X-TTS-Cache`` header is ``"hit"`` or ``"miss"``; on a miss,
+        ``X-TTS-Duration`` carries the duration reported by the synthesizer
+        (presumably seconds), rounded to two decimals.
+
+    Raises:
+        HTTPException: 400 if the text is empty or whitespace-only; 500 if
+            synthesis or writing the cache entry fails.
+    """
+    if not req.text.strip():
+        raise HTTPException(status_code=400, detail="Text is empty")
+
+    # Cache key: first 16 hex characters of the SHA-256 of the raw text.
+    text_hash = hashlib.sha256(req.text.encode()).hexdigest()[:16]
+    cache_path = os.path.join(TTS_CACHE_DIR, f"{text_hash}.mp3")
+
+    if os.path.exists(cache_path):
+        logger.info(f"TTS cache hit: {text_hash}")
+        return FileResponse(
+            cache_path,
+            media_type="audio/mpeg",
+            headers={"X-TTS-Cache": "hit"},
+        )
+
+    # Function-scope import, as in the original, so the module's top-level
+    # import block does not need to change.
+    import shutil
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        try:
+            mp3_path, duration = tts.synthesize_to_mp3(req.text, tmpdir)
+            # Publish the cache entry atomically: copy to a uniquely named
+            # staging file in the cache directory, then rename it into place.
+            # Writing to cache_path directly would let a concurrent request
+            # pass the os.path.exists() check above and be served a partially
+            # written MP3, and a failed copy would leave a corrupt entry that
+            # is returned as a "hit" forever after.
+            staging_path = f"{cache_path}.{uuid.uuid4().hex}.tmp"
+            try:
+                shutil.copy2(mp3_path, staging_path)
+                os.replace(staging_path, cache_path)
+            finally:
+                # After a successful replace the staging file is gone; this
+                # only cleans up after a failed copy or rename.
+                if os.path.exists(staging_path):
+                    os.remove(staging_path)
+            logger.info(f"TTS synthesized: {len(req.text)} chars, {duration:.1f}s, cached as {text_hash}")
+        except Exception as e:
+            # logger.exception records the traceback along with the message.
+            logger.exception(f"Direct synthesis failed: {e}")
+            raise HTTPException(status_code=500, detail=str(e)) from e
+
+    return FileResponse(
+        cache_path,
+        media_type="audio/mpeg",
+        headers={"X-TTS-Cache": "miss", "X-TTS-Duration": str(round(duration, 2))},
+    )
+
+
@app.post("/presigned-url", response_model=PresignedURLResponse)
 async def get_presigned_url(req: PresignedURLRequest):
    """Generate a presigned URL for accessing a stored media file."""