feat(tts): add /synthesize-direct endpoint for real-time audio streaming

- Returns MP3 audio directly in response body (no MinIO upload)
- Disk cache (/tmp/tts-cache) avoids re-synthesis of identical text
- Used by pitch-deck presenter for real-time TTS playback

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-20 12:25:25 +01:00
parent 95c371e9a5
commit 5ea31a3236

View File

@@ -1,10 +1,12 @@
"""Compliance TTS Service — Piper TTS + FFmpeg Audio/Video Pipeline.""" """Compliance TTS Service — Piper TTS + FFmpeg Audio/Video Pipeline."""
import hashlib
import logging import logging
import os import os
import tempfile import tempfile
import uuid import uuid
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, Response
from pydantic import BaseModel from pydantic import BaseModel
from storage import StorageClient from storage import StorageClient
@@ -116,6 +118,56 @@ async def list_voices():
} }
class SynthesizeDirectRequest(BaseModel):
    """Request body accepted by the ``/synthesize-direct`` endpoint."""

    # Text to be synthesized (required).
    text: str
    # Language selector; defaults to "de" when the client omits it.
    language: str = "de"
# Simple disk cache for synthesized audio (avoids re-synthesis of same text).
# The location may be overridden through the TTS_CACHE_DIR environment
# variable; when it is unset or empty the default "/tmp/tts-cache" is used.
TTS_CACHE_DIR = os.environ.get("TTS_CACHE_DIR") or "/tmp/tts-cache"
# Created at import time so request handlers can assume the directory exists.
os.makedirs(TTS_CACHE_DIR, exist_ok=True)
@app.post("/synthesize-direct")
async def synthesize_direct(req: SynthesizeDirectRequest):
    """Synthesize text and return MP3 audio directly (no MinIO upload).

    Used by the pitch-deck presenter for real-time TTS playback.
    Includes disk caching so identical text is only synthesized once.

    Args:
        req: Request body. Only ``req.text`` is read here.

    Returns:
        A ``FileResponse`` streaming the cached MP3 with media type
        ``audio/mpeg``. The ``X-TTS-Cache`` header is ``"hit"`` or ``"miss"``;
        on a miss ``X-TTS-Duration`` carries the duration rounded to two
        decimals.

    Raises:
        HTTPException: 400 when the text is empty or whitespace only;
            500 when synthesis or writing the cache entry fails.
    """
    import shutil

    if not req.text.strip():
        raise HTTPException(status_code=400, detail="Text is empty")

    # Cache key based on text hash (first 16 hex chars of SHA-256).
    # NOTE(review): req.language is neither passed to the synthesizer nor
    # part of the cache key -- confirm whether that is intended.
    text_hash = hashlib.sha256(req.text.encode()).hexdigest()[:16]
    cache_path = os.path.join(TTS_CACHE_DIR, f"{text_hash}.mp3")

    if os.path.exists(cache_path):
        logger.info(f"TTS cache hit: {text_hash}")
        return FileResponse(
            cache_path,
            media_type="audio/mpeg",
            headers={"X-TTS-Cache": "hit"},
        )

    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            mp3_path, duration = tts.synthesize_to_mp3(req.text, tmpdir)

            # Stage the copy under a ".tmp" name inside the cache directory
            # and publish it with os.replace(). A concurrent request then
            # either sees no cache entry or a complete file, never a
            # partially written MP3.
            fd, staged_path = tempfile.mkstemp(
                dir=TTS_CACHE_DIR, prefix=f"{text_hash}.", suffix=".tmp"
            )
            os.close(fd)
            try:
                shutil.copy2(mp3_path, staged_path)
                os.replace(staged_path, cache_path)
            finally:
                # After a successful replace the staged name is gone; this
                # only removes leftovers when the copy or rename failed.
                if os.path.exists(staged_path):
                    os.unlink(staged_path)

            logger.info(f"TTS synthesized: {len(req.text)} chars, {duration:.1f}s, cached as {text_hash}")
        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception(f"Direct synthesis failed: {e}")
            raise HTTPException(status_code=500, detail=str(e)) from e

    return FileResponse(
        cache_path,
        media_type="audio/mpeg",
        headers={"X-TTS-Cache": "miss", "X-TTS-Duration": str(round(duration, 2))},
    )
@app.post("/presigned-url", response_model=PresignedURLResponse) @app.post("/presigned-url", response_model=PresignedURLResponse)
async def get_presigned_url(req: PresignedURLRequest): async def get_presigned_url(req: PresignedURLRequest):
"""Generate a presigned URL for accessing a stored media file.""" """Generate a presigned URL for accessing a stored media file."""