feat(tts): add /synthesize-direct endpoint for real-time audio streaming
- Returns MP3 audio directly in the response body (no MinIO upload)
- Disk cache (/tmp/tts-cache) avoids re-synthesis of identical text
- Used by the pitch-deck presenter for real-time TTS playback

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,10 +1,12 @@
|
|||||||
"""Compliance TTS Service — Piper TTS + FFmpeg Audio/Video Pipeline."""
|
"""Compliance TTS Service — Piper TTS + FFmpeg Audio/Video Pipeline."""
|
||||||
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from fastapi.responses import FileResponse, Response
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from storage import StorageClient
|
from storage import StorageClient
|
||||||
@@ -116,6 +118,56 @@ async def list_voices():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SynthesizeDirectRequest(BaseModel):
    """Request body accepted by the ``/synthesize-direct`` endpoint."""

    # Text to be spoken; required, no default.
    text: str

    # Language code of the text; falls back to "de" when omitted.
    language: str = "de"
|
||||||
|
|
||||||
|
|
||||||
|
# Simple disk cache for synthesized audio (avoids re-synthesis of same text).
# The location can be overridden with the TTS_CACHE_DIR environment variable;
# an unset or empty variable keeps the original default of /tmp/tts-cache.
TTS_CACHE_DIR = os.environ.get("TTS_CACHE_DIR") or "/tmp/tts-cache"
# Create the directory at import time so request handlers can assume it exists.
os.makedirs(TTS_CACHE_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/synthesize-direct")
async def synthesize_direct(req: SynthesizeDirectRequest):
    """Synthesize text and return MP3 audio directly (no MinIO upload).

    Used by the pitch-deck presenter for real-time TTS playback.
    Includes disk caching so identical text is only synthesized once.

    Args:
        req: Request carrying the text to speak. ``req.language`` is accepted
            but is not passed to the synthesizer in this handler, so it is
            deliberately not part of the cache key.

    Returns:
        A ``FileResponse`` serving the cached MP3 as ``audio/mpeg``. The
        ``X-TTS-Cache`` header is ``hit`` or ``miss``; on a miss the
        ``X-TTS-Duration`` header carries the duration rounded to two
        decimals.

    Raises:
        HTTPException: 400 if the text is empty or whitespace-only; 500 if
            synthesis or writing the cache entry fails.
    """
    import shutil

    if not req.text.strip():
        raise HTTPException(status_code=400, detail="Text is empty")

    # Cache key based on text hash (first 16 hex chars of SHA-256).
    text_hash = hashlib.sha256(req.text.encode()).hexdigest()[:16]
    cache_path = os.path.join(TTS_CACHE_DIR, f"{text_hash}.mp3")

    if os.path.exists(cache_path):
        logger.info(f"TTS cache hit: {text_hash}")
        return FileResponse(
            cache_path,
            media_type="audio/mpeg",
            headers={"X-TTS-Cache": "hit"},
        )

    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            # NOTE(review): this is a synchronous call inside an async
            # handler; if synthesis is slow it will hold up the event loop.
            # Consider offloading it once the synthesizer's thread-safety
            # is confirmed.
            mp3_path, duration = tts.synthesize_to_mp3(req.text, tmpdir)

            # Publish the cache entry atomically. Copying straight to
            # cache_path would let a concurrent request (or a failed copy)
            # observe a partially written file, because the existence check
            # above treats any file at cache_path as a complete entry.
            # The staging file lives in the cache directory so os.replace
            # stays on one filesystem.
            staging_path = f"{cache_path}.{uuid.uuid4().hex}.tmp"
            try:
                shutil.copy2(mp3_path, staging_path)
                os.replace(staging_path, cache_path)
            finally:
                # Only present if the copy or the replace failed.
                if os.path.exists(staging_path):
                    os.remove(staging_path)

            logger.info(f"TTS synthesized: {len(req.text)} chars, {duration:.1f}s, cached as {text_hash}")
        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception(f"Direct synthesis failed: {e}")
            raise HTTPException(status_code=500, detail=str(e)) from e

    return FileResponse(
        cache_path,
        media_type="audio/mpeg",
        headers={"X-TTS-Cache": "miss", "X-TTS-Duration": str(round(duration, 2))},
    )
|
||||||
|
|
||||||
|
|
||||||
@app.post("/presigned-url", response_model=PresignedURLResponse)
|
@app.post("/presigned-url", response_model=PresignedURLResponse)
|
||||||
async def get_presigned_url(req: PresignedURLRequest):
|
async def get_presigned_url(req: PresignedURLRequest):
|
||||||
"""Generate a presigned URL for accessing a stored media file."""
|
"""Generate a presigned URL for accessing a stored media file."""
|
||||||
|
|||||||
Reference in New Issue
Block a user