- Returns MP3 audio directly in response body (no MinIO upload) - Disk cache (/tmp/tts-cache) avoids re-synthesis of identical text - Used by pitch-deck presenter for real-time TTS playback Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
357 lines
11 KiB
Python
357 lines
11 KiB
Python
"""Compliance TTS Service — Piper TTS + FFmpeg Audio/Video Pipeline."""
|
|
import hashlib
|
|
import logging
|
|
import os
|
|
import tempfile
|
|
import uuid
|
|
|
|
from fastapi import FastAPI, HTTPException
|
|
from fastapi.responses import FileResponse, Response
|
|
from pydantic import BaseModel
|
|
|
|
from storage import StorageClient
|
|
from tts_engine import PiperTTS
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
app = FastAPI(title="Compliance TTS Service", version="1.0.0")
|
|
|
|
# Configuration
|
|
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "bp-core-minio:9000")
|
|
MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "breakpilot")
|
|
MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "breakpilot123")
|
|
MINIO_SECURE = os.getenv("MINIO_SECURE", "false").lower() == "true"
|
|
PIPER_MODEL_PATH = os.getenv("PIPER_MODEL_PATH", "/app/models/de_DE-thorsten-high.onnx")
|
|
|
|
AUDIO_BUCKET = "compliance-training-audio"
|
|
VIDEO_BUCKET = "compliance-training-video"
|
|
|
|
# Initialize services
|
|
storage = StorageClient(MINIO_ENDPOINT, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, secure=MINIO_SECURE)
|
|
tts = PiperTTS(PIPER_MODEL_PATH)
|
|
|
|
|
|
@app.on_event("startup")
|
|
async def startup():
|
|
"""Ensure buckets exist on startup."""
|
|
storage.ensure_bucket(AUDIO_BUCKET)
|
|
storage.ensure_bucket(VIDEO_BUCKET)
|
|
logger.info("TTS Service started")
|
|
|
|
|
|
# --- Models ---
|
|
|
|
class SynthesizeRequest(BaseModel):
|
|
text: str
|
|
language: str = "de"
|
|
voice: str = "thorsten-high"
|
|
module_id: str
|
|
content_id: str | None = None
|
|
|
|
|
|
class SynthesizeResponse(BaseModel):
|
|
audio_id: str
|
|
bucket: str
|
|
object_key: str
|
|
duration_seconds: float
|
|
size_bytes: int
|
|
|
|
|
|
class GenerateVideoRequest(BaseModel):
|
|
script: dict
|
|
audio_object_key: str
|
|
module_id: str
|
|
|
|
|
|
class GenerateVideoResponse(BaseModel):
|
|
video_id: str
|
|
bucket: str
|
|
object_key: str
|
|
duration_seconds: float
|
|
size_bytes: int
|
|
|
|
|
|
class PresignedURLRequest(BaseModel):
|
|
bucket: str
|
|
object_key: str
|
|
expires: int = 3600
|
|
|
|
|
|
class PresignedURLResponse(BaseModel):
|
|
url: str
|
|
expires_in: int
|
|
|
|
|
|
class VoiceInfo(BaseModel):
|
|
id: str
|
|
language: str
|
|
name: str
|
|
quality: str
|
|
|
|
|
|
# --- Endpoints ---
|
|
|
|
@app.get("/health")
|
|
async def health():
|
|
"""Health check endpoint."""
|
|
return {
|
|
"status": "healthy",
|
|
"piper_available": tts.is_available,
|
|
"ffmpeg_available": _check_ffmpeg(),
|
|
"minio_connected": storage.is_connected(),
|
|
}
|
|
|
|
|
|
@app.get("/voices")
|
|
async def list_voices():
|
|
"""List available TTS voices."""
|
|
return {
|
|
"voices": [
|
|
VoiceInfo(
|
|
id="de_DE-thorsten-high",
|
|
language="de",
|
|
name="Thorsten (High Quality)",
|
|
quality="high",
|
|
),
|
|
],
|
|
}
|
|
|
|
|
|
class SynthesizeDirectRequest(BaseModel):
|
|
text: str
|
|
language: str = "de"
|
|
|
|
|
|
# Simple disk cache for synthesized audio (avoids re-synthesis of same text)
|
|
TTS_CACHE_DIR = "/tmp/tts-cache"
|
|
os.makedirs(TTS_CACHE_DIR, exist_ok=True)
|
|
|
|
|
|
@app.post("/synthesize-direct")
|
|
async def synthesize_direct(req: SynthesizeDirectRequest):
|
|
"""Synthesize text and return MP3 audio directly (no MinIO upload).
|
|
|
|
Used by the pitch-deck presenter for real-time TTS playback.
|
|
Includes disk caching so identical text is only synthesized once.
|
|
"""
|
|
if not req.text.strip():
|
|
raise HTTPException(status_code=400, detail="Text is empty")
|
|
|
|
# Cache key based on text hash
|
|
text_hash = hashlib.sha256(req.text.encode()).hexdigest()[:16]
|
|
cache_path = os.path.join(TTS_CACHE_DIR, f"{text_hash}.mp3")
|
|
|
|
if os.path.exists(cache_path):
|
|
logger.info(f"TTS cache hit: {text_hash}")
|
|
return FileResponse(
|
|
cache_path,
|
|
media_type="audio/mpeg",
|
|
headers={"X-TTS-Cache": "hit"},
|
|
)
|
|
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
try:
|
|
mp3_path, duration = tts.synthesize_to_mp3(req.text, tmpdir)
|
|
# Copy to cache
|
|
import shutil
|
|
shutil.copy2(mp3_path, cache_path)
|
|
logger.info(f"TTS synthesized: {len(req.text)} chars, {duration:.1f}s, cached as {text_hash}")
|
|
except Exception as e:
|
|
logger.error(f"Direct synthesis failed: {e}")
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
return FileResponse(
|
|
cache_path,
|
|
media_type="audio/mpeg",
|
|
headers={"X-TTS-Cache": "miss", "X-TTS-Duration": str(round(duration, 2))},
|
|
)
|
|
|
|
|
|
@app.post("/presigned-url", response_model=PresignedURLResponse)
|
|
async def get_presigned_url(req: PresignedURLRequest):
|
|
"""Generate a presigned URL for accessing a stored media file."""
|
|
try:
|
|
url = storage.get_presigned_url(req.bucket, req.object_key, req.expires)
|
|
return PresignedURLResponse(url=url, expires_in=req.expires)
|
|
except Exception as e:
|
|
logger.error(f"Presigned URL generation failed: {e}")
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@app.post("/synthesize", response_model=SynthesizeResponse)
|
|
async def synthesize(req: SynthesizeRequest):
|
|
"""Synthesize text to audio and upload to storage."""
|
|
if not req.text.strip():
|
|
raise HTTPException(status_code=400, detail="Text is empty")
|
|
|
|
audio_id = str(uuid.uuid4())
|
|
content_suffix = req.content_id or "full"
|
|
object_key = f"audio/{req.module_id}/{content_suffix}.mp3"
|
|
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
try:
|
|
mp3_path, duration = tts.synthesize_to_mp3(req.text, tmpdir)
|
|
size_bytes = storage.upload_file(AUDIO_BUCKET, object_key, mp3_path, "audio/mpeg")
|
|
except Exception as e:
|
|
logger.error(f"Synthesis failed: {e}")
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
return SynthesizeResponse(
|
|
audio_id=audio_id,
|
|
bucket=AUDIO_BUCKET,
|
|
object_key=object_key,
|
|
duration_seconds=round(duration, 2),
|
|
size_bytes=size_bytes,
|
|
)
|
|
|
|
|
|
class SynthesizeSectionsRequest(BaseModel):
|
|
sections: list[dict] # [{text, heading}]
|
|
voice: str = "de_DE-thorsten-high"
|
|
module_id: str = ""
|
|
|
|
|
|
class SynthesizeSectionsResponse(BaseModel):
|
|
sections: list[dict]
|
|
total_duration: float
|
|
|
|
|
|
class GenerateInteractiveVideoRequest(BaseModel):
|
|
script: dict
|
|
audio: dict # SynthesizeSectionsResponse
|
|
module_id: str
|
|
|
|
|
|
class GenerateInteractiveVideoResponse(BaseModel):
|
|
video_id: str
|
|
bucket: str
|
|
object_key: str
|
|
duration_seconds: float
|
|
size_bytes: int
|
|
|
|
|
|
@app.post("/synthesize-sections", response_model=SynthesizeSectionsResponse)
|
|
async def synthesize_sections(req: SynthesizeSectionsRequest):
|
|
"""Synthesize audio for multiple sections, returning per-section timing."""
|
|
if not req.sections:
|
|
raise HTTPException(status_code=400, detail="No sections provided")
|
|
|
|
results = []
|
|
cumulative = 0.0
|
|
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
for i, section in enumerate(req.sections):
|
|
text = section.get("text", "")
|
|
heading = section.get("heading", f"Section {i+1}")
|
|
|
|
if not text.strip():
|
|
results.append({
|
|
"heading": heading,
|
|
"audio_path": "",
|
|
"audio_object_key": "",
|
|
"duration": 0.0,
|
|
"start_timestamp": cumulative,
|
|
})
|
|
continue
|
|
|
|
try:
|
|
mp3_path, duration = tts.synthesize_to_mp3(text, tmpdir, suffix=f"_section_{i}")
|
|
object_key = f"audio/{req.module_id}/section_{i}.mp3"
|
|
storage.upload_file(AUDIO_BUCKET, object_key, mp3_path, "audio/mpeg")
|
|
|
|
results.append({
|
|
"heading": heading,
|
|
"audio_path": mp3_path,
|
|
"audio_object_key": object_key,
|
|
"duration": round(duration, 2),
|
|
"start_timestamp": round(cumulative, 2),
|
|
})
|
|
cumulative += duration
|
|
except Exception as e:
|
|
logger.error(f"Section {i} synthesis failed: {e}")
|
|
raise HTTPException(status_code=500, detail=f"Section {i} synthesis failed: {e}")
|
|
|
|
return SynthesizeSectionsResponse(
|
|
sections=results,
|
|
total_duration=round(cumulative, 2),
|
|
)
|
|
|
|
|
|
@app.post("/generate-interactive-video", response_model=GenerateInteractiveVideoResponse)
|
|
async def generate_interactive_video(req: GenerateInteractiveVideoRequest):
|
|
"""Generate an interactive presentation video with checkpoint slides."""
|
|
try:
|
|
from video_generator import generate_interactive_presentation_video
|
|
except ImportError:
|
|
raise HTTPException(status_code=501, detail="Interactive video generation not available")
|
|
|
|
video_id = str(uuid.uuid4())
|
|
object_key = f"video/{req.module_id}/interactive.mp4"
|
|
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
try:
|
|
mp4_path, duration = generate_interactive_presentation_video(
|
|
script=req.script,
|
|
audio_sections=req.audio.get("sections", []),
|
|
output_dir=tmpdir,
|
|
storage=storage,
|
|
audio_bucket=AUDIO_BUCKET,
|
|
)
|
|
size_bytes = storage.upload_file(VIDEO_BUCKET, object_key, mp4_path, "video/mp4")
|
|
except Exception as e:
|
|
logger.error(f"Interactive video generation failed: {e}")
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
return GenerateInteractiveVideoResponse(
|
|
video_id=video_id,
|
|
bucket=VIDEO_BUCKET,
|
|
object_key=object_key,
|
|
duration_seconds=round(duration, 2),
|
|
size_bytes=size_bytes,
|
|
)
|
|
|
|
|
|
@app.post("/generate-video", response_model=GenerateVideoResponse)
|
|
async def generate_video(req: GenerateVideoRequest):
|
|
"""Generate a presentation video from slides + audio."""
|
|
try:
|
|
from video_generator import generate_presentation_video
|
|
except ImportError:
|
|
raise HTTPException(status_code=501, detail="Video generation not available yet")
|
|
|
|
video_id = str(uuid.uuid4())
|
|
object_key = f"video/{req.module_id}/presentation.mp4"
|
|
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
try:
|
|
mp4_path, duration = generate_presentation_video(
|
|
script=req.script,
|
|
audio_object_key=req.audio_object_key,
|
|
output_dir=tmpdir,
|
|
storage=storage,
|
|
audio_bucket=AUDIO_BUCKET,
|
|
)
|
|
size_bytes = storage.upload_file(VIDEO_BUCKET, object_key, mp4_path, "video/mp4")
|
|
except Exception as e:
|
|
logger.error(f"Video generation failed: {e}")
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
return GenerateVideoResponse(
|
|
video_id=video_id,
|
|
bucket=VIDEO_BUCKET,
|
|
object_key=object_key,
|
|
duration_seconds=round(duration, 2),
|
|
size_bytes=size_bytes,
|
|
)
|
|
|
|
|
|
def _check_ffmpeg() -> bool:
|
|
"""Check if ffmpeg is available."""
|
|
import subprocess
|
|
try:
|
|
subprocess.run(["ffmpeg", "-version"], capture_output=True, timeout=5)
|
|
return True
|
|
except Exception:
|
|
return False
|