breakpilot-compliance/compliance-tts-service/main.py

"""Compliance TTS Service — Piper TTS + FFmpeg Audio/Video Pipeline."""
import logging
import os
import tempfile
import uuid

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from storage import StorageClient
from tts_engine import PiperTTS

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Compliance TTS Service", version="1.0.0")

# Configuration
MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "bp-core-minio:9000")
MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "breakpilot")
MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "breakpilot123")
MINIO_SECURE = os.getenv("MINIO_SECURE", "false").lower() == "true"
PIPER_MODEL_PATH = os.getenv("PIPER_MODEL_PATH", "/app/models/de_DE-thorsten-high.onnx")

AUDIO_BUCKET = "compliance-training-audio"
VIDEO_BUCKET = "compliance-training-video"

# Initialize services
storage = StorageClient(MINIO_ENDPOINT, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, secure=MINIO_SECURE)
tts = PiperTTS(PIPER_MODEL_PATH)


@app.on_event("startup")
async def startup():
    """Ensure buckets exist on startup."""
    storage.ensure_bucket(AUDIO_BUCKET)
    storage.ensure_bucket(VIDEO_BUCKET)
    logger.info("TTS Service started")


# --- Models ---

class SynthesizeRequest(BaseModel):
    text: str
    language: str = "de"
    voice: str = "thorsten-high"
    module_id: str
    content_id: str | None = None


class SynthesizeResponse(BaseModel):
    audio_id: str
    bucket: str
    object_key: str
    duration_seconds: float
    size_bytes: int


class GenerateVideoRequest(BaseModel):
    script: dict
    audio_object_key: str
    module_id: str


class GenerateVideoResponse(BaseModel):
    video_id: str
    bucket: str
    object_key: str
    duration_seconds: float
    size_bytes: int


class PresignedURLRequest(BaseModel):
    bucket: str
    object_key: str
    expires: int = 3600


class PresignedURLResponse(BaseModel):
    url: str
    expires_in: int


class VoiceInfo(BaseModel):
    id: str
    language: str
    name: str
    quality: str


# --- Endpoints ---

@app.get("/health")
async def health():
    """Health check endpoint."""
    return {
        "status": "healthy",
        "piper_available": tts.is_available,
        "ffmpeg_available": _check_ffmpeg(),
        "minio_connected": storage.is_connected(),
    }


@app.get("/voices")
async def list_voices():
    """List available TTS voices."""
    return {
        "voices": [
            VoiceInfo(
                id="de_DE-thorsten-high",
                language="de",
                name="Thorsten (High Quality)",
                quality="high",
            ),
        ],
    }


@app.post("/presigned-url", response_model=PresignedURLResponse)
async def get_presigned_url(req: PresignedURLRequest):
    """Generate a presigned URL for accessing a stored media file."""
    try:
        url = storage.get_presigned_url(req.bucket, req.object_key, req.expires)
        return PresignedURLResponse(url=url, expires_in=req.expires)
    except Exception as e:
        logger.error(f"Presigned URL generation failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/synthesize", response_model=SynthesizeResponse)
async def synthesize(req: SynthesizeRequest):
    """Synthesize text to audio and upload to storage."""
    if not req.text.strip():
        raise HTTPException(status_code=400, detail="Text is empty")

    audio_id = str(uuid.uuid4())
    content_suffix = req.content_id or "full"
    object_key = f"audio/{req.module_id}/{content_suffix}.mp3"

    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            mp3_path, duration = tts.synthesize_to_mp3(req.text, tmpdir)
            size_bytes = storage.upload_file(AUDIO_BUCKET, object_key, mp3_path, "audio/mpeg")
        except Exception as e:
            logger.error(f"Synthesis failed: {e}")
            raise HTTPException(status_code=500, detail=str(e))

    return SynthesizeResponse(
        audio_id=audio_id,
        bucket=AUDIO_BUCKET,
        object_key=object_key,
        duration_seconds=round(duration, 2),
        size_bytes=size_bytes,
    )


class SynthesizeSectionsRequest(BaseModel):
    sections: list[dict]  # [{text, heading}]
    voice: str = "de_DE-thorsten-high"
    module_id: str = ""


class SynthesizeSectionsResponse(BaseModel):
    sections: list[dict]
    total_duration: float


class GenerateInteractiveVideoRequest(BaseModel):
    script: dict
    audio: dict  # SynthesizeSectionsResponse
    module_id: str


class GenerateInteractiveVideoResponse(BaseModel):
    video_id: str
    bucket: str
    object_key: str
    duration_seconds: float
    size_bytes: int


@app.post("/synthesize-sections", response_model=SynthesizeSectionsResponse)
async def synthesize_sections(req: SynthesizeSectionsRequest):
    """Synthesize audio for multiple sections, returning per-section timing."""
    if not req.sections:
        raise HTTPException(status_code=400, detail="No sections provided")

    results = []
    cumulative = 0.0

    with tempfile.TemporaryDirectory() as tmpdir:
        for i, section in enumerate(req.sections):
            text = section.get("text", "")
            heading = section.get("heading", f"Section {i+1}")

            if not text.strip():
                results.append({
                    "heading": heading,
                    "audio_path": "",
                    "audio_object_key": "",
                    "duration": 0.0,
                    "start_timestamp": cumulative,
                })
                continue

            try:
                mp3_path, duration = tts.synthesize_to_mp3(text, tmpdir, suffix=f"_section_{i}")
                object_key = f"audio/{req.module_id}/section_{i}.mp3"
                storage.upload_file(AUDIO_BUCKET, object_key, mp3_path, "audio/mpeg")

                results.append({
                    "heading": heading,
                    "audio_path": mp3_path,
                    "audio_object_key": object_key,
                    "duration": round(duration, 2),
                    "start_timestamp": round(cumulative, 2),
                })
                cumulative += duration
            except Exception as e:
                logger.error(f"Section {i} synthesis failed: {e}")
                raise HTTPException(status_code=500, detail=f"Section {i} synthesis failed: {e}")

    return SynthesizeSectionsResponse(
        sections=results,
        total_duration=round(cumulative, 2),
    )


@app.post("/generate-interactive-video", response_model=GenerateInteractiveVideoResponse)
async def generate_interactive_video(req: GenerateInteractiveVideoRequest):
    """Generate an interactive presentation video with checkpoint slides."""
    try:
        from video_generator import generate_interactive_presentation_video
    except ImportError:
        raise HTTPException(status_code=501, detail="Interactive video generation not available")

    video_id = str(uuid.uuid4())
    object_key = f"video/{req.module_id}/interactive.mp4"

    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            mp4_path, duration = generate_interactive_presentation_video(
                script=req.script,
                audio_sections=req.audio.get("sections", []),
                output_dir=tmpdir,
                storage=storage,
                audio_bucket=AUDIO_BUCKET,
            )
            size_bytes = storage.upload_file(VIDEO_BUCKET, object_key, mp4_path, "video/mp4")
        except Exception as e:
            logger.error(f"Interactive video generation failed: {e}")
            raise HTTPException(status_code=500, detail=str(e))

    return GenerateInteractiveVideoResponse(
        video_id=video_id,
        bucket=VIDEO_BUCKET,
        object_key=object_key,
        duration_seconds=round(duration, 2),
        size_bytes=size_bytes,
    )


@app.post("/generate-video", response_model=GenerateVideoResponse)
async def generate_video(req: GenerateVideoRequest):
    """Generate a presentation video from slides + audio."""
    try:
        from video_generator import generate_presentation_video
    except ImportError:
        raise HTTPException(status_code=501, detail="Video generation not available yet")

    video_id = str(uuid.uuid4())
    object_key = f"video/{req.module_id}/presentation.mp4"

    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            mp4_path, duration = generate_presentation_video(
                script=req.script,
                audio_object_key=req.audio_object_key,
                output_dir=tmpdir,
                storage=storage,
                audio_bucket=AUDIO_BUCKET,
            )
            size_bytes = storage.upload_file(VIDEO_BUCKET, object_key, mp4_path, "video/mp4")
        except Exception as e:
            logger.error(f"Video generation failed: {e}")
            raise HTTPException(status_code=500, detail=str(e))

    return GenerateVideoResponse(
        video_id=video_id,
        bucket=VIDEO_BUCKET,
        object_key=object_key,
        duration_seconds=round(duration, 2),
        size_bytes=size_bytes,
    )


def _check_ffmpeg() -> bool:
    """Check if ffmpeg is available."""
    import subprocess
    try:
        subprocess.run(["ffmpeg", "-version"], capture_output=True, timeout=5)
        return True
    except Exception:
        return False