# breakpilot-compliance/compliance-tts-service/video_generator.py

"""FFmpeg video generator — combines slides + audio into presentation video."""
import logging
import os
import subprocess
import tempfile

from slide_renderer import render_slide, render_title_slide

logger = logging.getLogger(__name__)


def generate_presentation_video(
    script: dict,
    audio_object_key: str,
    output_dir: str,
    storage,
    audio_bucket: str,
) -> tuple[str, float]:
    """
    Generate a presentation video from a slide script and audio.

    1. Download audio from MinIO
    2. Get audio duration
    3. Render slides as PNGs
    4. Calculate timing per slide (proportional to text length)
    5. Create FFmpeg concat list
    6. Combine slides + audio into MP4

    Args:
        script: Slide script. The keys read here are "title", "module_code"
            and "sections"; each section is read for "heading", "text" and
            "bullet_points". Missing or ``None`` section fields are treated
            as empty.
        audio_object_key: Object key passed to ``storage.client.download_file``.
        output_dir: Working directory (created if missing). Receives
            ``audio.mp3``, ``slides/``, ``concat.txt`` and ``presentation.mp4``.
        storage: Object exposing ``client.download_file(bucket, key, path)``.
        audio_bucket: Bucket name passed to ``storage.client.download_file``.

    Returns (mp4_path, duration_seconds), where the duration is probed from
    the generated MP4.

    Raises:
        ValueError: If the script has no sections.
        RuntimeError: If FFmpeg exits with a non-zero status.
    """
    title = script.get("title", "Compliance Training")
    if title is None:
        # An explicit null title would otherwise crash on len(title) below.
        title = "Compliance Training"
    sections = script.get("sections", [])

    if not sections:
        raise ValueError("Script has no sections")

    # Create the working directories up front so the audio download below
    # does not depend on the caller having created output_dir already.
    slides_dir = os.path.join(output_dir, "slides")
    os.makedirs(slides_dir, exist_ok=True)

    # Step 1: Download audio
    audio_path = os.path.join(output_dir, "audio.mp3")
    storage.client.download_file(audio_bucket, audio_object_key, audio_path)

    # Step 2: Get audio duration
    duration = _get_duration(audio_path)

    # Step 3: Render slides
    slide_paths = []
    text_lengths = []

    # Title slide
    title_path = os.path.join(slides_dir, "slide_000.png")
    render_title_slide(title, "Compliance Schulung", title_path)
    slide_paths.append(title_path)
    text_lengths.append(len(title) + 20)  # Small weight for title

    # Content slides
    module_code = script.get("module_code", "")
    total_slides = len(sections) + 1  # +1 for title

    for index, section in enumerate(sections, start=1):
        # "or" also covers keys that are present but null.
        heading = section.get("heading") or ""
        text = section.get("text") or ""
        bullet_points = section.get("bullet_points") or []

        slide_path = os.path.join(slides_dir, f"slide_{index:03d}.png")
        render_slide(
            heading=heading,
            text=text,
            bullet_points=bullet_points,
            slide_number=index + 1,  # 1-based, title is 1
            total_slides=total_slides,
            module_code=module_code,
            output_path=slide_path,
        )
        slide_paths.append(slide_path)

        # Text length drives the slide's share of the audio; the floor of 50
        # keeps near-empty slides from getting a negligible share.
        text_len = len(heading) + len(text) + sum(len(bp) for bp in bullet_points)
        text_lengths.append(max(text_len, 50))

    # Step 4: Calculate timing
    # NOTE: the per-slide minimum can push the summed slide time past the
    # audio length; "-shortest" below then ends the output with the shorter
    # stream, so trailing slides may be cut short for very short audio.
    slide_durations = _slide_durations(text_lengths, duration)

    # Step 5: Create FFmpeg concat file
    concat_path = os.path.join(output_dir, "concat.txt")
    with open(concat_path, "w", encoding="utf-8") as f:
        for slide_path, dur in zip(slide_paths, slide_durations):
            f.write(f"file {_concat_quote(slide_path)}\n")
            f.write(f"duration {dur:.2f}\n")
        # Repeat last slide for FFmpeg concat demuxer
        f.write(f"file {_concat_quote(slide_paths[-1])}\n")

    # Step 6: Combine with FFmpeg
    output_path = os.path.join(output_dir, "presentation.mp4")
    cmd = [
        "ffmpeg", "-y",
        "-f", "concat", "-safe", "0", "-i", concat_path,
        "-i", audio_path,
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        "-c:a", "aac", "-b:a", "128k",
        "-shortest",
        "-movflags", "+faststart",
        output_path,
    ]

    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg video generation failed: {result.stderr}")

    video_duration = _get_duration(output_path)
    return output_path, video_duration


def _slide_durations(
    text_lengths: list[int],
    audio_duration: float,
    min_seconds: float = 3.0,
) -> list[float]:
    """Split audio_duration across slides in proportion to text_lengths, with a per-slide floor."""
    # The caller guarantees every weight is positive, so the total is non-zero.
    total_text = sum(text_lengths)
    return [
        max((length / total_text) * audio_duration, min_seconds)
        for length in text_lengths
    ]


def _concat_quote(path: str) -> str:
    """Return path as a single-quoted, absolute entry for an FFmpeg concat list."""
    # The concat demuxer resolves relative entries against the directory of
    # the list file, not the process CWD, so a relative output_dir would be
    # applied twice. Absolute paths avoid that (the command uses "-safe 0").
    absolute = os.path.abspath(path)
    # Inside single quotes FFmpeg takes everything literally; a quote itself
    # must close the quoting, be backslash-escaped, then reopen it.
    return "'" + absolute.replace("'", "'\\''") + "'"


def _get_duration(file_path: str) -> float:
    """Get media duration using FFprobe.

    Args:
        file_path: Path of the media file handed to ``ffprobe``.

    Returns:
        The container-level duration printed by ffprobe, as a float.

    Raises:
        ValueError: If ffprobe exits with a non-zero status or prints
            something that is not a number. ValueError is kept (rather than
            RuntimeError) because that is what a failed probe raised before,
            via ``float("")``; the message now carries ffprobe's diagnostics.
        subprocess.TimeoutExpired: If ffprobe runs longer than 30 seconds.
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        # Print only the bare value: no section wrappers, no "duration=" key.
        "-of", "default=noprint_wrappers=1:nokey=1",
        file_path,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    if result.returncode != 0:
        raise ValueError(
            f"FFprobe failed for {file_path!r} "
            f"(exit code {result.returncode}): {result.stderr.strip()}"
        )

    raw = result.stdout.strip()
    try:
        return float(raw)
    except ValueError as err:
        # Covers empty output and any other non-numeric value.
        raise ValueError(
            f"FFprobe reported no numeric duration for {file_path!r}: {raw!r}"
        ) from err