"""FFmpeg video generator — combines slides + audio into presentation video.""" import logging import os import subprocess import tempfile from slide_renderer import render_slide, render_title_slide logger = logging.getLogger(__name__) def generate_presentation_video( script: dict, audio_object_key: str, output_dir: str, storage, audio_bucket: str, ) -> tuple[str, float]: """ Generate a presentation video from a slide script and audio. 1. Download audio from MinIO 2. Get audio duration 3. Render slides as PNGs 4. Calculate timing per slide (proportional to text length) 5. Create FFmpeg concat list 6. Combine slides + audio into MP4 Returns (mp4_path, duration_seconds). """ title = script.get("title", "Compliance Training") sections = script.get("sections", []) if not sections: raise ValueError("Script has no sections") # Step 1: Download audio audio_path = os.path.join(output_dir, "audio.mp3") storage.client.download_file(audio_bucket, audio_object_key, audio_path) # Step 2: Get audio duration duration = _get_duration(audio_path) # Step 3: Render slides slides_dir = os.path.join(output_dir, "slides") os.makedirs(slides_dir, exist_ok=True) slide_paths = [] text_lengths = [] # Title slide title_path = os.path.join(slides_dir, "slide_000.png") render_title_slide(title, "Compliance Schulung", title_path) slide_paths.append(title_path) text_lengths.append(len(title) + 20) # Small weight for title # Content slides module_code = script.get("module_code", "") total_slides = len(sections) + 1 # +1 for title for i, section in enumerate(sections): slide_path = os.path.join(slides_dir, f"slide_{i+1:03d}.png") render_slide( heading=section.get("heading", ""), text=section.get("text", ""), bullet_points=section.get("bullet_points", []), slide_number=i + 2, # 1-based, title is 1 total_slides=total_slides, module_code=module_code, output_path=slide_path, ) slide_paths.append(slide_path) # Text length for timing text_len = len(section.get("heading", "")) + len(section.get("text", "")) text_len += sum(len(bp) for bp in section.get("bullet_points", [])) text_lengths.append(max(text_len, 50)) # Step 4: Calculate timing total_text = sum(text_lengths) slide_durations = [(tl / total_text) * duration for tl in text_lengths] # Minimum 3 seconds per slide for i in range(len(slide_durations)): if slide_durations[i] < 3.0: slide_durations[i] = 3.0 # Step 5: Create FFmpeg concat file concat_path = os.path.join(output_dir, "concat.txt") with open(concat_path, "w") as f: for slide_path, dur in zip(slide_paths, slide_durations): f.write(f"file '{slide_path}'\n") f.write(f"duration {dur:.2f}\n") # Repeat last slide for FFmpeg concat demuxer f.write(f"file '{slide_paths[-1]}'\n") # Step 6: Combine with FFmpeg output_path = os.path.join(output_dir, "presentation.mp4") cmd = [ "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat_path, "-i", audio_path, "-c:v", "libx264", "-pix_fmt", "yuv420p", "-c:a", "aac", "-b:a", "128k", "-shortest", "-movflags", "+faststart", output_path, ] result = subprocess.run(cmd, capture_output=True, text=True, timeout=600) if result.returncode != 0: raise RuntimeError(f"FFmpeg video generation failed: {result.stderr}") video_duration = _get_duration(output_path) return output_path, video_duration def _get_duration(file_path: str) -> float: """Get media duration using FFprobe.""" cmd = [ "ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", file_path, ] result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) return float(result.stdout.strip())