Files
breakpilot-compliance/compliance-tts-service/video_generator.py
Benjamin Boenisch 375914e568
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 36s
CI / test-python-backend-compliance (push) Successful in 31s
CI / test-python-document-crawler (push) Successful in 23s
CI / test-python-dsms-gateway (push) Successful in 21s
feat(training): add Media Pipeline — TTS Audio, Presentation Video, Bulk Generation
Phase A: 8 new IT-Security training modules (SEC-PWD, SEC-DESK, SEC-KIAI,
SEC-BYOD, SEC-VIDEO, SEC-USB, SEC-INC, SEC-HOME) with CTM entries.
Bulk content and quiz generation endpoints for all 28 modules.

Phase B: Piper TTS service (Python/FastAPI) for local German speech synthesis.
training_media table, TTSClient in Go backend, audio generation endpoints,
AudioPlayer component in frontend. MinIO storage integration.

Phase C: FFmpeg presentation video pipeline — LLM generates slide scripts,
ImageMagick renders 1920x1080 slides, FFmpeg combines with audio to MP4.
VideoPlayer and ScriptPreview components in frontend.

New files: 15 created, 9 modified
- compliance-tts-service/ (Dockerfile, main.py, tts_engine.py, storage.py,
  slide_renderer.py, video_generator.py)
- migrations 014-016 (training engine, IT-security modules, media table)
- training package (models, store, content_generator, media, handlers)
- frontend (AudioPlayer, VideoPlayer, ScriptPreview, api, types, page)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 21:45:05 +01:00

128 lines
4.1 KiB
Python

"""FFmpeg video generator — combines slides + audio into presentation video."""
import logging
import os
import subprocess
import tempfile
from slide_renderer import render_slide, render_title_slide
logger = logging.getLogger(__name__)
def generate_presentation_video(
    script: dict,
    audio_object_key: str,
    output_dir: str,
    storage,
    audio_bucket: str,
) -> tuple[str, float]:
    """Generate a presentation video from a slide script and an audio track.

    Steps:
      1. Download audio from MinIO.
      2. Get the audio duration.
      3. Render a title slide plus one slide per section as PNGs.
      4. Allocate display time per slide (proportional to text length).
      5. Write an FFmpeg concat list.
      6. Combine slides + audio into an MP4.

    Args:
        script: Slide script. Reads ``title``, ``module_code`` and
            ``sections``; each section may provide ``heading``, ``text``
            and ``bullet_points``. Missing or ``None`` values are treated
            as empty.
        audio_object_key: Key of the audio object inside ``audio_bucket``.
        output_dir: Working directory; created if missing. Receives
            ``audio.mp3``, ``slides/``, ``concat.txt`` and
            ``presentation.mp4``.
        storage: Object exposing ``client.download_file(bucket, key, path)``.
        audio_bucket: Bucket that holds the audio object.

    Returns:
        ``(mp4_path, duration_seconds)``; the duration is measured from the
        generated file.

    Raises:
        ValueError: If the script has no sections.
        RuntimeError: If FFmpeg exits with a non-zero status.
    """
    title = script.get("title")
    if title is None:
        title = "Compliance Training"
    sections = script.get("sections") or []
    if not sections:
        raise ValueError("Script has no sections")

    # Create the working directories first: makedirs also creates output_dir,
    # which the audio download below writes into.
    slides_dir = os.path.join(output_dir, "slides")
    os.makedirs(slides_dir, exist_ok=True)

    # Step 1: Download audio
    audio_path = os.path.join(output_dir, "audio.mp3")
    storage.client.download_file(audio_bucket, audio_object_key, audio_path)

    # Step 2: Get audio duration
    duration = _get_duration(audio_path)

    # Step 3: Render slides
    slide_paths = []
    text_lengths = []

    # Title slide
    title_path = os.path.join(slides_dir, "slide_000.png")
    render_title_slide(title, "Compliance Schulung", title_path)
    slide_paths.append(title_path)
    text_lengths.append(len(title) + 20)  # Small weight for title

    # Content slides
    module_code = script.get("module_code") or ""
    total_slides = len(sections) + 1  # +1 for title
    for index, section in enumerate(sections, start=1):
        heading = section.get("heading") or ""
        text = section.get("text") or ""
        bullet_points = section.get("bullet_points") or []

        slide_path = os.path.join(slides_dir, f"slide_{index:03d}.png")
        render_slide(
            heading=heading,
            text=text,
            bullet_points=bullet_points,
            slide_number=index + 1,  # 1-based, title is 1
            total_slides=total_slides,
            module_code=module_code,
            output_path=slide_path,
        )
        slide_paths.append(slide_path)

        # Text length drives the slide's share of the audio; floor of 50 so
        # a nearly empty slide still gets a reasonable weight.
        text_len = len(heading) + len(text) + sum(len(bp) for bp in bullet_points)
        text_lengths.append(max(text_len, 50))

    # Step 4: Calculate timing. The total is kept equal to the audio length,
    # because "-shortest" below would otherwise cut off the final slides.
    slide_durations = _allocate_slide_durations(text_lengths, duration)

    # Step 5: Create FFmpeg concat file. Entries are absolute because FFmpeg
    # resolves relative entries against the concat file's own directory.
    concat_path = os.path.join(output_dir, "concat.txt")
    concat_lines = []
    for slide_path, dur in zip(slide_paths, slide_durations):
        concat_lines.append(f"file {_concat_quote(os.path.abspath(slide_path))}\n")
        concat_lines.append(f"duration {dur:.2f}\n")
    # Repeat last slide for FFmpeg concat demuxer
    concat_lines.append(f"file {_concat_quote(os.path.abspath(slide_paths[-1]))}\n")
    with open(concat_path, "w", encoding="utf-8") as f:
        f.writelines(concat_lines)

    # Step 6: Combine with FFmpeg
    output_path = os.path.join(output_dir, "presentation.mp4")
    cmd = [
        "ffmpeg", "-y",
        "-f", "concat", "-safe", "0", "-i", concat_path,
        "-i", audio_path,
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        "-c:a", "aac", "-b:a", "128k",
        "-shortest",
        "-movflags", "+faststart",
        output_path,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg video generation failed: {result.stderr}")

    video_duration = _get_duration(output_path)
    return output_path, video_duration


def _allocate_slide_durations(
    text_lengths: list[int],
    total_duration: float,
    min_duration: float = 3.0,
) -> list[float]:
    """Split *total_duration* across slides in proportion to *text_lengths*.

    Slides whose proportional share falls below *min_duration* are raised to
    that floor, and the remaining time is re-split among the other slides so
    the total still equals *total_duration*. If the audio is too short to give
    every slide the floor, plain proportional timing is used. If there is no
    usable duration or weight, every slide gets the floor.
    """
    count = len(text_lengths)
    total_weight = sum(text_lengths)
    if total_duration <= 0 or total_weight <= 0:
        return [min_duration] * count
    if total_duration < min_duration * count:
        # Floor cannot be honored for everyone; stay in sync with the audio.
        return [(weight / total_weight) * total_duration for weight in text_lengths]

    durations = [min_duration] * count
    flexible = list(range(count))  # slides not pinned to the floor
    budget = total_duration  # time left for the flexible slides
    weight = total_weight
    while True:
        weight = sum(text_lengths[i] for i in flexible)
        pinned = {
            i for i in flexible
            if (text_lengths[i] / weight) * budget < min_duration
        }
        if not pinned:
            break
        flexible = [i for i in flexible if i not in pinned]
        budget -= min_duration * len(pinned)

    for i in flexible:
        durations[i] = (text_lengths[i] / weight) * budget
    return durations


def _concat_quote(path: str) -> str:
    """Wrap *path* in single quotes for an FFmpeg concat list, escaping embedded quotes."""
    return "'" + path.replace("'", "'\\''") + "'"
def _get_duration(file_path: str) -> float:
    """Return the duration of a media file in seconds, as reported by FFprobe.

    Args:
        file_path: Path of the media file to probe.

    Returns:
        The container duration parsed from FFprobe's output.

    Raises:
        RuntimeError: If FFprobe exits with a non-zero status, or prints
            something that cannot be parsed as a number.
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        file_path,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    # Surface FFprobe's own error text instead of failing later on float("").
    if result.returncode != 0:
        raise RuntimeError(
            f"FFprobe failed for {file_path}: {result.stderr.strip()}"
        )
    raw_duration = result.stdout.strip()
    try:
        return float(raw_duration)
    except ValueError as err:
        raise RuntimeError(
            f"FFprobe returned no usable duration for {file_path}: {raw_duration!r}"
        ) from err