feat(training): add Media Pipeline — TTS Audio, Presentation Video, Bulk Generation

Phase A: 8 new IT-Security training modules (SEC-PWD, SEC-DESK, SEC-KIAI, SEC-BYOD, SEC-VIDEO, SEC-USB, SEC-INC, SEC-HOME) with CTM entries. Bulk content and quiz generation endpoints for all 28 modules. Phase B: Piper TTS service (Python/FastAPI) for local German speech synthesis. training_media table, TTSClient in Go backend, audio generation endpoints, AudioPlayer component in frontend. MinIO storage integration. Phase C: FFmpeg presentation video pipeline — LLM generates slide scripts, ImageMagick renders 1920x1080 slides, FFmpeg combines with audio to MP4. VideoPlayer and ScriptPreview components in frontend. New files: 15 created, 9 modified - compliance-tts-service/ (Dockerfile, main.py, tts_engine.py, storage.py, slide_renderer.py, video_generator.py) - migrations 014-016 (training engine, IT-security modules, media table) - training package (models, store, content_generator, media, handlers) - frontend (AudioPlayer, VideoPlayer, ScriptPreview, api, types, page) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 21:42:33 +01:00
parent fba4c411dc
commit 9b8b7ca073
28 changed files with 7088 additions and 0 deletions
--- a/compliance-tts-service/video_generator.py
+++ b/compliance-tts-service/video_generator.py
@@ -0,0 +1,127 @@
+"""FFmpeg video generator — combines slides + audio into presentation video."""
+import logging
+import os
+import subprocess
+import tempfile
+
+from slide_renderer import render_slide, render_title_slide
+
+logger = logging.getLogger(__name__)
+
+
+def generate_presentation_video(
+    script: dict,
+    audio_object_key: str,
+    output_dir: str,
+    storage,
+    audio_bucket: str,
+) -> tuple[str, float]:
+    """
+    Generate a presentation video from a slide script and audio.
+
+    1. Download audio from MinIO
+    2. Get audio duration
+    3. Render slides as PNGs
+    4. Calculate timing per slide (proportional to text length)
+    5. Create FFmpeg concat list
+    6. Combine slides + audio into MP4
+
+    Returns (mp4_path, duration_seconds).
+    """
+    title = script.get("title", "Compliance Training")
+    sections = script.get("sections", [])
+
+    if not sections:
+        raise ValueError("Script has no sections")
+
+    # Step 1: Download audio
+    audio_path = os.path.join(output_dir, "audio.mp3")
+    storage.client.download_file(audio_bucket, audio_object_key, audio_path)
+
+    # Step 2: Get audio duration
+    duration = _get_duration(audio_path)
+
+    # Step 3: Render slides
+    slides_dir = os.path.join(output_dir, "slides")
+    os.makedirs(slides_dir, exist_ok=True)
+
+    slide_paths = []
+    text_lengths = []
+
+    # Title slide
+    title_path = os.path.join(slides_dir, "slide_000.png")
+    render_title_slide(title, "Compliance Schulung", title_path)
+    slide_paths.append(title_path)
+    text_lengths.append(len(title) + 20)  # Small weight for title
+
+    # Content slides
+    module_code = script.get("module_code", "")
+    total_slides = len(sections) + 1  # +1 for title
+
+    for i, section in enumerate(sections):
+        slide_path = os.path.join(slides_dir, f"slide_{i+1:03d}.png")
+        render_slide(
+            heading=section.get("heading", ""),
+            text=section.get("text", ""),
+            bullet_points=section.get("bullet_points", []),
+            slide_number=i + 2,  # 1-based, title is 1
+            total_slides=total_slides,
+            module_code=module_code,
+            output_path=slide_path,
+        )
+        slide_paths.append(slide_path)
+
+        # Text length for timing
+        text_len = len(section.get("heading", "")) + len(section.get("text", ""))
+        text_len += sum(len(bp) for bp in section.get("bullet_points", []))
+        text_lengths.append(max(text_len, 50))
+
+    # Step 4: Calculate timing
+    total_text = sum(text_lengths)
+    slide_durations = [(tl / total_text) * duration for tl in text_lengths]
+
+    # Minimum 3 seconds per slide
+    for i in range(len(slide_durations)):
+        if slide_durations[i] < 3.0:
+            slide_durations[i] = 3.0
+
+    # Step 5: Create FFmpeg concat file
+    concat_path = os.path.join(output_dir, "concat.txt")
+    with open(concat_path, "w") as f:
+        for slide_path, dur in zip(slide_paths, slide_durations):
+            f.write(f"file '{slide_path}'\n")
+            f.write(f"duration {dur:.2f}\n")
+        # Repeat last slide for FFmpeg concat demuxer
+        f.write(f"file '{slide_paths[-1]}'\n")
+
+    # Step 6: Combine with FFmpeg
+    output_path = os.path.join(output_dir, "presentation.mp4")
+    cmd = [
+        "ffmpeg", "-y",
+        "-f", "concat", "-safe", "0", "-i", concat_path,
+        "-i", audio_path,
+        "-c:v", "libx264", "-pix_fmt", "yuv420p",
+        "-c:a", "aac", "-b:a", "128k",
+        "-shortest",
+        "-movflags", "+faststart",
+        output_path,
+    ]
+
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
+    if result.returncode != 0:
+        raise RuntimeError(f"FFmpeg video generation failed: {result.stderr}")
+
+    video_duration = _get_duration(output_path)
+    return output_path, video_duration
+
+
+def _get_duration(file_path: str) -> float:
+    """Get media duration using FFprobe."""
+    cmd = [
+        "ffprobe", "-v", "error",
+        "-show_entries", "format=duration",
+        "-of", "default=noprint_wrappers=1:nokey=1",
+        file_path,
+    ]
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+    return float(result.stdout.strip())