feat(training): add Media Pipeline — TTS Audio, Presentation Video, Bulk Generation
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 48s
CI / test-python-backend-compliance (push) Successful in 35s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 20s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 48s
CI / test-python-backend-compliance (push) Successful in 35s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 20s
Phase A: 8 new IT-Security training modules (SEC-PWD, SEC-DESK, SEC-KIAI, SEC-BYOD, SEC-VIDEO, SEC-USB, SEC-INC, SEC-HOME) with CTM entries. Bulk content and quiz generation endpoints for all 28 modules. Phase B: Piper TTS service (Python/FastAPI) for local German speech synthesis. training_media table, TTSClient in Go backend, audio generation endpoints, AudioPlayer component in frontend. MinIO storage integration. Phase C: FFmpeg presentation video pipeline — LLM generates slide scripts, ImageMagick renders 1920x1080 slides, FFmpeg combines with audio to MP4. VideoPlayer and ScriptPreview components in frontend. New files: 15 created, 9 modified - compliance-tts-service/ (Dockerfile, main.py, tts_engine.py, storage.py, slide_renderer.py, video_generator.py) - migrations 014-016 (training engine, IT-security modules, media table) - training package (models, store, content_generator, media, handlers) - frontend (AudioPlayer, VideoPlayer, ScriptPreview, api, types, page) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
127
compliance-tts-service/video_generator.py
Normal file
127
compliance-tts-service/video_generator.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""FFmpeg video generator — combines slides + audio into presentation video."""
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
from slide_renderer import render_slide, render_title_slide
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def generate_presentation_video(
    script: dict,
    audio_object_key: str,
    output_dir: str,
    storage,
    audio_bucket: str,
) -> tuple[str, float]:
    """Generate a presentation video (MP4) from a slide script and narration audio.

    Steps:
        1. Download the audio object via ``storage.client.download_file``.
        2. Measure the audio duration with ffprobe.
        3. Render a title slide plus one PNG slide per script section.
        4. Split the audio duration across slides proportionally to text length,
           with a minimum display time per slide.
        5. Write an FFmpeg concat-demuxer list.
        6. Combine slides and audio into ``presentation.mp4``.

    Args:
        script: Slide script. Reads the keys ``title``, ``module_code`` and
            ``sections``; each section may carry ``heading``, ``text`` and
            ``bullet_points``.
        audio_object_key: Key of the narration audio inside ``audio_bucket``.
        output_dir: Existing working directory; receives ``audio.mp3``,
            ``slides/``, ``concat.txt`` and ``presentation.mp4``.
        storage: Object exposing ``client.download_file(bucket, key, path)``.
        audio_bucket: Bucket that holds the narration audio.

    Returns:
        ``(mp4_path, duration_seconds)`` where the duration is measured on the
        generated file.

    Raises:
        ValueError: If the script has no sections.
        RuntimeError: If FFmpeg exits with a non-zero status.
    """
    title = script.get("title", "Compliance Training")
    sections = script.get("sections", [])

    if not sections:
        raise ValueError("Script has no sections")

    # Step 1: Download audio
    audio_path = os.path.join(output_dir, "audio.mp3")
    storage.client.download_file(audio_bucket, audio_object_key, audio_path)

    # Step 2: Get audio duration
    duration = _get_duration(audio_path)

    # Step 3: Render slides
    slides_dir = os.path.join(output_dir, "slides")
    os.makedirs(slides_dir, exist_ok=True)

    # Title slide; it gets a small fixed weight on top of the title length.
    title_path = os.path.join(slides_dir, "slide_000.png")
    render_title_slide(title, "Compliance Schulung", title_path)
    slide_paths = [title_path]
    text_lengths = [len(title) + 20]

    # Content slides
    module_code = script.get("module_code", "")
    total_slides = len(sections) + 1  # +1 for title

    for index, section in enumerate(sections, start=1):
        # Scripts may contain explicit nulls for these keys; treat them as
        # empty so the length computation below cannot fail on None.
        heading = section.get("heading") or ""
        text = section.get("text") or ""
        bullet_points = section.get("bullet_points") or []

        slide_path = os.path.join(slides_dir, f"slide_{index:03d}.png")
        render_slide(
            heading=heading,
            text=text,
            bullet_points=bullet_points,
            slide_number=index + 1,  # 1-based, title is 1
            total_slides=total_slides,
            module_code=module_code,
            output_path=slide_path,
        )
        slide_paths.append(slide_path)

        # Text length drives the share of audio time given to this slide.
        text_len = len(heading) + len(text) + sum(len(bp) for bp in bullet_points)
        text_lengths.append(max(text_len, 50))

    # Step 4: Calculate timing
    slide_durations = _allocate_slide_durations(text_lengths, duration)

    # Step 5: Create FFmpeg concat file
    concat_path = os.path.join(output_dir, "concat.txt")
    with open(concat_path, "w", encoding="utf-8") as f:
        for slide_path, dur in zip(slide_paths, slide_durations):
            f.write(f"file '{_escape_concat_path(slide_path)}'\n")
            f.write(f"duration {dur:.2f}\n")
        # Repeat last slide for FFmpeg concat demuxer
        f.write(f"file '{_escape_concat_path(slide_paths[-1])}'\n")

    # Step 6: Combine with FFmpeg
    output_path = os.path.join(output_dir, "presentation.mp4")
    cmd = [
        "ffmpeg", "-y",
        "-f", "concat", "-safe", "0", "-i", concat_path,
        "-i", audio_path,
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        "-c:a", "aac", "-b:a", "128k",
        "-shortest",
        "-movflags", "+faststart",
        output_path,
    ]

    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg video generation failed: {result.stderr}")

    video_duration = _get_duration(output_path)
    return output_path, video_duration


def _allocate_slide_durations(
    text_lengths: list[int],
    total_seconds: float,
    min_seconds: float = 3.0,
) -> list[float]:
    """Split ``total_seconds`` across slides by text length, with a per-slide floor.

    Slides whose proportional share falls below ``min_seconds`` are pinned at
    the floor and the remaining time is re-shared among the other slides, so
    the sum stays equal to ``total_seconds`` whenever that is achievable.
    Clamping without re-sharing would make the slide track longer than the
    audio, and ``-shortest`` would then cut off the trailing slides.

    If the audio is too short to give every slide the floor, every slide gets
    ``min_seconds`` (the sum then exceeds ``total_seconds``).
    """
    durations = [min_seconds] * len(text_lengths)
    flexible = list(range(len(text_lengths)))
    budget = total_seconds

    # Each pass pins at least one slide or finishes, so this terminates.
    while flexible:
        weight_sum = sum(text_lengths[i] for i in flexible)
        shares = [(text_lengths[i] / weight_sum) * budget for i in flexible]
        too_short = {i for i, share in zip(flexible, shares) if share < min_seconds}
        if not too_short:
            for i, share in zip(flexible, shares):
                durations[i] = share
            break
        budget -= min_seconds * len(too_short)
        flexible = [i for i in flexible if i not in too_short]

    return durations


def _escape_concat_path(path: str) -> str:
    """Escape ``path`` for use inside single quotes in an FFmpeg concat list."""
    # Inside single quotes FFmpeg takes everything literally, so a quote has
    # to close the string, emit an escaped quote, and reopen it.
    return path.replace("'", "'\\''")
|
||||
|
||||
|
||||
def _get_duration(file_path: str) -> float:
    """Return the duration of a media file in seconds, as reported by ffprobe.

    Args:
        file_path: Path of the media file to probe.

    Returns:
        The container-level ``format=duration`` value as a float.

    Raises:
        RuntimeError: If ffprobe exits with a non-zero status.
        ValueError: If ffprobe succeeds but prints no numeric duration.
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        # Print the bare value only, without section wrappers or key name.
        "-of", "default=noprint_wrappers=1:nokey=1",
        file_path,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)

    # Surface the real ffprobe error instead of failing later on float("").
    if result.returncode != 0:
        raise RuntimeError(
            f"FFprobe failed for {file_path}: {result.stderr.strip()}"
        )

    raw_duration = result.stdout.strip()
    try:
        return float(raw_duration)
    except ValueError as err:
        raise ValueError(
            f"FFprobe returned no usable duration for {file_path}: {raw_duration!r}"
        ) from err
|
||||
Reference in New Issue
Block a user