merge: sync with origin/main, take upstream on conflicts
# Conflicts:
#   admin-compliance/lib/sdk/types.ts
#   admin-compliance/lib/sdk/vendor-compliance/types.ts
This commit is contained in:
@@ -15,8 +15,16 @@ WORKDIR /app
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Download Piper voice models (German: thorsten, English: lessac; both "high" quality).
# Each voice needs the .onnx model plus its .onnx.json config. A single RUN layer
# avoids the duplicate German download left over from the earlier single-voice setup.
RUN mkdir -p /app/models && \
    wget -q -O /app/models/de_DE-thorsten-high.onnx \
        "https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE/thorsten/high/de_DE-thorsten-high.onnx" && \
    wget -q -O /app/models/de_DE-thorsten-high.onnx.json \
        "https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE/thorsten/high/de_DE-thorsten-high.onnx.json" && \
    wget -q -O /app/models/en_US-lessac-high.onnx \
        "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/high/en_US-lessac-high.onnx" && \
    wget -q -O /app/models/en_US-lessac-high.onnx.json \
        "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/high/en_US-lessac-high.onnx.json"
|
||||
|
||||
# Copy application
|
||||
COPY . .
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
"""Compliance TTS Service — Piper TTS + FFmpeg Audio/Video Pipeline."""
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import uuid
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.responses import FileResponse, Response
|
||||
from pydantic import BaseModel
|
||||
|
||||
from storage import StorageClient
|
||||
@@ -21,6 +23,7 @@ MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "breakpilot")
|
||||
# MinIO secret key — default is a local-development credential; override in production.
MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "breakpilot123")
# TLS is used only when MINIO_SECURE is the literal string "true" (case-insensitive).
MINIO_SECURE = os.getenv("MINIO_SECURE", "false").lower() == "true"
# Piper ONNX voice models: German is the default voice; English is optional
# (loaded only if the file exists — see the tts_en initialization below).
PIPER_MODEL_PATH = os.getenv("PIPER_MODEL_PATH", "/app/models/de_DE-thorsten-high.onnx")
PIPER_MODEL_EN_PATH = os.getenv("PIPER_MODEL_EN_PATH", "/app/models/en_US-lessac-high.onnx")

# MinIO bucket names for generated training media.
AUDIO_BUCKET = "compliance-training-audio"
VIDEO_BUCKET = "compliance-training-video"
|
||||
@@ -28,6 +31,7 @@ VIDEO_BUCKET = "compliance-training-video"
|
||||
# Initialize shared service clients once at import time.
storage = StorageClient(MINIO_ENDPOINT, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, secure=MINIO_SECURE)
# The German voice is mandatory; the English voice is loaded only when its
# model file is present on disk (tts_en stays None otherwise).
tts = PiperTTS(PIPER_MODEL_PATH)
tts_en = PiperTTS(PIPER_MODEL_EN_PATH) if os.path.exists(PIPER_MODEL_EN_PATH) else None
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
@@ -70,6 +74,17 @@ class GenerateVideoResponse(BaseModel):
|
||||
size_bytes: int
|
||||
|
||||
|
||||
class PresignedURLRequest(BaseModel):
    """Request body for the /presigned-url endpoint."""

    bucket: str  # MinIO bucket containing the object
    object_key: str  # key of the stored media object within the bucket
    expires: int = 3600  # URL lifetime in seconds (default: 1 hour)
|
||||
|
||||
|
||||
class PresignedURLResponse(BaseModel):
    """A presigned URL together with its validity window."""

    url: str  # presigned URL for fetching the object
    expires_in: int  # echo of the requested lifetime, in seconds
|
||||
|
||||
|
||||
class VoiceInfo(BaseModel):
|
||||
id: str
|
||||
language: str
|
||||
@@ -93,16 +108,119 @@ async def health():
|
||||
@app.get("/voices")
|
||||
async def list_voices():
|
||||
"""List available TTS voices."""
|
||||
return {
|
||||
"voices": [
|
||||
VoiceInfo(
|
||||
id="de_DE-thorsten-high",
|
||||
language="de",
|
||||
name="Thorsten (High Quality)",
|
||||
quality="high",
|
||||
),
|
||||
],
|
||||
}
|
||||
voices = [
|
||||
VoiceInfo(
|
||||
id="de_DE-thorsten-high",
|
||||
language="de",
|
||||
name="Thorsten (High Quality)",
|
||||
quality="high",
|
||||
),
|
||||
]
|
||||
if tts_en is not None:
|
||||
voices.append(VoiceInfo(
|
||||
id="en_US-lessac-high",
|
||||
language="en",
|
||||
name="Lessac (High Quality)",
|
||||
quality="high",
|
||||
))
|
||||
return {"voices": voices}
|
||||
|
||||
|
||||
class SynthesizeDirectRequest(BaseModel):
    """Request body for the /synthesize-direct endpoint."""

    text: str  # text to speak; the handler rejects whitespace-only input
    language: str = "de"  # language code; "en" selects the English engine when loaded
|
||||
|
||||
|
||||
# Simple disk cache for synthesized audio (avoids re-synthesis of same text).
# Keys are sha256-derived hashes of "language:text" (see synthesize_direct).
TTS_CACHE_DIR = "/tmp/tts-cache"
os.makedirs(TTS_CACHE_DIR, exist_ok=True)


# Edge TTS (Microsoft) neural voice IDs, keyed by language code.
# Unknown languages fall back to the German voice in _edge_tts_synthesize.
EDGE_TTS_VOICES = {
    "de": "de-DE-ConradNeural",
    "en": "en-US-GuyNeural",
}
|
||||
|
||||
|
||||
async def _edge_tts_synthesize(text: str, language: str, output_path: str) -> bool:
    """Write Edge TTS (Microsoft Neural Voices) audio to *output_path*.

    Best-effort: any failure (missing edge_tts package, no network, API
    error) is logged and reported as False so the caller can fall back
    to Piper.
    """
    try:
        import edge_tts

        selected_voice = EDGE_TTS_VOICES.get(language, EDGE_TTS_VOICES["de"])
        await edge_tts.Communicate(text, selected_voice).save(output_path)
    except Exception as e:
        logger.warning(f"Edge TTS failed, falling back to Piper: {e}")
        return False
    return True
|
||||
|
||||
|
||||
@app.post("/synthesize-direct")
|
||||
async def synthesize_direct(req: SynthesizeDirectRequest):
|
||||
"""Synthesize text and return MP3 audio directly (no MinIO upload).
|
||||
|
||||
Uses Edge TTS (Microsoft Neural Voices) for high-quality speech.
|
||||
Falls back to Piper TTS if Edge TTS is unavailable (e.g. no internet).
|
||||
Includes disk caching so identical text is only synthesized once.
|
||||
"""
|
||||
if not req.text.strip():
|
||||
raise HTTPException(status_code=400, detail="Text is empty")
|
||||
|
||||
# Cache key based on text + language hash
|
||||
text_hash = hashlib.sha256(f"{req.language}:{req.text}".encode()).hexdigest()[:16]
|
||||
cache_path = os.path.join(TTS_CACHE_DIR, f"{text_hash}.mp3")
|
||||
|
||||
if os.path.exists(cache_path):
|
||||
logger.info(f"TTS cache hit: {text_hash}")
|
||||
return FileResponse(
|
||||
cache_path,
|
||||
media_type="audio/mpeg",
|
||||
headers={"X-TTS-Cache": "hit"},
|
||||
)
|
||||
|
||||
# Try Edge TTS first (high quality neural voices)
|
||||
success = await _edge_tts_synthesize(req.text, req.language, cache_path)
|
||||
|
||||
if success and os.path.exists(cache_path):
|
||||
size = os.path.getsize(cache_path)
|
||||
logger.info(f"Edge TTS ({req.language}): {len(req.text)} chars, {size} bytes, cached as {text_hash}")
|
||||
return FileResponse(
|
||||
cache_path,
|
||||
media_type="audio/mpeg",
|
||||
headers={"X-TTS-Cache": "miss", "X-TTS-Engine": "edge"},
|
||||
)
|
||||
|
||||
# Fallback: Piper TTS
|
||||
engine = tts
|
||||
if req.language == "en" and tts_en is not None:
|
||||
engine = tts_en
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
try:
|
||||
mp3_path, duration = engine.synthesize_to_mp3(req.text, tmpdir)
|
||||
import shutil
|
||||
shutil.copy2(mp3_path, cache_path)
|
||||
logger.info(f"Piper TTS ({req.language}): {len(req.text)} chars, {duration:.1f}s, cached as {text_hash}")
|
||||
except Exception as e:
|
||||
logger.error(f"TTS synthesis failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
return FileResponse(
|
||||
cache_path,
|
||||
media_type="audio/mpeg",
|
||||
headers={"X-TTS-Cache": "miss", "X-TTS-Engine": "piper"},
|
||||
)
|
||||
|
||||
|
||||
@app.post("/presigned-url", response_model=PresignedURLResponse)
|
||||
async def get_presigned_url(req: PresignedURLRequest):
|
||||
"""Generate a presigned URL for accessing a stored media file."""
|
||||
try:
|
||||
url = storage.get_presigned_url(req.bucket, req.object_key, req.expires)
|
||||
return PresignedURLResponse(url=url, expires_in=req.expires)
|
||||
except Exception as e:
|
||||
logger.error(f"Presigned URL generation failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@app.post("/synthesize", response_model=SynthesizeResponse)
|
||||
@@ -132,6 +250,112 @@ async def synthesize(req: SynthesizeRequest):
|
||||
)
|
||||
|
||||
|
||||
class SynthesizeSectionsRequest(BaseModel):
    """Request body for the /synthesize-sections endpoint."""

    sections: list[dict]  # [{text, heading}] — one entry per script section
    voice: str = "de_DE-thorsten-high"  # requested Piper voice id
    module_id: str = ""  # namespaces the uploaded audio object keys
|
||||
|
||||
|
||||
class SynthesizeSectionsResponse(BaseModel):
    """Per-section synthesis results plus the total audio duration."""

    sections: list[dict]  # entries: heading, audio_path, audio_object_key, duration, start_timestamp
    total_duration: float  # sum of section durations in seconds, rounded to 2 dp
|
||||
|
||||
|
||||
class GenerateInteractiveVideoRequest(BaseModel):
    """Request body for the /generate-interactive-video endpoint."""

    script: dict  # narrator script (title, sections) consumed by video_generator
    audio: dict  # SynthesizeSectionsResponse as a plain dict (sections + timings)
    module_id: str  # namespaces the uploaded video object key
|
||||
|
||||
|
||||
class GenerateInteractiveVideoResponse(BaseModel):
    """Metadata for a generated interactive video stored in MinIO."""

    video_id: str  # random UUID assigned to this render
    bucket: str  # MinIO bucket holding the video
    object_key: str  # object key within the bucket
    duration_seconds: float  # probed duration of the final MP4
    size_bytes: int  # uploaded file size as reported by storage
|
||||
|
||||
|
||||
@app.post("/synthesize-sections", response_model=SynthesizeSectionsResponse)
|
||||
async def synthesize_sections(req: SynthesizeSectionsRequest):
|
||||
"""Synthesize audio for multiple sections, returning per-section timing."""
|
||||
if not req.sections:
|
||||
raise HTTPException(status_code=400, detail="No sections provided")
|
||||
|
||||
results = []
|
||||
cumulative = 0.0
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
for i, section in enumerate(req.sections):
|
||||
text = section.get("text", "")
|
||||
heading = section.get("heading", f"Section {i+1}")
|
||||
|
||||
if not text.strip():
|
||||
results.append({
|
||||
"heading": heading,
|
||||
"audio_path": "",
|
||||
"audio_object_key": "",
|
||||
"duration": 0.0,
|
||||
"start_timestamp": cumulative,
|
||||
})
|
||||
continue
|
||||
|
||||
try:
|
||||
mp3_path, duration = tts.synthesize_to_mp3(text, tmpdir, suffix=f"_section_{i}")
|
||||
object_key = f"audio/{req.module_id}/section_{i}.mp3"
|
||||
storage.upload_file(AUDIO_BUCKET, object_key, mp3_path, "audio/mpeg")
|
||||
|
||||
results.append({
|
||||
"heading": heading,
|
||||
"audio_path": mp3_path,
|
||||
"audio_object_key": object_key,
|
||||
"duration": round(duration, 2),
|
||||
"start_timestamp": round(cumulative, 2),
|
||||
})
|
||||
cumulative += duration
|
||||
except Exception as e:
|
||||
logger.error(f"Section {i} synthesis failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Section {i} synthesis failed: {e}")
|
||||
|
||||
return SynthesizeSectionsResponse(
|
||||
sections=results,
|
||||
total_duration=round(cumulative, 2),
|
||||
)
|
||||
|
||||
|
||||
@app.post("/generate-interactive-video", response_model=GenerateInteractiveVideoResponse)
|
||||
async def generate_interactive_video(req: GenerateInteractiveVideoRequest):
|
||||
"""Generate an interactive presentation video with checkpoint slides."""
|
||||
try:
|
||||
from video_generator import generate_interactive_presentation_video
|
||||
except ImportError:
|
||||
raise HTTPException(status_code=501, detail="Interactive video generation not available")
|
||||
|
||||
video_id = str(uuid.uuid4())
|
||||
object_key = f"video/{req.module_id}/interactive.mp4"
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
try:
|
||||
mp4_path, duration = generate_interactive_presentation_video(
|
||||
script=req.script,
|
||||
audio_sections=req.audio.get("sections", []),
|
||||
output_dir=tmpdir,
|
||||
storage=storage,
|
||||
audio_bucket=AUDIO_BUCKET,
|
||||
)
|
||||
size_bytes = storage.upload_file(VIDEO_BUCKET, object_key, mp4_path, "video/mp4")
|
||||
except Exception as e:
|
||||
logger.error(f"Interactive video generation failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
return GenerateInteractiveVideoResponse(
|
||||
video_id=video_id,
|
||||
bucket=VIDEO_BUCKET,
|
||||
object_key=object_key,
|
||||
duration_seconds=round(duration, 2),
|
||||
size_bytes=size_bytes,
|
||||
)
|
||||
|
||||
|
||||
@app.post("/generate-video", response_model=GenerateVideoResponse)
|
||||
async def generate_video(req: GenerateVideoRequest):
|
||||
"""Generate a presentation video from slides + audio."""
|
||||
|
||||
@@ -3,3 +3,4 @@ uvicorn[standard]==0.27.1
|
||||
boto3==1.34.25
|
||||
python-multipart==0.0.6
|
||||
pydantic==2.6.1
|
||||
edge-tts==7.2.7
|
||||
|
||||
@@ -130,3 +130,97 @@ def render_title_slide(
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(f"ImageMagick title slide failed: {result.stderr}")
|
||||
|
||||
|
||||
def render_checkpoint_slide(
    title: str,
    question_preview: str,
    question_count: int,
    output_path: str,
) -> None:
    """Render a checkpoint slide with red border and quiz preview.

    Builds one ImageMagick ``convert`` command that draws the red frame,
    header bar, instruction text, optional first-question preview, the
    question count, and a footer, then writes the finished PNG.

    Args:
        title: Checkpoint title; truncated to 50 characters in the header.
        question_preview: First question's text; truncated to 120 characters.
        question_count: Number of questions, shown as a summary line.
        output_path: Destination PNG path.

    Raises:
        RuntimeError: If the ``convert`` subprocess exits non-zero.
    """
    border_width = 12
    cmd = [
        "convert",
        "-size", f"{WIDTH}x{HEIGHT}",
        "xc:white",
        # Red border (full rectangle, then white inner)
        "-fill", "#c0392b",
        "-draw", f"rectangle 0,0 {WIDTH},{HEIGHT}",
        "-fill", "white",
        "-draw", f"rectangle {border_width},{border_width} {WIDTH - border_width},{HEIGHT - border_width}",
        # Red header bar
        "-fill", "#c0392b",
        "-draw", f"rectangle {border_width},{border_width} {WIDTH - border_width},{HEADER_HEIGHT + border_width}",
        # CHECKPOINT label, vertically centered in the header bar
        "-fill", "white",
        "-font", FONT_BOLD,
        "-pointsize", "48",
        "-gravity", "NorthWest",
        "-annotate", f"+{60 + border_width}+{(HEADER_HEIGHT - 48) // 2 + border_width}",
        f"CHECKPOINT: {title[:50]}",
    ]

    # y_pos tracks the vertical cursor for the body text below the header.
    y_pos = HEADER_HEIGHT + border_width + 60

    # Instruction text (two lines, German UI copy)
    cmd.extend([
        "-fill", "#333333",
        "-font", FONT,
        "-pointsize", "32",
        "-gravity", "NorthWest",
        "-annotate", f"+80+{y_pos}",
        "Bitte beantworten Sie die folgenden Fragen,",
    ])
    y_pos += 44

    cmd.extend([
        "-fill", "#333333",
        "-font", FONT,
        "-pointsize", "32",
        "-gravity", "NorthWest",
        "-annotate", f"+80+{y_pos}",
        "um mit der Schulung fortzufahren.",
    ])
    y_pos += 80

    # Question preview (optional) — wrapped, then truncated to 120 chars
    if question_preview:
        preview = textwrap.fill(question_preview, width=70)
        cmd.extend([
            "-fill", "#666666",
            "-font", FONT,
            "-pointsize", "26",
            "-gravity", "NorthWest",
            "-annotate", f"+80+{y_pos}",
            f"Erste Frage: {preview[:120]}...",
        ])
        y_pos += 50

    # Question count summary line
    cmd.extend([
        "-fill", "#888888",
        "-font", FONT,
        "-pointsize", "24",
        "-gravity", "NorthWest",
        "-annotate", f"+80+{y_pos}",
        f"{question_count} Fragen in diesem Checkpoint",
    ])

    # Footer: light band with centered hint that the player will pause
    cmd.extend([
        "-fill", "#f0f0f0",
        "-draw", f"rectangle {border_width},{HEIGHT - FOOTER_HEIGHT - border_width} {WIDTH - border_width},{HEIGHT - border_width}",
        "-fill", "#c0392b",
        "-font", FONT_BOLD,
        "-pointsize", "22",
        "-gravity", "South",
        "-annotate", f"+0+{(FOOTER_HEIGHT - 22) // 2 + border_width}",
        "Video wird pausiert — Quiz im Player beantworten",
    ])

    cmd.append(output_path)

    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    if result.returncode != 0:
        raise RuntimeError(f"ImageMagick checkpoint slide failed: {result.stderr}")
|
||||
|
||||
@@ -74,7 +74,7 @@ class PiperTTS:
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError(f"Piper failed: {proc.stderr}")
|
||||
|
||||
def synthesize_to_mp3(self, text: str, output_dir: str) -> tuple[str, float]:
|
||||
def synthesize_to_mp3(self, text: str, output_dir: str, suffix: str = "") -> tuple[str, float]:
|
||||
"""
|
||||
Synthesize text to MP3.
|
||||
Splits text into sentences, synthesizes each, concatenates, encodes to MP3.
|
||||
@@ -88,16 +88,16 @@ class PiperTTS:
|
||||
wav_files = []
|
||||
try:
|
||||
for i, sentence in enumerate(sentences):
|
||||
wav_path = os.path.join(output_dir, f"seg_{i:04d}.wav")
|
||||
wav_path = os.path.join(output_dir, f"seg{suffix}_{i:04d}.wav")
|
||||
self.synthesize_to_wav(sentence, wav_path)
|
||||
wav_files.append(wav_path)
|
||||
|
||||
# Concatenate WAV files
|
||||
combined_wav = os.path.join(output_dir, "combined.wav")
|
||||
combined_wav = os.path.join(output_dir, f"combined{suffix}.wav")
|
||||
self._concatenate_wavs(wav_files, combined_wav)
|
||||
|
||||
# Convert to MP3
|
||||
mp3_path = os.path.join(output_dir, "output.mp3")
|
||||
mp3_path = os.path.join(output_dir, f"output{suffix}.mp3")
|
||||
self._wav_to_mp3(combined_wav, mp3_path)
|
||||
|
||||
# Get duration
|
||||
|
||||
@@ -115,6 +115,139 @@ def generate_presentation_video(
|
||||
return output_path, video_duration
|
||||
|
||||
|
||||
def generate_interactive_presentation_video(
    script: dict,
    audio_sections: list[dict],
    output_dir: str,
    storage,
    audio_bucket: str,
) -> tuple[str, float]:
    """
    Generate an interactive presentation video from narrator script + per-section audio.

    Includes checkpoint slides (red-bordered pause markers) between sections.
    Returns (mp4_path, duration_seconds).

    Args:
        script: Narrator script; reads "title", "sections", "module_code".
        audio_sections: Per-section audio metadata; reads "audio_object_key"
            and "duration" from each entry.
        output_dir: Working directory for downloaded audio, rendered slides,
            and the final MP4.
        storage: Storage client; its .client.download_file is used to fetch audio.
        audio_bucket: Bucket the section audio files live in.

    Raises:
        ValueError: If the script has no sections, no audio sections were
            given, or no audio files could be downloaded.
        RuntimeError: If an FFmpeg step exits non-zero.
    """
    # Imported lazily; slide_renderer is a sibling module of this file.
    from slide_renderer import render_slide, render_title_slide, render_checkpoint_slide

    title = script.get("title", "Compliance Training")
    sections = script.get("sections", [])

    if not sections:
        raise ValueError("Script has no sections")
    if not audio_sections:
        raise ValueError("No audio sections provided")

    # Step 1: Download all section audio files.
    # Entries without an object key are skipped, so audio_paths may be
    # shorter than sections — the slide loop below guards for that.
    audio_paths = []
    for i, sec in enumerate(audio_sections):
        obj_key = sec.get("audio_object_key", "")
        if not obj_key:
            continue
        audio_path = os.path.join(output_dir, f"section_{i}.mp3")
        storage.client.download_file(audio_bucket, obj_key, audio_path)
        audio_paths.append((i, audio_path, sec.get("duration", 0.0)))

    # Step 2: Render slides
    slides_dir = os.path.join(output_dir, "slides")
    os.makedirs(slides_dir, exist_ok=True)

    # All slide entries: (png_path, duration)
    slide_entries = []

    # Title slide (5 seconds)
    title_path = os.path.join(slides_dir, "slide_000_title.png")
    render_title_slide(title, "Interaktive Compliance-Schulung", title_path)
    slide_entries.append((title_path, 5.0))

    total_content_slides = sum(1 for _ in sections)  # for numbering
    slide_num = 1

    for i, section in enumerate(sections):
        heading = section.get("heading", "")
        narrator_text = section.get("narrator_text", "")
        bullet_points = section.get("bullet_points", [])

        # Content slide for this section (narrator text capped at 200 chars)
        slide_path = os.path.join(slides_dir, f"slide_{i+1:03d}_content.png")
        render_slide(
            heading=heading,
            text=narrator_text[:200] if len(narrator_text) > 200 else narrator_text,
            bullet_points=bullet_points,
            slide_number=slide_num + 1,
            total_slides=total_content_slides + 1,
            module_code=script.get("module_code", ""),
            output_path=slide_path,
        )
        slide_num += 1

        # Duration = matching audio section duration (5s fallback when the
        # section has no audio or reported a zero duration)
        section_duration = 5.0  # fallback
        if i < len(audio_paths):
            section_duration = audio_paths[i][2] or 5.0
        slide_entries.append((slide_path, section_duration))

        # Checkpoint slide (if this section has a checkpoint)
        checkpoint = section.get("checkpoint")
        if checkpoint:
            cp_title = checkpoint.get("title", f"Checkpoint {i+1}")
            questions = checkpoint.get("questions", [])
            question_preview = questions[0].get("question", "") if questions else ""
            cp_path = os.path.join(slides_dir, f"slide_{i+1:03d}_checkpoint.png")
            render_checkpoint_slide(cp_title, question_preview, len(questions), cp_path)
            slide_entries.append((cp_path, 3.0))  # 3s still frame as pause marker

    # Step 3: Concatenate all section audio files into one
    combined_audio = os.path.join(output_dir, "combined_audio.mp3")
    if len(audio_paths) == 1:
        import shutil
        shutil.copy2(audio_paths[0][1], combined_audio)
    elif len(audio_paths) > 1:
        # Use FFmpeg concat demuxer with stream copy (no re-encode)
        audio_list_path = os.path.join(output_dir, "audio_list.txt")
        with open(audio_list_path, "w") as f:
            for _, apath, _ in audio_paths:
                f.write(f"file '{apath}'\n")
        cmd = [
            "ffmpeg", "-y", "-f", "concat", "-safe", "0",
            "-i", audio_list_path, "-c", "copy", combined_audio,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        if result.returncode != 0:
            raise RuntimeError(f"FFmpeg audio concat failed: {result.stderr}")
    else:
        raise ValueError("No audio files to concatenate")

    # Step 4: Create FFmpeg concat file for slides
    concat_path = os.path.join(output_dir, "concat.txt")
    with open(concat_path, "w") as f:
        for slide_path, dur in slide_entries:
            f.write(f"file '{slide_path}'\n")
            f.write(f"duration {dur:.2f}\n")
        # Repeat last slide for FFmpeg concat demuxer
        f.write(f"file '{slide_entries[-1][0]}'\n")

    # Step 5: Combine slides + audio into MP4 (-shortest trims to the
    # shorter of video/audio; +faststart enables progressive playback)
    output_path = os.path.join(output_dir, "interactive.mp4")
    cmd = [
        "ffmpeg", "-y",
        "-f", "concat", "-safe", "0", "-i", concat_path,
        "-i", combined_audio,
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        "-c:a", "aac", "-b:a", "128k",
        "-shortest",
        "-movflags", "+faststart",
        output_path,
    ]

    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg interactive video failed: {result.stderr}")

    video_duration = _get_duration(output_path)
    return output_path, video_duration
|
||||
|
||||
|
||||
def _get_duration(file_path: str) -> float:
|
||||
"""Get media duration using FFprobe."""
|
||||
cmd = [
|
||||
|
||||
Reference in New Issue
Block a user