Files
breakpilot-compliance/ai-compliance-sdk/internal/training/media.go
Benjamin Boenisch 9b8b7ca073
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 48s
CI / test-python-backend-compliance (push) Successful in 35s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 20s
feat(training): add Media Pipeline — TTS Audio, Presentation Video, Bulk Generation
Phase A: 8 new IT-Security training modules (SEC-PWD, SEC-DESK, SEC-KIAI,
SEC-BYOD, SEC-VIDEO, SEC-USB, SEC-INC, SEC-HOME) with CTM entries.
Bulk content and quiz generation endpoints for all 28 modules.

Phase B: Piper TTS service (Python/FastAPI) for local German speech synthesis.
training_media table, TTSClient in Go backend, audio generation endpoints,
AudioPlayer component in frontend. MinIO storage integration.

Phase C: FFmpeg presentation video pipeline — LLM generates slide scripts,
ImageMagick renders 1920x1080 slides, FFmpeg combines with audio to MP4.
VideoPlayer and ScriptPreview components in frontend.

New files: 15 created, 9 modified
- compliance-tts-service/ (Dockerfile, main.py, tts_engine.py, storage.py,
  slide_renderer.py, video_generator.py)
- migrations 014-016 (training engine, IT-security modules, media table)
- training package (models, store, content_generator, media, handlers)
- frontend (AudioPlayer, VideoPlayer, ScriptPreview, api, types, page)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 21:42:33 +01:00

187 lines
5.6 KiB
Go

package training
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
"github.com/google/uuid"
)
// MediaType names the kind of media a training asset holds.
type MediaType string

// Media kinds declared by this package.
const (
	MediaTypeAudio = MediaType("audio")
	MediaTypeVideo = MediaType("video")
)
// MediaStatus names the processing state of a media record.
type MediaStatus string

// Processing states declared by this package.
const (
	MediaStatusProcessing = MediaStatus("processing")
	MediaStatusCompleted  = MediaStatus("completed")
	MediaStatusFailed     = MediaStatus("failed")
)
// TrainingMedia is the record describing one generated media file.
// Field order is significant: it determines the key order of the JSON encoding.
type TrainingMedia struct {
	// Identity. ContentID is optional and is left out of the JSON when nil.
	ID        uuid.UUID  `json:"id"`
	ModuleID  uuid.UUID  `json:"module_id"`
	ContentID *uuid.UUID `json:"content_id,omitempty"`

	// Kind of media and its processing state.
	MediaType MediaType   `json:"media_type"`
	Status    MediaStatus `json:"status"`

	// Location and properties of the stored file.
	// NOTE(review): presumably an object-store bucket/key pair — confirm against the store layer.
	Bucket          string  `json:"bucket"`
	ObjectKey       string  `json:"object_key"`
	FileSizeBytes   int64   `json:"file_size_bytes"`
	DurationSeconds float64 `json:"duration_seconds"`
	MimeType        string  `json:"mime_type"`

	// Generation parameters.
	VoiceModel string `json:"voice_model"`
	Language   string `json:"language"`

	// Metadata is carried as raw, undecoded JSON.
	Metadata json.RawMessage `json:"metadata"`

	// ErrorMessage is left out of the JSON when empty.
	ErrorMessage string `json:"error_message,omitempty"`

	GeneratedBy string    `json:"generated_by"`
	IsPublished bool      `json:"is_published"`
	CreatedAt   time.Time `json:"created_at"`
	UpdatedAt   time.Time `json:"updated_at"`
}
// ============================================================================
// TTS Client
// ============================================================================
// TTSClient is an HTTP client for the compliance-tts-service.
type TTSClient struct {
	// baseURL is prepended verbatim to every endpoint path (e.g. "/synthesize").
	baseURL string
	// httpClient performs all outgoing requests.
	httpClient *http.Client
}
// NewTTSClient returns a TTSClient that targets baseURL.
// Its HTTP client gives up on any single request after five minutes.
func NewTTSClient(baseURL string) *TTSClient {
	client := &http.Client{Timeout: 5 * time.Minute}
	return &TTSClient{baseURL: baseURL, httpClient: client}
}
// TTSSynthesizeRequest is the JSON payload for an audio synthesis call.
type TTSSynthesizeRequest struct {
	Text     string `json:"text"`
	Language string `json:"language"`
	Voice    string `json:"voice"`
	ModuleID string `json:"module_id"`

	// ContentID is optional; an empty value is left out of the JSON.
	ContentID string `json:"content_id,omitempty"`
}
// TTSSynthesizeResponse is the JSON reply decoded after an audio synthesis call.
type TTSSynthesizeResponse struct {
	AudioID string `json:"audio_id"`

	// Bucket and ObjectKey are reported by the service for the produced audio.
	Bucket    string `json:"bucket"`
	ObjectKey string `json:"object_key"`

	DurationSeconds float64 `json:"duration_seconds"`
	SizeBytes       int64   `json:"size_bytes"`
}
// TTSGenerateVideoRequest is the JSON payload for a video generation call.
type TTSGenerateVideoRequest struct {
	// Script is passed through as free-form JSON; a nil map encodes as null.
	Script map[string]interface{} `json:"script"`

	AudioObjectKey string `json:"audio_object_key"`
	ModuleID       string `json:"module_id"`
}
// TTSGenerateVideoResponse is the JSON reply decoded after a video generation call.
type TTSGenerateVideoResponse struct {
	VideoID string `json:"video_id"`

	// Bucket and ObjectKey are reported by the service for the produced video.
	Bucket    string `json:"bucket"`
	ObjectKey string `json:"object_key"`

	DurationSeconds float64 `json:"duration_seconds"`
	SizeBytes       int64   `json:"size_bytes"`
}
// Synthesize asks the TTS service to create audio for req.
//
// req is JSON-encoded and POSTed to <baseURL>/synthesize. A reply with status
// 200 is decoded into a TTSSynthesizeResponse. Any other status is returned as
// an error carrying the status code and the response body. The request is
// bound to ctx, so cancelling ctx aborts the call.
//
// Errors are returned for: a request that cannot be encoded or built, a
// transport failure, a non-200 status, a response body that cannot be read
// completely, and a body that is not valid JSON for the response type.
func (c *TTSClient) Synthesize(ctx context.Context, req *TTSSynthesizeRequest) (*TTSSynthesizeResponse, error) {
	body, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/synthesize", bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("TTS service request failed: %w", err)
	}
	defer resp.Body.Close()

	respBody, readErr := io.ReadAll(resp.Body)
	if resp.StatusCode != http.StatusOK {
		// The upstream status is the more useful diagnosis; whatever part of
		// the body was read is included even if the read itself failed.
		return nil, fmt.Errorf("TTS service error (%d): %s", resp.StatusCode, string(respBody))
	}
	if readErr != nil {
		// Without this check a truncated body would surface as a misleading
		// JSON parse error below.
		return nil, fmt.Errorf("read TTS response: %w", readErr)
	}

	var result TTSSynthesizeResponse
	if err := json.Unmarshal(respBody, &result); err != nil {
		return nil, fmt.Errorf("parse TTS response: %w", err)
	}
	return &result, nil
}
// GenerateVideo asks the TTS service to create a presentation video for req.
//
// req is JSON-encoded and POSTed to <baseURL>/generate-video. A reply with
// status 200 is decoded into a TTSGenerateVideoResponse. Any other status is
// returned as an error carrying the status code and the response body. The
// request is bound to ctx, so cancelling ctx aborts the call.
//
// Errors are returned for: a request that cannot be encoded or built, a
// transport failure, a non-200 status, a response body that cannot be read
// completely, and a body that is not valid JSON for the response type.
func (c *TTSClient) GenerateVideo(ctx context.Context, req *TTSGenerateVideoRequest) (*TTSGenerateVideoResponse, error) {
	body, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/generate-video", bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("TTS service request failed: %w", err)
	}
	defer resp.Body.Close()

	respBody, readErr := io.ReadAll(resp.Body)
	if resp.StatusCode != http.StatusOK {
		// The upstream status is the more useful diagnosis; whatever part of
		// the body was read is included even if the read itself failed.
		return nil, fmt.Errorf("TTS service error (%d): %s", resp.StatusCode, string(respBody))
	}
	if readErr != nil {
		// Without this check a truncated body would surface as a misleading
		// JSON parse error below.
		return nil, fmt.Errorf("read TTS response: %w", readErr)
	}

	var result TTSGenerateVideoResponse
	if err := json.Unmarshal(respBody, &result); err != nil {
		return nil, fmt.Errorf("parse TTS response: %w", err)
	}
	return &result, nil
}
// IsHealthy reports whether GET <baseURL>/health answers with status 200.
// A request that cannot be built or sent, or any other status, yields false.
// The request is bound to ctx.
func (c *TTSClient) IsHealthy(ctx context.Context) bool {
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+"/health", nil)
	if err != nil {
		return false
	}

	resp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return false
	}
	defer resp.Body.Close()

	// Read a bounded amount of the body before closing so the transport can
	// reuse the connection for the next probe. The result of the drain does
	// not affect the health verdict, so its error is deliberately ignored.
	const maxDrain = 4 << 10
	_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxDrain))

	return resp.StatusCode == http.StatusOK
}