Files
breakpilot-compliance/ai-compliance-sdk/internal/training/content_generator.go
Benjamin Admin 4f6bc8f6f6
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 37s
CI/CD / test-python-backend-compliance (push) Successful in 39s
CI/CD / test-python-document-crawler (push) Successful in 26s
CI/CD / test-python-dsms-gateway (push) Successful in 23s
CI/CD / validate-canonical-controls (push) Successful in 12s
CI/CD / Deploy (push) Has been skipped
feat(training+controls): interactive video pipeline, training blocks, control generator, CE libraries
Interactive Training Videos (CP-TRAIN):
- DB migration 022: training_checkpoints + checkpoint_progress tables
- NarratorScript generation via Anthropic (AI Teacher persona, German)
- TTS batch synthesis + interactive video pipeline (slides + checkpoint slides + FFmpeg)
- 4 new API endpoints: generate-interactive, interactive-manifest, checkpoint submit, checkpoint progress
- InteractiveVideoPlayer component (HTML5 Video, quiz overlay, seek protection, progress tracking)
- Learner portal integration with automatic completion on all checkpoints passed
- 30 new tests (handler validation + grading logic + manifest/progress + seek protection)

Training Blocks:
- Block generator, block store, block config CRUD + preview/generate endpoints
- Migration 021: training_blocks schema

Control Generator + Canonical Library:
- Control generator routes + service enhancements
- Canonical control library helpers, sidebar entry
- Citation backfill service + tests
- CE libraries data (hazard, protection, evidence, lifecycle, components)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 21:41:48 +01:00

979 lines
30 KiB
Go

package training
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/breakpilot/ai-compliance-sdk/internal/llm"
"github.com/google/uuid"
)
// ContentGenerator generates training content, quiz questions, and audio/video
// media for training modules via an LLM provider registry and a TTS client.
type ContentGenerator struct {
// registry is the LLM provider registry used for every chat completion call.
registry *llm.ProviderRegistry
// piiDetector scans generated text for PII; the content methods skip the scan when it is nil.
piiDetector *llm.PIIDetector
// store persists generated content, questions, media records, checkpoints, and audit entries.
store *Store
// ttsClient talks to the TTS/video service; the audio and video methods return an error when it is nil.
ttsClient *TTSClient
}
// NewContentGenerator returns a ContentGenerator wired to the given
// dependencies. The arguments are stored as-is without validation; a nil
// piiDetector disables PII redaction and a nil ttsClient makes the audio and
// video methods return an error.
func NewContentGenerator(registry *llm.ProviderRegistry, piiDetector *llm.PIIDetector, store *Store, ttsClient *TTSClient) *ContentGenerator {
	g := new(ContentGenerator)
	g.registry = registry
	g.piiDetector = piiDetector
	g.store = store
	g.ttsClient = ttsClient
	return g
}
// GenerateModuleContent asks the LLM for Markdown training material for module,
// stores it as unpublished module content, and writes an audit log entry.
//
// An empty language defaults to "de". When a PII detector is configured, every
// non-empty match it reports is replaced with "[REDACTED]" before the content
// is stored. The stored summary is the first 200 bytes of the body, cut back
// to a UTF-8 character boundary, followed by "...".
//
// It returns the stored content, or an error when the LLM call or the store
// write fails.
func (g *ContentGenerator) GenerateModuleContent(ctx context.Context, module TrainingModule, language string) (*ModuleContent, error) {
	if language == "" {
		language = "de"
	}

	prompt := buildContentPrompt(module, language)
	resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: getContentSystemPrompt(language)},
			{Role: "user", Content: prompt},
		},
		Temperature: 0.15,
		MaxTokens:   4096,
	})
	if err != nil {
		return nil, fmt.Errorf("LLM content generation failed: %w", err)
	}

	contentBody := resp.Message.Content

	// PII check on generated content.
	if g.piiDetector != nil && g.piiDetector.ContainsPII(contentBody) {
		for _, f := range g.piiDetector.FindPII(contentBody) {
			// ReplaceAll with an empty search string inserts the marker
			// between every character, so an empty match must be skipped.
			if f.Match == "" {
				continue
			}
			contentBody = strings.ReplaceAll(contentBody, f.Match, "[REDACTED]")
		}
	}

	// Create summary (first 200 bytes).
	summary := contentBody
	if len(summary) > 200 {
		cut := 200
		// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back
		// so a multi-byte character (e.g. an umlaut) is never split in half.
		for cut > 0 && summary[cut]&0xC0 == 0x80 {
			cut--
		}
		summary = summary[:cut] + "..."
	}

	content := &ModuleContent{
		ModuleID:      module.ID,
		ContentFormat: ContentFormatMarkdown,
		ContentBody:   contentBody,
		Summary:       summary,
		GeneratedBy:   "llm_" + resp.Provider,
		LLMModel:      resp.Model,
		IsPublished:   false,
	}
	if err := g.store.CreateModuleContent(ctx, content); err != nil {
		return nil, fmt.Errorf("failed to save content: %w", err)
	}

	// Audit log.
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditActionContentGenerated,
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details: map[string]interface{}{
			"module_code": module.ModuleCode,
			"provider":    resp.Provider,
			"model":       resp.Model,
			"content_id":  content.ID.String(),
			"version":     content.Version,
			"tokens_used": resp.Usage.TotalTokens,
		},
	})

	return content, nil
}
// GenerateQuizQuestions asks the LLM for count multiple-choice questions about
// module, parses the JSON reply, and stores each question with a 1-based sort
// order. A count of zero or less defaults to 5. The module's published content,
// when present, is passed to the LLM as context.
//
// It returns the stored questions, or the first error from the store, the LLM
// call, or the response parser. Questions saved before a failing store write
// are not rolled back here.
func (g *ContentGenerator) GenerateQuizQuestions(ctx context.Context, module TrainingModule, count int) ([]QuizQuestion, error) {
	if count <= 0 {
		count = 5
	}

	// Published content is optional context for the prompt.
	published, err := g.store.GetPublishedContent(ctx, module.ID)
	if err != nil {
		return nil, err
	}
	var body string
	if published != nil {
		body = published.ContentBody
	}

	request := &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: getQuizSystemPrompt()},
			{Role: "user", Content: buildQuizPrompt(module, body, count)},
		},
		Temperature: 0.2,
		MaxTokens:   4096,
	}
	resp, err := g.registry.Chat(ctx, request)
	if err != nil {
		return nil, fmt.Errorf("LLM quiz generation failed: %w", err)
	}

	questions, err := parseQuizResponse(resp.Message.Content, module.ID)
	if err != nil {
		return nil, fmt.Errorf("failed to parse quiz response: %w", err)
	}

	// Persist in reply order; positions are 1-based.
	for idx := range questions {
		position := idx + 1
		q := &questions[idx]
		q.SortOrder = position
		if err := g.store.CreateQuizQuestion(ctx, q); err != nil {
			return nil, fmt.Errorf("failed to save question %d: %w", position, err)
		}
	}

	return questions, nil
}
// ============================================================================
// Prompt Templates
// ============================================================================
// getContentSystemPrompt returns the system prompt for content generation:
// the English prompt for language "en", the German prompt for anything else.
func getContentSystemPrompt(language string) string {
	switch language {
	case "en":
		return "You are a compliance training content expert. Generate professional, accurate training material in Markdown format. Focus on practical relevance and legal accuracy. Do not include any personal data or fictional names."
	default:
		return "Du bist ein Experte fuer Compliance-Schulungsinhalte. Erstelle professionelle, praezise Schulungsmaterialien im Markdown-Format. Fokussiere dich auf praktische Relevanz und rechtliche Genauigkeit. Verwende keine personenbezogenen Daten oder fiktiven Namen."
	}
}
func getQuizSystemPrompt() string {
return `Du bist ein Experte fuer Compliance-Pruefungsfragen. Erstelle Multiple-Choice-Fragen als JSON-Array.
Jede Frage hat genau 4 Antwortoptionen, davon genau eine richtige.
Antworte NUR mit dem JSON-Array, ohne zusaetzlichen Text.
Format:
[
{
"question": "Frage hier?",
"options": ["Option A", "Option B", "Option C", "Option D"],
"correct_index": 0,
"explanation": "Erklaerung warum Option A richtig ist.",
"difficulty": "medium"
}
]`
}
// buildContentPrompt renders the German user prompt for content generation
// from the module's code, title, description, regulation area, duration, and
// NIS2 flag. Known regulation areas are shown with a human-readable label;
// any other area is shown as its raw string value.
//
// NOTE(review): language is accepted but not used here; the prompt text is
// always German.
func buildContentPrompt(module TrainingModule, language string) string {
	const promptTemplate = `Erstelle Schulungsmaterial fuer folgendes Compliance-Modul:
**Modulcode:** %s
**Titel:** %s
**Beschreibung:** %s
**Regulierungsbereich:** %s
**Dauer:** %d Minuten
**NIS2-relevant:** %v
Das Material soll:
1. Eine kurze Einfuehrung in das Thema geben
2. Die wichtigsten rechtlichen Grundlagen erklaeren
3. Praktische Handlungsanweisungen fuer den Arbeitsalltag enthalten
4. Typische Fehler und Risiken aufzeigen
5. Eine Zusammenfassung der Kernpunkte bieten
Verwende klare, verstaendliche Sprache. Zielgruppe sind Mitarbeiter in Unternehmen (50-1.500 MA).
Formatiere den Inhalt als Markdown mit Ueberschriften, Aufzaehlungen und Hervorhebungen.`

	labels := map[RegulationArea]string{
		RegulationDSGVO:     "Datenschutz-Grundverordnung (DSGVO)",
		RegulationNIS2:      "NIS-2-Richtlinie",
		RegulationISO27001:  "ISO 27001 / ISMS",
		RegulationAIAct:     "EU AI Act / KI-Verordnung",
		RegulationGeschGehG: "Geschaeftsgeheimnisgesetz (GeschGehG)",
		RegulationHinSchG:   "Hinweisgeberschutzgesetz (HinSchG)",
	}

	// Every label above is non-empty, so a missing key is the only case that
	// falls back to the raw area value.
	label, known := labels[module.RegulationArea]
	if !known {
		label = string(module.RegulationArea)
	}

	return fmt.Sprintf(
		promptTemplate,
		module.ModuleCode,
		module.Title,
		module.Description,
		label,
		module.DurationMinutes,
		module.NIS2Relevant,
	)
}
// buildQuizPrompt renders the German user prompt that asks for count
// multiple-choice questions about module.
//
// When contentContext is non-empty it is appended as context. Context longer
// than 3000 bytes is cut to at most 3000 bytes, moved back to a UTF-8
// character boundary so no multi-byte character is split, and suffixed with
// "...".
func buildQuizPrompt(module TrainingModule, contentContext string, count int) string {
	// Upper bound on context bytes, to keep the prompt within the token limit.
	const maxContextBytes = 3000

	var sb strings.Builder
	fmt.Fprintf(&sb, `Erstelle %d Multiple-Choice-Pruefungsfragen fuer das Compliance-Modul:
**Modulcode:** %s
**Titel:** %s
**Regulierungsbereich:** %s`, count, module.ModuleCode, module.Title, string(module.RegulationArea))

	if contentContext != "" {
		if len(contentContext) > maxContextBytes {
			cut := maxContextBytes
			// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step
			// back until the cut lands on the start of a character.
			for cut > 0 && contentContext[cut]&0xC0 == 0x80 {
				cut--
			}
			contentContext = contentContext[:cut] + "..."
		}
		fmt.Fprintf(&sb, `
**Schulungsinhalt als Kontext:**
%s`, contentContext)
	}

	fmt.Fprintf(&sb, `
Erstelle genau %d Fragen mit je 4 Antwortoptionen.
Verteile die Schwierigkeitsgrade: easy, medium, hard.
Antworte NUR mit dem JSON-Array.`, count)

	return sb.String()
}
// parseQuizResponse decodes the LLM reply into quiz questions for moduleID.
//
// The text between the first "[" and the last "]" is treated as the JSON
// array; if no such span exists the whole reply is decoded. Entries without
// exactly four options or with a correct_index outside the options are
// dropped silently. An unknown difficulty becomes DifficultyMedium. Every
// kept question is marked active.
//
// It returns a non-nil (possibly empty) slice, or an error when the JSON
// cannot be decoded.
func parseQuizResponse(response string, moduleID uuid.UUID) ([]QuizQuestion, error) {
	type rawQuestion struct {
		Question     string   `json:"question"`
		Options      []string `json:"options"`
		CorrectIndex int      `json:"correct_index"`
		Explanation  string   `json:"explanation"`
		Difficulty   string   `json:"difficulty"`
	}

	// The LLM may wrap the array in extra text; isolate the bracketed span.
	payload := response
	first := strings.Index(response, "[")
	last := strings.LastIndex(response, "]")
	if first >= 0 && last > first {
		payload = response[first : last+1]
	}

	var decoded []rawQuestion
	if err := json.Unmarshal([]byte(payload), &decoded); err != nil {
		return nil, fmt.Errorf("invalid JSON from LLM: %w", err)
	}

	questions := make([]QuizQuestion, 0, len(decoded))
	for _, raw := range decoded {
		// Skip malformed questions.
		if len(raw.Options) != 4 {
			continue
		}
		if raw.CorrectIndex < 0 || raw.CorrectIndex >= len(raw.Options) {
			continue
		}

		level := Difficulty(raw.Difficulty)
		switch level {
		case DifficultyEasy, DifficultyMedium, DifficultyHard:
			// Recognised value; keep it.
		default:
			level = DifficultyMedium
		}

		questions = append(questions, QuizQuestion{
			ModuleID:     moduleID,
			Question:     raw.Question,
			Options:      raw.Options,
			CorrectIndex: raw.CorrectIndex,
			Explanation:  raw.Explanation,
			Difficulty:   level,
			IsActive:     true,
		})
	}

	return questions, nil
}
// GenerateBlockContent asks the LLM for Markdown training material for module
// based on the given canonical controls, stores it as unpublished module
// content, and writes an audit log entry tagged with source "block_generator".
//
// An empty language defaults to "de". When a PII detector is configured, every
// non-empty match it reports is replaced with "[REDACTED]" before the content
// is stored. The stored summary is the first 200 bytes of the body, cut back
// to a UTF-8 character boundary, followed by "...".
//
// It returns the stored content, or an error when the LLM call or the store
// write fails.
func (g *ContentGenerator) GenerateBlockContent(
	ctx context.Context,
	module TrainingModule,
	controls []CanonicalControlSummary,
	language string,
) (*ModuleContent, error) {
	if language == "" {
		language = "de"
	}

	prompt := buildBlockContentPrompt(module, controls, language)
	resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: getContentSystemPrompt(language)},
			{Role: "user", Content: prompt},
		},
		Temperature: 0.15,
		MaxTokens:   8192,
	})
	if err != nil {
		return nil, fmt.Errorf("LLM block content generation failed: %w", err)
	}

	contentBody := resp.Message.Content

	// PII check.
	if g.piiDetector != nil && g.piiDetector.ContainsPII(contentBody) {
		for _, f := range g.piiDetector.FindPII(contentBody) {
			// ReplaceAll with an empty search string inserts the marker
			// between every character, so an empty match must be skipped.
			if f.Match == "" {
				continue
			}
			contentBody = strings.ReplaceAll(contentBody, f.Match, "[REDACTED]")
		}
	}

	summary := contentBody
	if len(summary) > 200 {
		cut := 200
		// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back
		// so a multi-byte character (e.g. an umlaut) is never split in half.
		for cut > 0 && summary[cut]&0xC0 == 0x80 {
			cut--
		}
		summary = summary[:cut] + "..."
	}

	content := &ModuleContent{
		ModuleID:      module.ID,
		ContentFormat: ContentFormatMarkdown,
		ContentBody:   contentBody,
		Summary:       summary,
		GeneratedBy:   "llm_block_" + resp.Provider,
		LLMModel:      resp.Model,
		IsPublished:   false,
	}
	if err := g.store.CreateModuleContent(ctx, content); err != nil {
		return nil, fmt.Errorf("failed to save block content: %w", err)
	}

	// Audit log.
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditActionContentGenerated,
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details: map[string]interface{}{
			"module_code":    module.ModuleCode,
			"provider":       resp.Provider,
			"model":          resp.Model,
			"content_id":     content.ID.String(),
			"version":        content.Version,
			"tokens_used":    resp.Usage.TotalTokens,
			"controls_count": len(controls),
			"source":         "block_generator",
		},
	})

	return content, nil
}
// buildBlockContentPrompt renders the user prompt for control-based content
// generation: a module header, one section per control (ID, title, objective,
// and requirements when present), and closing formatting instructions.
//
// The prompt is English when language is "en" and German otherwise. The
// per-control labels follow the same language, so an English prompt no longer
// carries the German "Ziel" / "Anforderungen" labels.
func buildBlockContentPrompt(module TrainingModule, controls []CanonicalControlSummary, language string) string {
	english := language == "en"

	var sb strings.Builder

	if english {
		sb.WriteString("Create training material for the following compliance module:\n\n")
		fmt.Fprintf(&sb, "**Module Code:** %s\n", module.ModuleCode)
		fmt.Fprintf(&sb, "**Title:** %s\n", module.Title)
		fmt.Fprintf(&sb, "**Duration:** %d minutes\n\n", module.DurationMinutes)
		fmt.Fprintf(&sb, "This module is based on %d security controls:\n\n", len(controls))
	} else {
		sb.WriteString("Erstelle Schulungsmaterial fuer folgendes Compliance-Modul:\n\n")
		fmt.Fprintf(&sb, "**Modulcode:** %s\n", module.ModuleCode)
		fmt.Fprintf(&sb, "**Titel:** %s\n", module.Title)
		fmt.Fprintf(&sb, "**Dauer:** %d Minuten\n\n", module.DurationMinutes)
		fmt.Fprintf(&sb, "Dieses Modul basiert auf %d Sicherheits-Controls:\n\n", len(controls))
	}

	// Labels for the per-control section, in the prompt's language.
	objectiveLabel, requirementsLabel := "**Ziel:**", "**Anforderungen:**"
	if english {
		objectiveLabel, requirementsLabel = "**Objective:**", "**Requirements:**"
	}

	for i, ctrl := range controls {
		fmt.Fprintf(&sb, "### Control %d: %s — %s\n", i+1, ctrl.ControlID, ctrl.Title)
		fmt.Fprintf(&sb, "%s %s\n", objectiveLabel, ctrl.Objective)
		if len(ctrl.Requirements) > 0 {
			sb.WriteString(requirementsLabel)
			sb.WriteString("\n")
			for _, req := range ctrl.Requirements {
				fmt.Fprintf(&sb, "- %s\n", req)
			}
		}
		sb.WriteString("\n")
	}

	if english {
		sb.WriteString(`Create the material as Markdown:
1. Introduction: Why are these controls important?
2. Per control: Explanation, practical tips, examples
3. Summary + action items
4. Checklist for daily work
Use clear, understandable language. Target audience: employees in companies (50-1,500 employees).`)
	} else {
		sb.WriteString(`Erstelle das Material als Markdown:
1. Einfuehrung: Warum sind diese Controls wichtig?
2. Pro Control: Erklaerung, praktische Hinweise, Beispiele
3. Zusammenfassung + Handlungsanweisungen
4. Checkliste fuer den Alltag
Verwende klare, verstaendliche Sprache. Zielgruppe sind Mitarbeiter in Unternehmen (50-1.500 MA).
Formatiere den Inhalt als Markdown mit Ueberschriften, Aufzaehlungen und Hervorhebungen.`)
	}

	return sb.String()
}
// GenerateAllModuleContent generates text content for each of the tenant's
// modules (first 100 as returned by the store) that has no published content
// yet. An empty language defaults to "de".
//
// The returned BulkResult counts generated and skipped modules and collects
// one "<module code>: <error>" string per failed module. A module whose
// published-content lookup fails is reported as an error and not regenerated,
// so a transient store failure cannot trigger an LLM call for a module that
// may already have content. Only a failure to list the modules is returned as
// an error.
func (g *ContentGenerator) GenerateAllModuleContent(ctx context.Context, tenantID uuid.UUID, language string) (*BulkResult, error) {
	if language == "" {
		language = "de"
	}

	modules, _, err := g.store.ListModules(ctx, tenantID, &ModuleFilters{Limit: 100})
	if err != nil {
		return nil, fmt.Errorf("failed to list modules: %w", err)
	}

	result := &BulkResult{}
	for _, module := range modules {
		// Check if module already has published content.
		existing, err := g.store.GetPublishedContent(ctx, module.ID)
		if err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("%s: failed to check published content: %v", module.ModuleCode, err))
			continue
		}
		if existing != nil {
			result.Skipped++
			continue
		}

		if _, err := g.GenerateModuleContent(ctx, module, language); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", module.ModuleCode, err))
			continue
		}
		result.Generated++
	}

	return result, nil
}
// GenerateAllQuizQuestions generates count quiz questions for each of the
// tenant's modules (first 100 as returned by the store) that has no questions
// yet. A count of zero or less defaults to 5.
//
// The returned BulkResult counts generated and skipped modules and collects
// one "<module code>: <error>" string per failed module. A module whose
// question lookup fails is reported as an error and not regenerated, so a
// transient store failure cannot create duplicate questions. Only a failure
// to list the modules is returned as an error.
func (g *ContentGenerator) GenerateAllQuizQuestions(ctx context.Context, tenantID uuid.UUID, count int) (*BulkResult, error) {
	if count <= 0 {
		count = 5
	}

	modules, _, err := g.store.ListModules(ctx, tenantID, &ModuleFilters{Limit: 100})
	if err != nil {
		return nil, fmt.Errorf("failed to list modules: %w", err)
	}

	result := &BulkResult{}
	for _, module := range modules {
		// Check if module already has quiz questions.
		existing, err := g.store.ListQuizQuestions(ctx, module.ID)
		if err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("%s: failed to check existing questions: %v", module.ModuleCode, err))
			continue
		}
		if len(existing) > 0 {
			result.Skipped++
			continue
		}

		if _, err := g.GenerateQuizQuestions(ctx, module, count); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", module.ModuleCode, err))
			continue
		}
		result.Generated++
	}

	return result, nil
}
// GenerateAudio synthesizes German speech for the module's published content
// through the TTS service.
//
// It first creates a media record in processing state, then calls the TTS
// service. On synthesis failure the record is marked failed and an error is
// returned; on success the record is marked completed with the size, duration,
// object key, and bucket reported by the service, and an audit entry is
// written.
//
// It returns an error when the content lookup fails, no published content
// exists, the TTS client is not configured, or the media record cannot be
// created.
func (g *ContentGenerator) GenerateAudio(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
	// Preconditions, checked in this order: content lookup, content present,
	// TTS client present.
	content, err := g.store.GetPublishedContent(ctx, module.ID)
	switch {
	case err != nil:
		return nil, fmt.Errorf("failed to get content: %w", err)
	case content == nil:
		return nil, fmt.Errorf("no published content for module %s", module.ModuleCode)
	case g.ttsClient == nil:
		return nil, fmt.Errorf("TTS client not configured")
	}

	moduleID := module.ID.String()
	contentID := content.ID.String()

	// Record the job as "processing" before calling the TTS service.
	media := &TrainingMedia{
		ModuleID:    module.ID,
		ContentID:   &content.ID,
		MediaType:   MediaTypeAudio,
		Status:      MediaStatusProcessing,
		Bucket:      "compliance-training-audio",
		ObjectKey:   fmt.Sprintf("audio/%s/%s.mp3", moduleID, contentID),
		MimeType:    "audio/mpeg",
		VoiceModel:  "de_DE-thorsten-high",
		Language:    "de",
		GeneratedBy: "tts_piper",
	}
	if err := g.store.CreateMedia(ctx, media); err != nil {
		return nil, fmt.Errorf("failed to create media record: %w", err)
	}

	request := &TTSSynthesizeRequest{
		Text:      content.ContentBody,
		Language:  "de",
		Voice:     "thorsten-high",
		ModuleID:  moduleID,
		ContentID: contentID,
	}
	ttsResp, err := g.ttsClient.Synthesize(ctx, request)
	if err != nil {
		g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusFailed, 0, 0, err.Error())
		return nil, fmt.Errorf("TTS synthesis failed: %w", err)
	}

	// Mirror the service's result on the returned record and in the store.
	media.Status = MediaStatusCompleted
	media.FileSizeBytes = ttsResp.SizeBytes
	media.DurationSeconds = ttsResp.DurationSeconds
	media.ObjectKey = ttsResp.ObjectKey
	media.Bucket = ttsResp.Bucket
	g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, ttsResp.SizeBytes, ttsResp.DurationSeconds, "")

	details := map[string]interface{}{
		"module_code":      module.ModuleCode,
		"media_id":         media.ID.String(),
		"duration_seconds": ttsResp.DurationSeconds,
		"size_bytes":       ttsResp.SizeBytes,
	}
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditAction("audio_generated"),
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details:    details,
	})

	return media, nil
}
// VideoScript is a structured presentation script: a title plus an ordered
// list of slides. It is decoded from, and encoded to, JSON using the keys in
// the field tags.
type VideoScript struct {
// Title is the presentation title (JSON key "title").
Title string `json:"title"`
// Sections holds one entry per slide (JSON key "sections").
Sections []VideoScriptSection `json:"sections"`
}
// VideoScriptSection is one slide in the presentation.
type VideoScriptSection struct {
// Heading is the slide heading (JSON key "heading").
Heading string `json:"heading"`
// Text is the explanatory text for the slide (JSON key "text").
Text string `json:"text"`
// BulletPoints lists the slide's key points (JSON key "bullet_points").
BulletPoints []string `json:"bullet_points"`
}
// GenerateVideoScript asks the LLM to turn the module's published content
// (truncated to 3000 bytes via truncateText) into a slide script and decodes
// the JSON reply.
//
// The text between the first "{" and the last "}" of the reply is decoded; if
// no such span exists the whole reply is decoded. It returns an error when the
// content lookup fails, no published content exists, the LLM call fails, the
// JSON cannot be decoded, or the script has no sections.
func (g *ContentGenerator) GenerateVideoScript(ctx context.Context, module TrainingModule) (*VideoScript, error) {
	const promptTemplate = `Erstelle ein strukturiertes Folien-Script fuer eine Praesentations-Video-Schulung.
**Modul:** %s — %s
**Inhalt:**
%s
Erstelle 5-8 Folien. Jede Folie hat:
- heading: Kurze Ueberschrift (max 60 Zeichen)
- text: Erklaerungstext (1-2 Saetze)
- bullet_points: 2-4 Kernpunkte
Antworte NUR mit einem JSON-Objekt in diesem Format:
{
"title": "Titel der Praesentation",
"sections": [
{
"heading": "Folienueberschrift",
"text": "Erklaerungstext fuer diese Folie.",
"bullet_points": ["Punkt 1", "Punkt 2", "Punkt 3"]
}
]
}`
	const systemPrompt = "Du bist ein Experte fuer Compliance-Schulungspraesentationen. Erstelle strukturierte Folien-Scripts als JSON. Antworte NUR mit dem JSON-Objekt."

	content, err := g.store.GetPublishedContent(ctx, module.ID)
	switch {
	case err != nil:
		return nil, fmt.Errorf("failed to get content: %w", err)
	case content == nil:
		return nil, fmt.Errorf("no published content for module %s", module.ModuleCode)
	}

	userPrompt := fmt.Sprintf(promptTemplate, module.ModuleCode, module.Title, truncateText(content.ContentBody, 3000))
	resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: systemPrompt},
			{Role: "user", Content: userPrompt},
		},
		Temperature: 0.15,
		MaxTokens:   4096,
	})
	if err != nil {
		return nil, fmt.Errorf("LLM video script generation failed: %w", err)
	}

	// The LLM may wrap the object in extra text; isolate the braced span.
	payload := resp.Message.Content
	first := strings.Index(payload, "{")
	last := strings.LastIndex(payload, "}")
	if first >= 0 && last > first {
		payload = payload[first : last+1]
	}

	script := new(VideoScript)
	if err := json.Unmarshal([]byte(payload), script); err != nil {
		return nil, fmt.Errorf("failed to parse video script JSON: %w", err)
	}
	if len(script.Sections) == 0 {
		return nil, fmt.Errorf("video script has no sections")
	}

	return script, nil
}
// GenerateVideo produces a presentation video for module through the TTS
// service.
//
// Steps, in order: reuse the module's published audio or, when none is found,
// generate and publish new audio; generate a slide script via the LLM; create
// a media record in processing state; ask the TTS service to render the
// video. On render failure the record is marked failed and an error is
// returned; on success the record is marked completed with the values the
// service reports, and an audit entry is written.
//
// It returns an error when the TTS client is not configured or any of the
// steps above fails.
func (g *ContentGenerator) GenerateVideo(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
	if g.ttsClient == nil {
		return nil, fmt.Errorf("TTS client not configured")
	}

	// A lookup error is treated the same as "no published audio": fall back
	// to generating it.
	audio, _ := g.store.GetPublishedAudio(ctx, module.ID)
	if audio == nil {
		generated, err := g.GenerateAudio(ctx, module)
		if err != nil {
			return nil, fmt.Errorf("audio generation required but failed: %w", err)
		}
		audio = generated
		// Auto-publish the audio.
		g.store.PublishMedia(ctx, audio.ID, true)
	}

	script, err := g.GenerateVideoScript(ctx, module)
	if err != nil {
		return nil, fmt.Errorf("video script generation failed: %w", err)
	}

	moduleID := module.ID.String()

	// Record the job as "processing" before calling the TTS service.
	media := &TrainingMedia{
		ModuleID:    module.ID,
		MediaType:   MediaTypeVideo,
		Status:      MediaStatusProcessing,
		Bucket:      "compliance-training-video",
		ObjectKey:   fmt.Sprintf("video/%s/presentation.mp4", moduleID),
		MimeType:    "video/mp4",
		Language:    "de",
		GeneratedBy: "tts_ffmpeg",
	}
	if err := g.store.CreateMedia(ctx, media); err != nil {
		return nil, fmt.Errorf("failed to create media record: %w", err)
	}

	request := &TTSGenerateVideoRequest{
		Script: map[string]interface{}{
			"title":       script.Title,
			"module_code": module.ModuleCode,
			"sections":    script.Sections,
		},
		AudioObjectKey: audio.ObjectKey,
		ModuleID:       moduleID,
	}
	videoResp, err := g.ttsClient.GenerateVideo(ctx, request)
	if err != nil {
		g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusFailed, 0, 0, err.Error())
		return nil, fmt.Errorf("video generation failed: %w", err)
	}

	// Mirror the service's result on the returned record and in the store.
	media.Status = MediaStatusCompleted
	media.FileSizeBytes = videoResp.SizeBytes
	media.DurationSeconds = videoResp.DurationSeconds
	media.ObjectKey = videoResp.ObjectKey
	media.Bucket = videoResp.Bucket
	g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, videoResp.SizeBytes, videoResp.DurationSeconds, "")

	details := map[string]interface{}{
		"module_code":      module.ModuleCode,
		"media_id":         media.ID.String(),
		"duration_seconds": videoResp.DurationSeconds,
		"size_bytes":       videoResp.SizeBytes,
		"slides":           len(script.Sections),
	}
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditAction("video_generated"),
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details:    details,
	})

	return media, nil
}
// truncateText returns text unchanged when it is at most maxLen bytes long.
// Otherwise it returns a prefix of at most maxLen bytes followed by "...".
//
// The cut is moved back to the nearest UTF-8 character boundary so that a
// multi-byte character (e.g. an umlaut) is never split, which would produce
// invalid UTF-8. A negative maxLen is treated as 0.
func truncateText(text string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(text) <= maxLen {
		return text
	}

	cut := maxLen
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back until
	// the cut lands on the start of a character.
	for cut > 0 && text[cut]&0xC0 == 0x80 {
		cut--
	}
	return text[:cut] + "..."
}
// ============================================================================
// Interactive Video Pipeline
// ============================================================================
// narratorSystemPrompt is the German system prompt sent with every narrator
// script request. It sets the "AI Teacher" persona and tells the LLM to reply
// with a bare NarratorScript JSON object: sections of roughly three minutes of
// speech, each followed by a checkpoint of 3-5 four-option questions with a
// 0-based correct_index.
const narratorSystemPrompt = `Du bist ein professioneller AI Teacher fuer Compliance-Schulungen.
Dein Stil ist foermlich aber freundlich, klar und paedagogisch wertvoll.
Du sprichst die Lernenden direkt an ("Sie") und fuehrst sie durch die Schulung.
Du erzeugst IMMER deutschsprachige Inhalte.
Dein Output ist ein JSON-Objekt im Format NarratorScript.
Jede Section sollte etwa 3 Minuten Sprechzeit haben (~450 Woerter Narrator-Text).
Nach jeder Section kommt ein Checkpoint mit 3-5 Quiz-Fragen.
Die Fragen testen das Verstaendnis des gerade Gelernten.
Jede Frage hat genau 4 Antwortmoeglichkeiten, wobei correct_index (0-basiert) die richtige Antwort angibt.
Antworte NUR mit dem JSON-Objekt, ohne Markdown-Codeblock-Wrapper.`
// GenerateNarratorScript asks the LLM for a narrator-style video script with
// checkpoints for module and parses the reply with parseNarratorScript.
//
// Published content is optional: when present, its body (truncated to 4000
// bytes via truncateText) is appended to the prompt as a basis. It returns an
// error when the content lookup fails, the LLM call fails, or the reply cannot
// be parsed.
func (g *ContentGenerator) GenerateNarratorScript(ctx context.Context, module TrainingModule) (*NarratorScript, error) {
	const promptTemplate = `Erstelle ein interaktives Schulungsvideo-Skript mit Erzaehlerpersona und Checkpoints.
**Modul:** %s — %s
**Verordnung:** %s
**Beschreibung:** %s
**Dauer:** ca. %d Minuten
%s
Erstelle ein NarratorScript-JSON mit:
- "title": Titel der Schulung
- "intro": Begruessungstext ("Hallo, ich bin Ihr AI Teacher. Heute lernen Sie...")
- "sections": Array mit 3-4 Abschnitten, jeder mit:
- "heading": Abschnittsueberschrift
- "narrator_text": Fliesstext im Erzaehlstil (~450 Woerter, ~3 Min Sprechzeit)
- "bullet_points": 3-5 Kernpunkte fuer die Folie
- "transition": Ueberleitung zum naechsten Abschnitt oder Checkpoint
- "checkpoint": Quiz-Block mit:
- "title": Checkpoint-Titel
- "questions": Array mit 3-5 Fragen, je:
- "question": Fragetext
- "options": Array mit 4 Antworten
- "correct_index": Index der richtigen Antwort (0-basiert)
- "explanation": Erklaerung der richtigen Antwort
- "outro": Abschlussworte
- "total_duration_estimate": geschaetzte Gesamtdauer in Sekunden
Antworte NUR mit dem JSON-Objekt.`

	published, err := g.store.GetPublishedContent(ctx, module.ID)
	if err != nil {
		return nil, fmt.Errorf("failed to get content: %w", err)
	}

	// Existing content is offered to the LLM as a basis when available.
	var basis string
	if published != nil {
		basis = "\n\n**Vorhandener Schulungsinhalt (als Basis):**\n" + truncateText(published.ContentBody, 4000)
	}

	userPrompt := fmt.Sprintf(
		promptTemplate,
		module.ModuleCode, module.Title,
		string(module.RegulationArea),
		module.Description,
		module.DurationMinutes,
		basis,
	)

	request := &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: narratorSystemPrompt},
			{Role: "user", Content: userPrompt},
		},
		Temperature: 0.2,
		MaxTokens:   8192,
	}
	resp, err := g.registry.Chat(ctx, request)
	if err != nil {
		return nil, fmt.Errorf("LLM narrator script generation failed: %w", err)
	}

	return parseNarratorScript(resp.Message.Content)
}
// parseNarratorScript decodes a NarratorScript from LLM output.
//
// The text between the first "{" and the last "}" is decoded as JSON. It
// returns an error when no such span exists, the JSON cannot be decoded, or
// the decoded script has no sections.
func parseNarratorScript(content string) (*NarratorScript, error) {
	first := strings.Index(content, "{")
	last := strings.LastIndex(content, "}")
	if !(first >= 0 && last > first) {
		return nil, fmt.Errorf("no JSON object found in LLM response")
	}

	script := new(NarratorScript)
	if err := json.Unmarshal([]byte(content[first:last+1]), script); err != nil {
		return nil, fmt.Errorf("failed to parse narrator script JSON: %w", err)
	}
	if len(script.Sections) < 1 {
		return nil, fmt.Errorf("narrator script has no sections")
	}

	return script, nil
}
// GenerateInteractiveVideo orchestrates the full interactive video pipeline:
// NarratorScript → TTS Audio → Slides+Video → DB Checkpoints + Quiz Questions.
//
// Steps, in order:
//  1. Generate the narrator script via the LLM.
//  2. Synthesize one audio segment per section; the intro is prepended to the
//     first section's text and the outro appended to the last.
//  3. Render the interactive video through the TTS service.
//  4. Create the media record (with the script as metadata), mark it
//     completed, and publish it.
//  5. Delete the module's existing checkpoints, then create one checkpoint per
//     section that has one, timestamped at the end of that section's audio,
//     together with its quiz questions.
//  6. Write an audit entry.
//
// Checkpoint questions come from unvalidated LLM output. A question whose
// correct_index does not point at one of its options is skipped, because it
// could never be answered correctly.
//
// It returns an error when the TTS client is not configured or any step
// fails. A failure in step 5 leaves the already published media record in
// place.
//
// NOTE(review): checkpoint questions are stored without setting IsActive,
// unlike the questions built by parseQuizResponse. Confirm whether
// CreateCheckpointQuizQuestion relies on that.
func (g *ContentGenerator) GenerateInteractiveVideo(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
	if g.ttsClient == nil {
		return nil, fmt.Errorf("TTS client not configured")
	}

	// 1. Generate NarratorScript via LLM.
	script, err := g.GenerateNarratorScript(ctx, module)
	if err != nil {
		return nil, fmt.Errorf("narrator script generation failed: %w", err)
	}

	// 2. Synthesize audio per section via TTS service.
	sections := make([]SectionAudio, len(script.Sections))
	for i, s := range script.Sections {
		// Combine narrator text with intro/outro for first/last section.
		text := s.NarratorText
		if i == 0 && script.Intro != "" {
			text = script.Intro + "\n\n" + text
		}
		if i == len(script.Sections)-1 && script.Outro != "" {
			text = text + "\n\n" + script.Outro
		}
		sections[i] = SectionAudio{
			Text:    text,
			Heading: s.Heading,
		}
	}

	audioResp, err := g.ttsClient.SynthesizeSections(ctx, &SynthesizeSectionsRequest{
		Sections: sections,
		Voice:    "de_DE-thorsten-high",
		ModuleID: module.ID.String(),
	})
	if err != nil {
		return nil, fmt.Errorf("section audio synthesis failed: %w", err)
	}

	// 3. Generate interactive video via TTS service.
	videoResp, err := g.ttsClient.GenerateInteractiveVideo(ctx, &GenerateInteractiveVideoRequest{
		Script:   script,
		Audio:    audioResp,
		ModuleID: module.ID.String(),
	})
	if err != nil {
		return nil, fmt.Errorf("interactive video generation failed: %w", err)
	}

	// 4. Save TrainingMedia record.
	scriptJSON, err := json.Marshal(script)
	if err != nil {
		return nil, fmt.Errorf("failed to encode narrator script: %w", err)
	}
	media := &TrainingMedia{
		ModuleID:    module.ID,
		MediaType:   MediaTypeInteractiveVideo,
		Status:      MediaStatusProcessing,
		Bucket:      "compliance-training-video",
		ObjectKey:   fmt.Sprintf("video/%s/interactive.mp4", module.ID.String()),
		MimeType:    "video/mp4",
		Language:    "de",
		GeneratedBy: "tts_ffmpeg_interactive",
		Metadata:    scriptJSON,
	}
	if err := g.store.CreateMedia(ctx, media); err != nil {
		return nil, fmt.Errorf("failed to create media record: %w", err)
	}

	// Update media with video result.
	media.Status = MediaStatusCompleted
	media.FileSizeBytes = videoResp.SizeBytes
	media.DurationSeconds = videoResp.DurationSeconds
	media.ObjectKey = videoResp.ObjectKey
	media.Bucket = videoResp.Bucket
	g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, videoResp.SizeBytes, videoResp.DurationSeconds, "")

	// Auto-publish.
	g.store.PublishMedia(ctx, media.ID, true)

	// 5. Create Checkpoints + Quiz Questions in DB.
	// Clear old checkpoints first.
	g.store.DeleteCheckpointsForModule(ctx, module.ID)

	for i, section := range script.Sections {
		if section.Checkpoint == nil {
			continue
		}

		// Checkpoint timestamp = end of this section's audio. It stays 0 when
		// the TTS response has no entry for this section.
		var timestamp float64
		if i < len(audioResp.Sections) {
			timestamp = audioResp.Sections[i].StartTimestamp + audioResp.Sections[i].Duration
		}

		cp := &Checkpoint{
			ModuleID:         module.ID,
			CheckpointIndex:  i,
			Title:            section.Checkpoint.Title,
			TimestampSeconds: timestamp,
		}
		if err := g.store.CreateCheckpoint(ctx, cp); err != nil {
			return nil, fmt.Errorf("failed to create checkpoint %d: %w", i, err)
		}

		// Save quiz questions for this checkpoint.
		for j, q := range section.Checkpoint.Questions {
			// A correct_index outside the options (this includes a question
			// with no options) can never be answered correctly; skip it.
			if q.CorrectIndex < 0 || q.CorrectIndex >= len(q.Options) {
				continue
			}
			question := &QuizQuestion{
				ModuleID:     module.ID,
				Question:     q.Question,
				Options:      q.Options,
				CorrectIndex: q.CorrectIndex,
				Explanation:  q.Explanation,
				Difficulty:   DifficultyMedium,
				SortOrder:    j,
			}
			if err := g.store.CreateCheckpointQuizQuestion(ctx, question, cp.ID); err != nil {
				return nil, fmt.Errorf("failed to create checkpoint question: %w", err)
			}
		}
	}

	// 6. Audit log.
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditAction("interactive_video_generated"),
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details: map[string]interface{}{
			"module_code":      module.ModuleCode,
			"media_id":         media.ID.String(),
			"duration_seconds": videoResp.DurationSeconds,
			"sections":         len(script.Sections),
			"checkpoints":      countCheckpoints(script),
		},
	})

	return media, nil
}
// countCheckpoints returns how many sections of script carry a checkpoint.
func countCheckpoints(script *NarratorScript) int {
	var total int
	for i := range script.Sections {
		if script.Sections[i].Checkpoint == nil {
			continue
		}
		total++
	}
	return total
}