package training

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"

	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
)

// VideoScript represents a structured presentation script.
type VideoScript struct {
	Title    string               `json:"title"`
	Sections []VideoScriptSection `json:"sections"`
}

// VideoScriptSection is one slide in the presentation.
type VideoScriptSection struct {
	Heading      string   `json:"heading"`
	Text         string   `json:"text"`
	BulletPoints []string `json:"bullet_points"`
}

// GenerateAudio generates audio for a module using the TTS service.
//
// It loads the module's published content, creates a TrainingMedia record in
// the processing state, sends the content body to the TTS client and then
// marks the record as completed (or failed). On success an audit log entry is
// written and the updated media record is returned.
//
// An error is returned when the content lookup fails, when no published
// content exists, when no TTS client is configured, when the media record
// cannot be created, or when synthesis fails.
func (g *ContentGenerator) GenerateAudio(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
	// Get published content
	content, err := g.store.GetPublishedContent(ctx, module.ID)
	if err != nil {
		return nil, fmt.Errorf("failed to get content: %w", err)
	}
	if content == nil {
		return nil, fmt.Errorf("no published content for module %s", module.ModuleCode)
	}

	if g.ttsClient == nil {
		return nil, fmt.Errorf("TTS client not configured")
	}

	// Create media record (processing). Bucket and ObjectKey are provisional;
	// they are overwritten below with the values reported by the TTS service.
	media := &TrainingMedia{
		ModuleID:    module.ID,
		ContentID:   &content.ID,
		MediaType:   MediaTypeAudio,
		Status:      MediaStatusProcessing,
		Bucket:      "compliance-training-audio",
		ObjectKey:   fmt.Sprintf("audio/%s/%s.mp3", module.ID.String(), content.ID.String()),
		MimeType:    "audio/mpeg",
		VoiceModel:  "de_DE-thorsten-high",
		Language:    "de",
		GeneratedBy: "tts_piper",
	}

	if err := g.store.CreateMedia(ctx, media); err != nil {
		return nil, fmt.Errorf("failed to create media record: %w", err)
	}

	// Call TTS service.
	// NOTE(review): the request voice is "thorsten-high" while the record
	// stores "de_DE-thorsten-high" — confirm both identifiers are intended.
	ttsResp, err := g.ttsClient.Synthesize(ctx, &TTSSynthesizeRequest{
		Text:      content.ContentBody,
		Language:  "de",
		Voice:     "thorsten-high",
		ModuleID:  module.ID.String(),
		ContentID: content.ID.String(),
	})
	if err != nil {
		// NOTE(review): the result of this status update is not inspected.
		g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusFailed, 0, 0, err.Error())
		return nil, fmt.Errorf("TTS synthesis failed: %w", err)
	}

	// Update media record
	media.Status = MediaStatusCompleted
	media.FileSizeBytes = ttsResp.SizeBytes
	media.DurationSeconds = ttsResp.DurationSeconds
	media.ObjectKey = ttsResp.ObjectKey
	media.Bucket = ttsResp.Bucket

	// NOTE(review): the result of this status update is not inspected.
	g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, ttsResp.SizeBytes, ttsResp.DurationSeconds, "")

	// Audit log
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditAction("audio_generated"),
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details: map[string]interface{}{
			"module_code":      module.ModuleCode,
			"media_id":         media.ID.String(),
			"duration_seconds": ttsResp.DurationSeconds,
			"size_bytes":       ttsResp.SizeBytes,
		},
	})

	return media, nil
}

// outermostJSONObject returns the substring of s that spans from the first
// "{" to the last "}" (inclusive). The boolean is false when s contains no
// such pair, in which case the returned string is empty.
func outermostJSONObject(s string) (string, bool) {
	start := strings.Index(s, "{")
	end := strings.LastIndex(s, "}")
	if start < 0 || end <= start {
		return "", false
	}
	return s[start : end+1], true
}

// GenerateVideoScript generates a structured video script from module content via LLM.
//
// The published content body is truncated via truncateText(…, 3000) before it
// is embedded in the prompt. The LLM reply is narrowed to its outermost
// "{ … }" span when one exists and then decoded into a VideoScript.
//
// An error is returned when the content lookup fails, when no published
// content exists, when the LLM call fails, when the reply is not valid JSON,
// or when the decoded script has no sections.
func (g *ContentGenerator) GenerateVideoScript(ctx context.Context, module TrainingModule) (*VideoScript, error) {
	content, err := g.store.GetPublishedContent(ctx, module.ID)
	if err != nil {
		return nil, fmt.Errorf("failed to get content: %w", err)
	}
	if content == nil {
		return nil, fmt.Errorf("no published content for module %s", module.ModuleCode)
	}

	// The prompt text is runtime data sent to the LLM; it is reproduced verbatim.
	prompt := fmt.Sprintf(`Erstelle ein strukturiertes Folien-Script fuer eine Praesentations-Video-Schulung. **Modul:** %s — %s **Inhalt:** %s Erstelle 5-8 Folien. Jede Folie hat: - heading: Kurze Ueberschrift (max 60 Zeichen) - text: Erklaerungstext (1-2 Saetze) - bullet_points: 2-4 Kernpunkte Antworte NUR mit einem JSON-Objekt in diesem Format: { "title": "Titel der Praesentation", "sections": [ { "heading": "Folienueberschrift", "text": "Erklaerungstext fuer diese Folie.", "bullet_points": ["Punkt 1", "Punkt 2", "Punkt 3"] } ] }`,
		module.ModuleCode, module.Title, truncateText(content.ContentBody, 3000))

	resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: "Du bist ein Experte fuer Compliance-Schulungspraesentationen. Erstelle strukturierte Folien-Scripts als JSON. Antworte NUR mit dem JSON-Objekt."},
			{Role: "user", Content: prompt},
		},
		Temperature: 0.15,
		MaxTokens:   4096,
	})
	if err != nil {
		return nil, fmt.Errorf("LLM video script generation failed: %w", err)
	}

	// Parse JSON response. When no "{ … }" span is found, the raw reply is
	// handed to the decoder unchanged so that its error is what gets reported.
	var script VideoScript
	jsonStr := resp.Message.Content
	if obj, ok := outermostJSONObject(jsonStr); ok {
		jsonStr = obj
	}

	if err := json.Unmarshal([]byte(jsonStr), &script); err != nil {
		return nil, fmt.Errorf("failed to parse video script JSON: %w", err)
	}

	if len(script.Sections) == 0 {
		return nil, fmt.Errorf("video script has no sections")
	}

	return &script, nil
}

// GenerateVideo generates a presentation video for a module.
//
// It makes sure published audio exists (generating and publishing it when the
// lookup yields nothing), obtains a slide script via GenerateVideoScript,
// creates a TrainingMedia record in the processing state and asks the TTS
// client to render the video. The record is then marked completed (or failed)
// and an audit log entry is written.
//
// An error is returned when no TTS client is configured or when audio
// generation, script generation, media record creation or video generation
// fails.
func (g *ContentGenerator) GenerateVideo(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
	if g.ttsClient == nil {
		return nil, fmt.Errorf("TTS client not configured")
	}

	// Check for published audio, generate if missing.
	// The lookup error is discarded: a nil result, for whatever reason, falls
	// through to generating fresh audio.
	audio, _ := g.store.GetPublishedAudio(ctx, module.ID)
	if audio == nil {
		// Try to generate audio first
		var err error
		audio, err = g.GenerateAudio(ctx, module)
		if err != nil {
			return nil, fmt.Errorf("audio generation required but failed: %w", err)
		}
		// Auto-publish the audio
		// NOTE(review): the result of this publish call is not inspected.
		g.store.PublishMedia(ctx, audio.ID, true)
	}

	// Generate video script via LLM
	script, err := g.GenerateVideoScript(ctx, module)
	if err != nil {
		return nil, fmt.Errorf("video script generation failed: %w", err)
	}

	// Create media record. Bucket and ObjectKey are provisional; they are
	// overwritten below with the values reported by the TTS service.
	media := &TrainingMedia{
		ModuleID:    module.ID,
		MediaType:   MediaTypeVideo,
		Status:      MediaStatusProcessing,
		Bucket:      "compliance-training-video",
		ObjectKey:   fmt.Sprintf("video/%s/presentation.mp4", module.ID.String()),
		MimeType:    "video/mp4",
		Language:    "de",
		GeneratedBy: "tts_ffmpeg",
	}

	if err := g.store.CreateMedia(ctx, media); err != nil {
		return nil, fmt.Errorf("failed to create media record: %w", err)
	}

	// Build script map for TTS service
	scriptMap := map[string]interface{}{
		"title":       script.Title,
		"module_code": module.ModuleCode,
		"sections":    script.Sections,
	}

	// Call TTS service video generation
	videoResp, err := g.ttsClient.GenerateVideo(ctx, &TTSGenerateVideoRequest{
		Script:         scriptMap,
		AudioObjectKey: audio.ObjectKey,
		ModuleID:       module.ID.String(),
	})
	if err != nil {
		// NOTE(review): the result of this status update is not inspected.
		g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusFailed, 0, 0, err.Error())
		return nil, fmt.Errorf("video generation failed: %w", err)
	}

	// Update media record
	media.Status = MediaStatusCompleted
	media.FileSizeBytes = videoResp.SizeBytes
	media.DurationSeconds = videoResp.DurationSeconds
	media.ObjectKey = videoResp.ObjectKey
	media.Bucket = videoResp.Bucket

	// NOTE(review): the result of this status update is not inspected.
	g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, videoResp.SizeBytes, videoResp.DurationSeconds, "")

	// Audit log
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditAction("video_generated"),
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details: map[string]interface{}{
			"module_code":      module.ModuleCode,
			"media_id":         media.ID.String(),
			"duration_seconds": videoResp.DurationSeconds,
			"size_bytes":       videoResp.SizeBytes,
			"slides":           len(script.Sections),
		},
	})

	return media, nil
}

// ============================================================================
// Interactive Video Pipeline
// ============================================================================

// narratorSystemPrompt is the system message sent to the LLM when generating
// a narrator script. It is runtime data and is reproduced verbatim.
const narratorSystemPrompt = `Du bist ein professioneller AI Teacher fuer Compliance-Schulungen. Dein Stil ist foermlich aber freundlich, klar und paedagogisch wertvoll. Du sprichst die Lernenden direkt an ("Sie") und fuehrst sie durch die Schulung. Du erzeugst IMMER deutschsprachige Inhalte. Dein Output ist ein JSON-Objekt im Format NarratorScript. Jede Section sollte etwa 3 Minuten Sprechzeit haben (~450 Woerter Narrator-Text). Nach jeder Section kommt ein Checkpoint mit 3-5 Quiz-Fragen. Die Fragen testen das Verstaendnis des gerade Gelernten. Jede Frage hat genau 4 Antwortmoeglichkeiten, wobei correct_index (0-basiert) die richtige Antwort angibt. Antworte NUR mit dem JSON-Objekt, ohne Markdown-Codeblock-Wrapper.`

// GenerateNarratorScript generates a narrator-style video script with checkpoints via LLM.
//
// Unlike GenerateVideoScript, missing published content is not an error here:
// when content exists, its body (truncated via truncateText(…, 4000)) is
// appended to the prompt as additional context; otherwise the prompt is built
// from the module's own fields only.
//
// An error is returned when the content lookup fails, when the LLM call
// fails, or when parseNarratorScript rejects the reply.
func (g *ContentGenerator) GenerateNarratorScript(ctx context.Context, module TrainingModule) (*NarratorScript, error) {
	content, err := g.store.GetPublishedContent(ctx, module.ID)
	if err != nil {
		return nil, fmt.Errorf("failed to get content: %w", err)
	}

	contentContext := ""
	if content != nil {
		contentContext = fmt.Sprintf("\n\n**Vorhandener Schulungsinhalt (als Basis):**\n%s", truncateText(content.ContentBody, 4000))
	}

	// The prompt text is runtime data sent to the LLM; it is reproduced verbatim.
	prompt := fmt.Sprintf(`Erstelle ein interaktives Schulungsvideo-Skript mit Erzaehlerpersona und Checkpoints. **Modul:** %s — %s **Verordnung:** %s **Beschreibung:** %s **Dauer:** ca. %d Minuten %s Erstelle ein NarratorScript-JSON mit: - "title": Titel der Schulung - "intro": Begruessungstext ("Hallo, ich bin Ihr AI Teacher. Heute lernen Sie...") - "sections": Array mit 3-4 Abschnitten, jeder mit: - "heading": Abschnittsueberschrift - "narrator_text": Fliesstext im Erzaehlstil (~450 Woerter, ~3 Min Sprechzeit) - "bullet_points": 3-5 Kernpunkte fuer die Folie - "transition": Ueberleitung zum naechsten Abschnitt oder Checkpoint - "checkpoint": Quiz-Block mit: - "title": Checkpoint-Titel - "questions": Array mit 3-5 Fragen, je: - "question": Fragetext - "options": Array mit 4 Antworten - "correct_index": Index der richtigen Antwort (0-basiert) - "explanation": Erklaerung der richtigen Antwort - "outro": Abschlussworte - "total_duration_estimate": geschaetzte Gesamtdauer in Sekunden Antworte NUR mit dem JSON-Objekt.`,
		module.ModuleCode,
		module.Title,
		string(module.RegulationArea),
		module.Description,
		module.DurationMinutes,
		contentContext,
	)

	resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: narratorSystemPrompt},
			{Role: "user", Content: prompt},
		},
		Temperature: 0.2,
		MaxTokens:   8192,
	})
	if err != nil {
		return nil, fmt.Errorf("LLM narrator script generation failed: %w", err)
	}

	return parseNarratorScript(resp.Message.Content)
}

// parseNarratorScript extracts a NarratorScript from LLM output.
//
// The text between the first "{" and the last "}" is decoded as JSON. An
// error is returned when no such span exists, when decoding fails, or when
// the decoded script has no sections.
func parseNarratorScript(content string) (*NarratorScript, error) {
	// Find JSON object in response
	jsonStr, ok := outermostJSONObject(content)
	if !ok {
		return nil, fmt.Errorf("no JSON object found in LLM response")
	}

	var script NarratorScript
	if err := json.Unmarshal([]byte(jsonStr), &script); err != nil {
		return nil, fmt.Errorf("failed to parse narrator script JSON: %w", err)
	}

	if len(script.Sections) == 0 {
		return nil, fmt.Errorf("narrator script has no sections")
	}

	return &script, nil
}

// GenerateInteractiveVideo orchestrates the full interactive video pipeline:
// NarratorScript → TTS Audio → Slides+Video → DB Checkpoints + Quiz Questions
//
// The media record is created only after audio and video generation have
// succeeded, so earlier failures leave no media row behind. After the record
// is published, existing checkpoints for the module are deleted and replaced
// with the ones from the new script.
//
// An error is returned when no TTS client is configured, or when script
// generation, audio synthesis, video generation, script serialization, media
// record creation, or checkpoint/question persistence fails.
func (g *ContentGenerator) GenerateInteractiveVideo(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
	if g.ttsClient == nil {
		return nil, fmt.Errorf("TTS client not configured")
	}

	// 1. Generate NarratorScript via LLM
	script, err := g.GenerateNarratorScript(ctx, module)
	if err != nil {
		return nil, fmt.Errorf("narrator script generation failed: %w", err)
	}

	// 2. Synthesize audio per section via TTS service
	sections := make([]SectionAudio, len(script.Sections))
	for i, s := range script.Sections {
		// Combine narrator text with intro/outro for first/last section
		text := s.NarratorText
		if i == 0 && script.Intro != "" {
			text = script.Intro + "\n\n" + text
		}
		if i == len(script.Sections)-1 && script.Outro != "" {
			text = text + "\n\n" + script.Outro
		}
		sections[i] = SectionAudio{
			Text:    text,
			Heading: s.Heading,
		}
	}

	audioResp, err := g.ttsClient.SynthesizeSections(ctx, &SynthesizeSectionsRequest{
		Sections: sections,
		Voice:    "de_DE-thorsten-high",
		ModuleID: module.ID.String(),
	})
	if err != nil {
		return nil, fmt.Errorf("section audio synthesis failed: %w", err)
	}

	// 3. Generate interactive video via TTS service
	videoResp, err := g.ttsClient.GenerateInteractiveVideo(ctx, &GenerateInteractiveVideoRequest{
		Script:   script,
		Audio:    audioResp,
		ModuleID: module.ID.String(),
	})
	if err != nil {
		return nil, fmt.Errorf("interactive video generation failed: %w", err)
	}

	// 4. Save TrainingMedia record
	// The serialized script is stored as the record's metadata; a marshal
	// failure is reported instead of silently persisting empty metadata.
	scriptJSON, err := json.Marshal(script)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal narrator script: %w", err)
	}

	media := &TrainingMedia{
		ModuleID:    module.ID,
		MediaType:   MediaTypeInteractiveVideo,
		Status:      MediaStatusProcessing,
		Bucket:      "compliance-training-video",
		ObjectKey:   fmt.Sprintf("video/%s/interactive.mp4", module.ID.String()),
		MimeType:    "video/mp4",
		Language:    "de",
		GeneratedBy: "tts_ffmpeg_interactive",
		Metadata:    scriptJSON,
	}

	if err := g.store.CreateMedia(ctx, media); err != nil {
		return nil, fmt.Errorf("failed to create media record: %w", err)
	}

	// Update media with video result
	media.Status = MediaStatusCompleted
	media.FileSizeBytes = videoResp.SizeBytes
	media.DurationSeconds = videoResp.DurationSeconds
	media.ObjectKey = videoResp.ObjectKey
	media.Bucket = videoResp.Bucket

	// NOTE(review): the result of this status update is not inspected.
	g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, videoResp.SizeBytes, videoResp.DurationSeconds, "")

	// Auto-publish
	// NOTE(review): the result of this publish call is not inspected.
	g.store.PublishMedia(ctx, media.ID, true)

	// 5. Create Checkpoints + Quiz Questions in DB
	// Clear old checkpoints first
	// NOTE(review): the result of this delete call is not inspected.
	g.store.DeleteCheckpointsForModule(ctx, module.ID)

	for i, section := range script.Sections {
		if section.Checkpoint == nil {
			continue
		}

		// Calculate timestamp from cumulative audio durations.
		// When the audio response has fewer sections than the script, the
		// timestamp stays at its zero value.
		var timestamp float64
		if i < len(audioResp.Sections) {
			// Checkpoint timestamp = end of this section's audio
			timestamp = audioResp.Sections[i].StartTimestamp + audioResp.Sections[i].Duration
		}

		cp := &Checkpoint{
			ModuleID:         module.ID,
			CheckpointIndex:  i,
			Title:            section.Checkpoint.Title,
			TimestampSeconds: timestamp,
		}
		if err := g.store.CreateCheckpoint(ctx, cp); err != nil {
			return nil, fmt.Errorf("failed to create checkpoint %d: %w", i, err)
		}

		// Save quiz questions for this checkpoint
		for j, q := range section.Checkpoint.Questions {
			question := &QuizQuestion{
				ModuleID:     module.ID,
				Question:     q.Question,
				Options:      q.Options,
				CorrectIndex: q.CorrectIndex,
				Explanation:  q.Explanation,
				Difficulty:   DifficultyMedium,
				SortOrder:    j,
			}
			if err := g.store.CreateCheckpointQuizQuestion(ctx, question, cp.ID); err != nil {
				return nil, fmt.Errorf("failed to create checkpoint question: %w", err)
			}
		}
	}

	// 6. Audit log
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditAction("interactive_video_generated"),
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details: map[string]interface{}{
			"module_code":      module.ModuleCode,
			"media_id":         media.ID.String(),
			"duration_seconds": videoResp.DurationSeconds,
			"sections":         len(script.Sections),
			"checkpoints":      countCheckpoints(script),
		},
	})

	return media, nil
}

// countCheckpoints returns the number of sections in script that carry a
// non-nil Checkpoint.
func countCheckpoints(script *NarratorScript) int {
	count := 0
	for _, s := range script.Sections {
		if s.Checkpoint != nil {
			count++
		}
	}
	return count
}