package training

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"

	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
	"github.com/google/uuid"
)

// ContentGenerator generates training content and quiz questions via LLM.
type ContentGenerator struct {
	// registry is the LLM provider registry used for all chat requests.
	registry *llm.ProviderRegistry
	// piiDetector is optional; when nil, generated content is not scanned.
	piiDetector *llm.PIIDetector
	// store persists generated content, quiz questions, media and audit entries.
	store *Store
	// ttsClient is optional; audio and video generation fail when it is nil.
	ttsClient *TTSClient
}

// NewContentGenerator creates a new content generator from its dependencies.
// The arguments are stored as given; no validation is performed here.
func NewContentGenerator(registry *llm.ProviderRegistry, piiDetector *llm.PIIDetector, store *Store, ttsClient *TTSClient) *ContentGenerator {
	return &ContentGenerator{
		registry:    registry,
		piiDetector: piiDetector,
		store:       store,
		ttsClient:   ttsClient,
	}
}

// GenerateModuleContent generates training content for a module via LLM.
//
// An empty language defaults to "de". When a PII detector is configured and
// reports PII, every reported match is replaced with "[REDACTED]". The result
// is saved as unpublished Markdown content together with a short summary, and
// the generation is written to the audit log.
//
// It returns the stored content, or an error when the LLM call or the save
// fails.
func (g *ContentGenerator) GenerateModuleContent(ctx context.Context, module TrainingModule, language string) (*ModuleContent, error) {
	// Maximum size of the summary in bytes, not counting the "..." suffix.
	const summaryMaxBytes = 200

	if language == "" {
		language = "de"
	}

	prompt := buildContentPrompt(module, language)

	resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: getContentSystemPrompt(language)},
			{Role: "user", Content: prompt},
		},
		Temperature: 0.15,
		MaxTokens:   4096,
	})
	if err != nil {
		return nil, fmt.Errorf("LLM content generation failed: %w", err)
	}

	contentBody := resp.Message.Content

	// PII check on generated content.
	if g.piiDetector != nil && g.piiDetector.ContainsPII(contentBody) {
		for _, f := range g.piiDetector.FindPII(contentBody) {
			// strings.ReplaceAll with an empty search string inserts the
			// replacement between every rune, so empty matches are skipped.
			if f.Match == "" {
				continue
			}
			contentBody = strings.ReplaceAll(contentBody, f.Match, "[REDACTED]")
		}
	}

	// Create summary (roughly the first 200 bytes).
	summary := contentBody
	if len(summary) > summaryMaxBytes {
		cut := summaryMaxBytes
		// Do not cut inside a multi-byte UTF-8 sequence: continuation bytes
		// have the bit pattern 10xxxxxx and a rune has at most three of them,
		// so stepping back at most three bytes reaches the start of the rune.
		for steps := 0; steps < 3 && cut > 0 && summary[cut]&0xC0 == 0x80; steps++ {
			cut--
		}
		summary = summary[:cut] + "..."
	}

	content := &ModuleContent{
		ModuleID:      module.ID,
		ContentFormat: ContentFormatMarkdown,
		ContentBody:   contentBody,
		Summary:       summary,
		GeneratedBy:   "llm_" + resp.Provider,
		LLMModel:      resp.Model,
		IsPublished:   false,
	}

	if err := g.store.CreateModuleContent(ctx, content); err != nil {
		return nil, fmt.Errorf("failed to save content: %w", err)
	}

	// Audit log.
	// NOTE(review): any result of LogAction is not inspected here — confirm
	// that audit logging is meant to be best-effort.
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditActionContentGenerated,
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details: map[string]interface{}{
			"module_code": module.ModuleCode,
			"provider":    resp.Provider,
			"model":       resp.Model,
			"content_id":  content.ID.String(),
			"version":     content.Version,
			"tokens_used": resp.Usage.TotalTokens,
		},
	})

	return content, nil
}

// GenerateQuizQuestions generates quiz questions for a module based on its content.
//
// A count of zero or less defaults to 5. The module's published content, when
// there is any, is passed to the LLM as context. Questions that pass
// validation are numbered from 1 via SortOrder and saved one by one; if a save
// fails the function returns that error, and questions saved before it are not
// rolled back here.
func (g *ContentGenerator) GenerateQuizQuestions(ctx context.Context, module TrainingModule, count int) ([]QuizQuestion, error) {
	if count <= 0 {
		count = 5
	}

	// Get the published content for context. A nil content without an error
	// is accepted and simply leaves the context empty.
	content, err := g.store.GetPublishedContent(ctx, module.ID)
	if err != nil {
		return nil, err
	}

	contentContext := ""
	if content != nil {
		contentContext = content.ContentBody
	}

	prompt := buildQuizPrompt(module, contentContext, count)

	resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: getQuizSystemPrompt()},
			{Role: "user", Content: prompt},
		},
		Temperature: 0.2,
		MaxTokens:   4096,
	})
	if err != nil {
		return nil, fmt.Errorf("LLM quiz generation failed: %w", err)
	}

	// Parse the JSON response.
	questions, err := parseQuizResponse(resp.Message.Content, module.ID)
	if err != nil {
		return nil, fmt.Errorf("failed to parse quiz response: %w", err)
	}

	// Save questions to store.
	for i := range questions {
		questions[i].SortOrder = i + 1
		if err := g.store.CreateQuizQuestion(ctx, &questions[i]); err != nil {
			return nil, fmt.Errorf("failed to save question %d: %w", i+1, err)
		}
	}

	return questions, nil
}
============================================================================ // Prompt Templates // ============================================================================ func getContentSystemPrompt(language string) string { if language == "en" { return "You are a compliance training content expert. Generate professional, accurate training material in Markdown format. Focus on practical relevance and legal accuracy. Do not include any personal data or fictional names." } return "Du bist ein Experte fuer Compliance-Schulungsinhalte. Erstelle professionelle, praezise Schulungsmaterialien im Markdown-Format. Fokussiere dich auf praktische Relevanz und rechtliche Genauigkeit. Verwende keine personenbezogenen Daten oder fiktiven Namen." } func getQuizSystemPrompt() string { return `Du bist ein Experte fuer Compliance-Pruefungsfragen. Erstelle Multiple-Choice-Fragen als JSON-Array. Jede Frage hat genau 4 Antwortoptionen, davon genau eine richtige. Antworte NUR mit dem JSON-Array, ohne zusaetzlichen Text. Format: [ { "question": "Frage hier?", "options": ["Option A", "Option B", "Option C", "Option D"], "correct_index": 0, "explanation": "Erklaerung warum Option A richtig ist.", "difficulty": "medium" } ]` } func buildContentPrompt(module TrainingModule, language string) string { regulationLabels := map[RegulationArea]string{ RegulationDSGVO: "Datenschutz-Grundverordnung (DSGVO)", RegulationNIS2: "NIS-2-Richtlinie", RegulationISO27001: "ISO 27001 / ISMS", RegulationAIAct: "EU AI Act / KI-Verordnung", RegulationGeschGehG: "Geschaeftsgeheimnisgesetz (GeschGehG)", RegulationHinSchG: "Hinweisgeberschutzgesetz (HinSchG)", } regulation := regulationLabels[module.RegulationArea] if regulation == "" { regulation = string(module.RegulationArea) } return fmt.Sprintf(`Erstelle Schulungsmaterial fuer folgendes Compliance-Modul: **Modulcode:** %s **Titel:** %s **Beschreibung:** %s **Regulierungsbereich:** %s **Dauer:** %d Minuten **NIS2-relevant:** %v Das Material soll: 1. 
Eine kurze Einfuehrung in das Thema geben 2. Die wichtigsten rechtlichen Grundlagen erklaeren 3. Praktische Handlungsanweisungen fuer den Arbeitsalltag enthalten 4. Typische Fehler und Risiken aufzeigen 5. Eine Zusammenfassung der Kernpunkte bieten Verwende klare, verstaendliche Sprache. Zielgruppe sind Mitarbeiter in Unternehmen (50-1.500 MA). Formatiere den Inhalt als Markdown mit Ueberschriften, Aufzaehlungen und Hervorhebungen.`, module.ModuleCode, module.Title, module.Description, regulation, module.DurationMinutes, module.NIS2Relevant) } func buildQuizPrompt(module TrainingModule, contentContext string, count int) string { prompt := fmt.Sprintf(`Erstelle %d Multiple-Choice-Pruefungsfragen fuer das Compliance-Modul: **Modulcode:** %s **Titel:** %s **Regulierungsbereich:** %s`, count, module.ModuleCode, module.Title, string(module.RegulationArea)) if contentContext != "" { // Truncate content to avoid token limit if len(contentContext) > 3000 { contentContext = contentContext[:3000] + "..." } prompt += fmt.Sprintf(` **Schulungsinhalt als Kontext:** %s`, contentContext) } prompt += fmt.Sprintf(` Erstelle genau %d Fragen mit je 4 Antwortoptionen. Verteile die Schwierigkeitsgrade: easy, medium, hard. 
Antworte NUR mit dem JSON-Array.`, count) return prompt } // parseQuizResponse parses LLM JSON response into QuizQuestion structs func parseQuizResponse(response string, moduleID uuid.UUID) ([]QuizQuestion, error) { // Try to extract JSON from the response (LLM might add text around it) jsonStr := response start := strings.Index(response, "[") end := strings.LastIndex(response, "]") if start >= 0 && end > start { jsonStr = response[start : end+1] } type rawQuestion struct { Question string `json:"question"` Options []string `json:"options"` CorrectIndex int `json:"correct_index"` Explanation string `json:"explanation"` Difficulty string `json:"difficulty"` } var rawQuestions []rawQuestion if err := json.Unmarshal([]byte(jsonStr), &rawQuestions); err != nil { return nil, fmt.Errorf("invalid JSON from LLM: %w", err) } var questions []QuizQuestion for _, rq := range rawQuestions { difficulty := Difficulty(rq.Difficulty) if difficulty != DifficultyEasy && difficulty != DifficultyMedium && difficulty != DifficultyHard { difficulty = DifficultyMedium } q := QuizQuestion{ ModuleID: moduleID, Question: rq.Question, Options: rq.Options, CorrectIndex: rq.CorrectIndex, Explanation: rq.Explanation, Difficulty: difficulty, IsActive: true, } if len(q.Options) != 4 { continue // Skip malformed questions } if q.CorrectIndex < 0 || q.CorrectIndex >= len(q.Options) { continue } questions = append(questions, q) } if questions == nil { questions = []QuizQuestion{} } return questions, nil } // GenerateAllModuleContent generates text content for all modules that don't have published content yet func (g *ContentGenerator) GenerateAllModuleContent(ctx context.Context, tenantID uuid.UUID, language string) (*BulkResult, error) { if language == "" { language = "de" } modules, _, err := g.store.ListModules(ctx, tenantID, &ModuleFilters{Limit: 100}) if err != nil { return nil, fmt.Errorf("failed to list modules: %w", err) } result := &BulkResult{} for _, module := range modules { // Check if 
module already has published content content, _ := g.store.GetPublishedContent(ctx, module.ID) if content != nil { result.Skipped++ continue } _, err := g.GenerateModuleContent(ctx, module, language) if err != nil { result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", module.ModuleCode, err)) continue } result.Generated++ } return result, nil } // GenerateAllQuizQuestions generates quiz questions for all modules that don't have questions yet func (g *ContentGenerator) GenerateAllQuizQuestions(ctx context.Context, tenantID uuid.UUID, count int) (*BulkResult, error) { if count <= 0 { count = 5 } modules, _, err := g.store.ListModules(ctx, tenantID, &ModuleFilters{Limit: 100}) if err != nil { return nil, fmt.Errorf("failed to list modules: %w", err) } result := &BulkResult{} for _, module := range modules { // Check if module already has quiz questions questions, _ := g.store.ListQuizQuestions(ctx, module.ID) if len(questions) > 0 { result.Skipped++ continue } _, err := g.GenerateQuizQuestions(ctx, module, count) if err != nil { result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", module.ModuleCode, err)) continue } result.Generated++ } return result, nil } // GenerateAudio generates audio for a module using the TTS service func (g *ContentGenerator) GenerateAudio(ctx context.Context, module TrainingModule) (*TrainingMedia, error) { // Get published content content, err := g.store.GetPublishedContent(ctx, module.ID) if err != nil { return nil, fmt.Errorf("failed to get content: %w", err) } if content == nil { return nil, fmt.Errorf("no published content for module %s", module.ModuleCode) } if g.ttsClient == nil { return nil, fmt.Errorf("TTS client not configured") } // Create media record (processing) media := &TrainingMedia{ ModuleID: module.ID, ContentID: &content.ID, MediaType: MediaTypeAudio, Status: MediaStatusProcessing, Bucket: "compliance-training-audio", ObjectKey: fmt.Sprintf("audio/%s/%s.mp3", module.ID.String(), content.ID.String()), 
MimeType: "audio/mpeg", VoiceModel: "de_DE-thorsten-high", Language: "de", GeneratedBy: "tts_piper", } if err := g.store.CreateMedia(ctx, media); err != nil { return nil, fmt.Errorf("failed to create media record: %w", err) } // Call TTS service ttsResp, err := g.ttsClient.Synthesize(ctx, &TTSSynthesizeRequest{ Text: content.ContentBody, Language: "de", Voice: "thorsten-high", ModuleID: module.ID.String(), ContentID: content.ID.String(), }) if err != nil { g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusFailed, 0, 0, err.Error()) return nil, fmt.Errorf("TTS synthesis failed: %w", err) } // Update media record media.Status = MediaStatusCompleted media.FileSizeBytes = ttsResp.SizeBytes media.DurationSeconds = ttsResp.DurationSeconds media.ObjectKey = ttsResp.ObjectKey media.Bucket = ttsResp.Bucket g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, ttsResp.SizeBytes, ttsResp.DurationSeconds, "") // Audit log g.store.LogAction(ctx, &AuditLogEntry{ TenantID: module.TenantID, Action: AuditAction("audio_generated"), EntityType: AuditEntityModule, EntityID: &module.ID, Details: map[string]interface{}{ "module_code": module.ModuleCode, "media_id": media.ID.String(), "duration_seconds": ttsResp.DurationSeconds, "size_bytes": ttsResp.SizeBytes, }, }) return media, nil } // VideoScript represents a structured presentation script type VideoScript struct { Title string `json:"title"` Sections []VideoScriptSection `json:"sections"` } // VideoScriptSection is one slide in the presentation type VideoScriptSection struct { Heading string `json:"heading"` Text string `json:"text"` BulletPoints []string `json:"bullet_points"` } // GenerateVideoScript generates a structured video script from module content via LLM func (g *ContentGenerator) GenerateVideoScript(ctx context.Context, module TrainingModule) (*VideoScript, error) { content, err := g.store.GetPublishedContent(ctx, module.ID) if err != nil { return nil, fmt.Errorf("failed to get content: %w", err) } if content 
== nil { return nil, fmt.Errorf("no published content for module %s", module.ModuleCode) } prompt := fmt.Sprintf(`Erstelle ein strukturiertes Folien-Script fuer eine Praesentations-Video-Schulung. **Modul:** %s — %s **Inhalt:** %s Erstelle 5-8 Folien. Jede Folie hat: - heading: Kurze Ueberschrift (max 60 Zeichen) - text: Erklaerungstext (1-2 Saetze) - bullet_points: 2-4 Kernpunkte Antworte NUR mit einem JSON-Objekt in diesem Format: { "title": "Titel der Praesentation", "sections": [ { "heading": "Folienueberschrift", "text": "Erklaerungstext fuer diese Folie.", "bullet_points": ["Punkt 1", "Punkt 2", "Punkt 3"] } ] }`, module.ModuleCode, module.Title, truncateText(content.ContentBody, 3000)) resp, err := g.registry.Chat(ctx, &llm.ChatRequest{ Messages: []llm.Message{ {Role: "system", Content: "Du bist ein Experte fuer Compliance-Schulungspraesentationen. Erstelle strukturierte Folien-Scripts als JSON. Antworte NUR mit dem JSON-Objekt."}, {Role: "user", Content: prompt}, }, Temperature: 0.15, MaxTokens: 4096, }) if err != nil { return nil, fmt.Errorf("LLM video script generation failed: %w", err) } // Parse JSON response var script VideoScript jsonStr := resp.Message.Content start := strings.Index(jsonStr, "{") end := strings.LastIndex(jsonStr, "}") if start >= 0 && end > start { jsonStr = jsonStr[start : end+1] } if err := json.Unmarshal([]byte(jsonStr), &script); err != nil { return nil, fmt.Errorf("failed to parse video script JSON: %w", err) } if len(script.Sections) == 0 { return nil, fmt.Errorf("video script has no sections") } return &script, nil } // GenerateVideo generates a presentation video for a module func (g *ContentGenerator) GenerateVideo(ctx context.Context, module TrainingModule) (*TrainingMedia, error) { if g.ttsClient == nil { return nil, fmt.Errorf("TTS client not configured") } // Check for published audio, generate if missing audio, _ := g.store.GetPublishedAudio(ctx, module.ID) if audio == nil { // Try to generate audio first var err error 
audio, err = g.GenerateAudio(ctx, module) if err != nil { return nil, fmt.Errorf("audio generation required but failed: %w", err) } // Auto-publish the audio g.store.PublishMedia(ctx, audio.ID, true) } // Generate video script via LLM script, err := g.GenerateVideoScript(ctx, module) if err != nil { return nil, fmt.Errorf("video script generation failed: %w", err) } // Create media record media := &TrainingMedia{ ModuleID: module.ID, MediaType: MediaTypeVideo, Status: MediaStatusProcessing, Bucket: "compliance-training-video", ObjectKey: fmt.Sprintf("video/%s/presentation.mp4", module.ID.String()), MimeType: "video/mp4", Language: "de", GeneratedBy: "tts_ffmpeg", } if err := g.store.CreateMedia(ctx, media); err != nil { return nil, fmt.Errorf("failed to create media record: %w", err) } // Build script map for TTS service scriptMap := map[string]interface{}{ "title": script.Title, "module_code": module.ModuleCode, "sections": script.Sections, } // Call TTS service video generation videoResp, err := g.ttsClient.GenerateVideo(ctx, &TTSGenerateVideoRequest{ Script: scriptMap, AudioObjectKey: audio.ObjectKey, ModuleID: module.ID.String(), }) if err != nil { g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusFailed, 0, 0, err.Error()) return nil, fmt.Errorf("video generation failed: %w", err) } // Update media record media.Status = MediaStatusCompleted media.FileSizeBytes = videoResp.SizeBytes media.DurationSeconds = videoResp.DurationSeconds media.ObjectKey = videoResp.ObjectKey media.Bucket = videoResp.Bucket g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, videoResp.SizeBytes, videoResp.DurationSeconds, "") // Audit log g.store.LogAction(ctx, &AuditLogEntry{ TenantID: module.TenantID, Action: AuditAction("video_generated"), EntityType: AuditEntityModule, EntityID: &module.ID, Details: map[string]interface{}{ "module_code": module.ModuleCode, "media_id": media.ID.String(), "duration_seconds": videoResp.DurationSeconds, "size_bytes": videoResp.SizeBytes, 
"slides": len(script.Sections), }, }) return media, nil } func truncateText(text string, maxLen int) string { if len(text) <= maxLen { return text } return text[:maxLen] + "..." }