Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 37s
CI/CD / test-python-backend-compliance (push) Successful in 39s
CI/CD / test-python-document-crawler (push) Successful in 26s
CI/CD / test-python-dsms-gateway (push) Successful in 23s
CI/CD / validate-canonical-controls (push) Successful in 12s
CI/CD / Deploy (push) Has been skipped
Interactive Training Videos (CP-TRAIN): - DB migration 022: training_checkpoints + checkpoint_progress tables - NarratorScript generation via Anthropic (AI Teacher persona, German) - TTS batch synthesis + interactive video pipeline (slides + checkpoint slides + FFmpeg) - 4 new API endpoints: generate-interactive, interactive-manifest, checkpoint submit, checkpoint progress - InteractiveVideoPlayer component (HTML5 Video, quiz overlay, seek protection, progress tracking) - Learner portal integration with automatic completion on all checkpoints passed - 30 new tests (handler validation + grading logic + manifest/progress + seek protection) Training Blocks: - Block generator, block store, block config CRUD + preview/generate endpoints - Migration 021: training_blocks schema Control Generator + Canonical Library: - Control generator routes + service enhancements - Canonical control library helpers, sidebar entry - Citation backfill service + tests - CE libraries data (hazard, protection, evidence, lifecycle, components) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
979 lines
30 KiB
Go
979 lines
30 KiB
Go
package training
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/breakpilot/ai-compliance-sdk/internal/llm"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// ContentGenerator generates training content and quiz questions via LLM
|
|
type ContentGenerator struct {
|
|
registry *llm.ProviderRegistry
|
|
piiDetector *llm.PIIDetector
|
|
store *Store
|
|
ttsClient *TTSClient
|
|
}
|
|
|
|
// NewContentGenerator creates a new content generator
|
|
func NewContentGenerator(registry *llm.ProviderRegistry, piiDetector *llm.PIIDetector, store *Store, ttsClient *TTSClient) *ContentGenerator {
|
|
return &ContentGenerator{
|
|
registry: registry,
|
|
piiDetector: piiDetector,
|
|
store: store,
|
|
ttsClient: ttsClient,
|
|
}
|
|
}
|
|
|
|
// GenerateModuleContent generates training content for a module via LLM
|
|
func (g *ContentGenerator) GenerateModuleContent(ctx context.Context, module TrainingModule, language string) (*ModuleContent, error) {
|
|
if language == "" {
|
|
language = "de"
|
|
}
|
|
|
|
prompt := buildContentPrompt(module, language)
|
|
|
|
resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
|
|
Messages: []llm.Message{
|
|
{Role: "system", Content: getContentSystemPrompt(language)},
|
|
{Role: "user", Content: prompt},
|
|
},
|
|
Temperature: 0.15,
|
|
MaxTokens: 4096,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("LLM content generation failed: %w", err)
|
|
}
|
|
|
|
contentBody := resp.Message.Content
|
|
|
|
// PII check on generated content
|
|
if g.piiDetector != nil && g.piiDetector.ContainsPII(contentBody) {
|
|
findings := g.piiDetector.FindPII(contentBody)
|
|
for _, f := range findings {
|
|
contentBody = strings.ReplaceAll(contentBody, f.Match, "[REDACTED]")
|
|
}
|
|
}
|
|
|
|
// Create summary (first 200 chars)
|
|
summary := contentBody
|
|
if len(summary) > 200 {
|
|
summary = summary[:200] + "..."
|
|
}
|
|
|
|
content := &ModuleContent{
|
|
ModuleID: module.ID,
|
|
ContentFormat: ContentFormatMarkdown,
|
|
ContentBody: contentBody,
|
|
Summary: summary,
|
|
GeneratedBy: "llm_" + resp.Provider,
|
|
LLMModel: resp.Model,
|
|
IsPublished: false,
|
|
}
|
|
|
|
if err := g.store.CreateModuleContent(ctx, content); err != nil {
|
|
return nil, fmt.Errorf("failed to save content: %w", err)
|
|
}
|
|
|
|
// Audit log
|
|
g.store.LogAction(ctx, &AuditLogEntry{
|
|
TenantID: module.TenantID,
|
|
Action: AuditActionContentGenerated,
|
|
EntityType: AuditEntityModule,
|
|
EntityID: &module.ID,
|
|
Details: map[string]interface{}{
|
|
"module_code": module.ModuleCode,
|
|
"provider": resp.Provider,
|
|
"model": resp.Model,
|
|
"content_id": content.ID.String(),
|
|
"version": content.Version,
|
|
"tokens_used": resp.Usage.TotalTokens,
|
|
},
|
|
})
|
|
|
|
return content, nil
|
|
}
|
|
|
|
// GenerateQuizQuestions generates quiz questions for a module based on its content
|
|
func (g *ContentGenerator) GenerateQuizQuestions(ctx context.Context, module TrainingModule, count int) ([]QuizQuestion, error) {
|
|
if count <= 0 {
|
|
count = 5
|
|
}
|
|
|
|
// Get the published content for context
|
|
content, err := g.store.GetPublishedContent(ctx, module.ID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
contentContext := ""
|
|
if content != nil {
|
|
contentContext = content.ContentBody
|
|
}
|
|
|
|
prompt := buildQuizPrompt(module, contentContext, count)
|
|
|
|
resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
|
|
Messages: []llm.Message{
|
|
{Role: "system", Content: getQuizSystemPrompt()},
|
|
{Role: "user", Content: prompt},
|
|
},
|
|
Temperature: 0.2,
|
|
MaxTokens: 4096,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("LLM quiz generation failed: %w", err)
|
|
}
|
|
|
|
// Parse the JSON response
|
|
questions, err := parseQuizResponse(resp.Message.Content, module.ID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse quiz response: %w", err)
|
|
}
|
|
|
|
// Save questions to store
|
|
for i := range questions {
|
|
questions[i].SortOrder = i + 1
|
|
if err := g.store.CreateQuizQuestion(ctx, &questions[i]); err != nil {
|
|
return nil, fmt.Errorf("failed to save question %d: %w", i+1, err)
|
|
}
|
|
}
|
|
|
|
return questions, nil
|
|
}
|
|
|
|
// ============================================================================
|
|
// Prompt Templates
|
|
// ============================================================================
|
|
|
|
// getContentSystemPrompt returns the system prompt for content generation:
// the English prompt for language "en", the German prompt for anything else.
func getContentSystemPrompt(language string) string {
	switch language {
	case "en":
		return "You are a compliance training content expert. Generate professional, accurate training material in Markdown format. Focus on practical relevance and legal accuracy. Do not include any personal data or fictional names."
	default:
		return "Du bist ein Experte fuer Compliance-Schulungsinhalte. Erstelle professionelle, praezise Schulungsmaterialien im Markdown-Format. Fokussiere dich auf praktische Relevanz und rechtliche Genauigkeit. Verwende keine personenbezogenen Daten oder fiktiven Namen."
	}
}
|
|
|
|
// getQuizSystemPrompt returns the German system prompt for quiz generation.
// It asks for a bare JSON array of multiple-choice questions and shows the
// expected shape of one element.
func getQuizSystemPrompt() string {
	const quizSystemPrompt = `Du bist ein Experte fuer Compliance-Pruefungsfragen. Erstelle Multiple-Choice-Fragen als JSON-Array.
Jede Frage hat genau 4 Antwortoptionen, davon genau eine richtige.
Antworte NUR mit dem JSON-Array, ohne zusaetzlichen Text.

Format:
[
{
"question": "Frage hier?",
"options": ["Option A", "Option B", "Option C", "Option D"],
"correct_index": 0,
"explanation": "Erklaerung warum Option A richtig ist.",
"difficulty": "medium"
}
]`

	return quizSystemPrompt
}
|
|
|
|
func buildContentPrompt(module TrainingModule, language string) string {
|
|
regulationLabels := map[RegulationArea]string{
|
|
RegulationDSGVO: "Datenschutz-Grundverordnung (DSGVO)",
|
|
RegulationNIS2: "NIS-2-Richtlinie",
|
|
RegulationISO27001: "ISO 27001 / ISMS",
|
|
RegulationAIAct: "EU AI Act / KI-Verordnung",
|
|
RegulationGeschGehG: "Geschaeftsgeheimnisgesetz (GeschGehG)",
|
|
RegulationHinSchG: "Hinweisgeberschutzgesetz (HinSchG)",
|
|
}
|
|
|
|
regulation := regulationLabels[module.RegulationArea]
|
|
if regulation == "" {
|
|
regulation = string(module.RegulationArea)
|
|
}
|
|
|
|
return fmt.Sprintf(`Erstelle Schulungsmaterial fuer folgendes Compliance-Modul:
|
|
|
|
**Modulcode:** %s
|
|
**Titel:** %s
|
|
**Beschreibung:** %s
|
|
**Regulierungsbereich:** %s
|
|
**Dauer:** %d Minuten
|
|
**NIS2-relevant:** %v
|
|
|
|
Das Material soll:
|
|
1. Eine kurze Einfuehrung in das Thema geben
|
|
2. Die wichtigsten rechtlichen Grundlagen erklaeren
|
|
3. Praktische Handlungsanweisungen fuer den Arbeitsalltag enthalten
|
|
4. Typische Fehler und Risiken aufzeigen
|
|
5. Eine Zusammenfassung der Kernpunkte bieten
|
|
|
|
Verwende klare, verstaendliche Sprache. Zielgruppe sind Mitarbeiter in Unternehmen (50-1.500 MA).
|
|
Formatiere den Inhalt als Markdown mit Ueberschriften, Aufzaehlungen und Hervorhebungen.`,
|
|
module.ModuleCode, module.Title, module.Description,
|
|
regulation, module.DurationMinutes, module.NIS2Relevant)
|
|
}
|
|
|
|
func buildQuizPrompt(module TrainingModule, contentContext string, count int) string {
|
|
prompt := fmt.Sprintf(`Erstelle %d Multiple-Choice-Pruefungsfragen fuer das Compliance-Modul:
|
|
|
|
**Modulcode:** %s
|
|
**Titel:** %s
|
|
**Regulierungsbereich:** %s`, count, module.ModuleCode, module.Title, string(module.RegulationArea))
|
|
|
|
if contentContext != "" {
|
|
// Truncate content to avoid token limit
|
|
if len(contentContext) > 3000 {
|
|
contentContext = contentContext[:3000] + "..."
|
|
}
|
|
prompt += fmt.Sprintf(`
|
|
|
|
**Schulungsinhalt als Kontext:**
|
|
%s`, contentContext)
|
|
}
|
|
|
|
prompt += fmt.Sprintf(`
|
|
|
|
Erstelle genau %d Fragen mit je 4 Antwortoptionen.
|
|
Verteile die Schwierigkeitsgrade: easy, medium, hard.
|
|
Antworte NUR mit dem JSON-Array.`, count)
|
|
|
|
return prompt
|
|
}
|
|
|
|
// parseQuizResponse parses LLM JSON response into QuizQuestion structs
|
|
func parseQuizResponse(response string, moduleID uuid.UUID) ([]QuizQuestion, error) {
|
|
// Try to extract JSON from the response (LLM might add text around it)
|
|
jsonStr := response
|
|
start := strings.Index(response, "[")
|
|
end := strings.LastIndex(response, "]")
|
|
if start >= 0 && end > start {
|
|
jsonStr = response[start : end+1]
|
|
}
|
|
|
|
type rawQuestion struct {
|
|
Question string `json:"question"`
|
|
Options []string `json:"options"`
|
|
CorrectIndex int `json:"correct_index"`
|
|
Explanation string `json:"explanation"`
|
|
Difficulty string `json:"difficulty"`
|
|
}
|
|
|
|
var rawQuestions []rawQuestion
|
|
if err := json.Unmarshal([]byte(jsonStr), &rawQuestions); err != nil {
|
|
return nil, fmt.Errorf("invalid JSON from LLM: %w", err)
|
|
}
|
|
|
|
var questions []QuizQuestion
|
|
for _, rq := range rawQuestions {
|
|
difficulty := Difficulty(rq.Difficulty)
|
|
if difficulty != DifficultyEasy && difficulty != DifficultyMedium && difficulty != DifficultyHard {
|
|
difficulty = DifficultyMedium
|
|
}
|
|
|
|
q := QuizQuestion{
|
|
ModuleID: moduleID,
|
|
Question: rq.Question,
|
|
Options: rq.Options,
|
|
CorrectIndex: rq.CorrectIndex,
|
|
Explanation: rq.Explanation,
|
|
Difficulty: difficulty,
|
|
IsActive: true,
|
|
}
|
|
|
|
if len(q.Options) != 4 {
|
|
continue // Skip malformed questions
|
|
}
|
|
if q.CorrectIndex < 0 || q.CorrectIndex >= len(q.Options) {
|
|
continue
|
|
}
|
|
|
|
questions = append(questions, q)
|
|
}
|
|
|
|
if questions == nil {
|
|
questions = []QuizQuestion{}
|
|
}
|
|
|
|
return questions, nil
|
|
}
|
|
|
|
// GenerateBlockContent generates training content for a module based on linked canonical controls
|
|
func (g *ContentGenerator) GenerateBlockContent(
|
|
ctx context.Context,
|
|
module TrainingModule,
|
|
controls []CanonicalControlSummary,
|
|
language string,
|
|
) (*ModuleContent, error) {
|
|
if language == "" {
|
|
language = "de"
|
|
}
|
|
|
|
prompt := buildBlockContentPrompt(module, controls, language)
|
|
|
|
resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
|
|
Messages: []llm.Message{
|
|
{Role: "system", Content: getContentSystemPrompt(language)},
|
|
{Role: "user", Content: prompt},
|
|
},
|
|
Temperature: 0.15,
|
|
MaxTokens: 8192,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("LLM block content generation failed: %w", err)
|
|
}
|
|
|
|
contentBody := resp.Message.Content
|
|
|
|
// PII check
|
|
if g.piiDetector != nil && g.piiDetector.ContainsPII(contentBody) {
|
|
findings := g.piiDetector.FindPII(contentBody)
|
|
for _, f := range findings {
|
|
contentBody = strings.ReplaceAll(contentBody, f.Match, "[REDACTED]")
|
|
}
|
|
}
|
|
|
|
summary := contentBody
|
|
if len(summary) > 200 {
|
|
summary = summary[:200] + "..."
|
|
}
|
|
|
|
content := &ModuleContent{
|
|
ModuleID: module.ID,
|
|
ContentFormat: ContentFormatMarkdown,
|
|
ContentBody: contentBody,
|
|
Summary: summary,
|
|
GeneratedBy: "llm_block_" + resp.Provider,
|
|
LLMModel: resp.Model,
|
|
IsPublished: false,
|
|
}
|
|
|
|
if err := g.store.CreateModuleContent(ctx, content); err != nil {
|
|
return nil, fmt.Errorf("failed to save block content: %w", err)
|
|
}
|
|
|
|
// Audit log
|
|
g.store.LogAction(ctx, &AuditLogEntry{
|
|
TenantID: module.TenantID,
|
|
Action: AuditActionContentGenerated,
|
|
EntityType: AuditEntityModule,
|
|
EntityID: &module.ID,
|
|
Details: map[string]interface{}{
|
|
"module_code": module.ModuleCode,
|
|
"provider": resp.Provider,
|
|
"model": resp.Model,
|
|
"content_id": content.ID.String(),
|
|
"version": content.Version,
|
|
"tokens_used": resp.Usage.TotalTokens,
|
|
"controls_count": len(controls),
|
|
"source": "block_generator",
|
|
},
|
|
})
|
|
|
|
return content, nil
|
|
}
|
|
|
|
// buildBlockContentPrompt creates a prompt that incorporates canonical controls
|
|
func buildBlockContentPrompt(module TrainingModule, controls []CanonicalControlSummary, language string) string {
|
|
var sb strings.Builder
|
|
|
|
if language == "en" {
|
|
sb.WriteString(fmt.Sprintf("Create training material for the following compliance module:\n\n"))
|
|
sb.WriteString(fmt.Sprintf("**Module Code:** %s\n", module.ModuleCode))
|
|
sb.WriteString(fmt.Sprintf("**Title:** %s\n", module.Title))
|
|
sb.WriteString(fmt.Sprintf("**Duration:** %d minutes\n\n", module.DurationMinutes))
|
|
sb.WriteString(fmt.Sprintf("This module is based on %d security controls:\n\n", len(controls)))
|
|
} else {
|
|
sb.WriteString(fmt.Sprintf("Erstelle Schulungsmaterial fuer folgendes Compliance-Modul:\n\n"))
|
|
sb.WriteString(fmt.Sprintf("**Modulcode:** %s\n", module.ModuleCode))
|
|
sb.WriteString(fmt.Sprintf("**Titel:** %s\n", module.Title))
|
|
sb.WriteString(fmt.Sprintf("**Dauer:** %d Minuten\n\n", module.DurationMinutes))
|
|
sb.WriteString(fmt.Sprintf("Dieses Modul basiert auf %d Sicherheits-Controls:\n\n", len(controls)))
|
|
}
|
|
|
|
for i, ctrl := range controls {
|
|
sb.WriteString(fmt.Sprintf("### Control %d: %s — %s\n", i+1, ctrl.ControlID, ctrl.Title))
|
|
sb.WriteString(fmt.Sprintf("**Ziel:** %s\n", ctrl.Objective))
|
|
if len(ctrl.Requirements) > 0 {
|
|
sb.WriteString("**Anforderungen:**\n")
|
|
for _, req := range ctrl.Requirements {
|
|
sb.WriteString(fmt.Sprintf("- %s\n", req))
|
|
}
|
|
}
|
|
sb.WriteString("\n")
|
|
}
|
|
|
|
if language == "en" {
|
|
sb.WriteString(`Create the material as Markdown:
|
|
1. Introduction: Why are these controls important?
|
|
2. Per control: Explanation, practical tips, examples
|
|
3. Summary + action items
|
|
4. Checklist for daily work
|
|
|
|
Use clear, understandable language. Target audience: employees in companies (50-1,500 employees).`)
|
|
} else {
|
|
sb.WriteString(`Erstelle das Material als Markdown:
|
|
1. Einfuehrung: Warum sind diese Controls wichtig?
|
|
2. Pro Control: Erklaerung, praktische Hinweise, Beispiele
|
|
3. Zusammenfassung + Handlungsanweisungen
|
|
4. Checkliste fuer den Alltag
|
|
|
|
Verwende klare, verstaendliche Sprache. Zielgruppe sind Mitarbeiter in Unternehmen (50-1.500 MA).
|
|
Formatiere den Inhalt als Markdown mit Ueberschriften, Aufzaehlungen und Hervorhebungen.`)
|
|
}
|
|
|
|
return sb.String()
|
|
}
|
|
|
|
// GenerateAllModuleContent generates text content for all modules that don't have published content yet
|
|
func (g *ContentGenerator) GenerateAllModuleContent(ctx context.Context, tenantID uuid.UUID, language string) (*BulkResult, error) {
|
|
if language == "" {
|
|
language = "de"
|
|
}
|
|
|
|
modules, _, err := g.store.ListModules(ctx, tenantID, &ModuleFilters{Limit: 100})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to list modules: %w", err)
|
|
}
|
|
|
|
result := &BulkResult{}
|
|
for _, module := range modules {
|
|
// Check if module already has published content
|
|
content, _ := g.store.GetPublishedContent(ctx, module.ID)
|
|
if content != nil {
|
|
result.Skipped++
|
|
continue
|
|
}
|
|
|
|
_, err := g.GenerateModuleContent(ctx, module, language)
|
|
if err != nil {
|
|
result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", module.ModuleCode, err))
|
|
continue
|
|
}
|
|
result.Generated++
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// GenerateAllQuizQuestions generates quiz questions for all modules that don't have questions yet
|
|
func (g *ContentGenerator) GenerateAllQuizQuestions(ctx context.Context, tenantID uuid.UUID, count int) (*BulkResult, error) {
|
|
if count <= 0 {
|
|
count = 5
|
|
}
|
|
|
|
modules, _, err := g.store.ListModules(ctx, tenantID, &ModuleFilters{Limit: 100})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to list modules: %w", err)
|
|
}
|
|
|
|
result := &BulkResult{}
|
|
for _, module := range modules {
|
|
// Check if module already has quiz questions
|
|
questions, _ := g.store.ListQuizQuestions(ctx, module.ID)
|
|
if len(questions) > 0 {
|
|
result.Skipped++
|
|
continue
|
|
}
|
|
|
|
_, err := g.GenerateQuizQuestions(ctx, module, count)
|
|
if err != nil {
|
|
result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", module.ModuleCode, err))
|
|
continue
|
|
}
|
|
result.Generated++
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// GenerateAudio generates audio for a module using the TTS service
|
|
func (g *ContentGenerator) GenerateAudio(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
|
|
// Get published content
|
|
content, err := g.store.GetPublishedContent(ctx, module.ID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get content: %w", err)
|
|
}
|
|
if content == nil {
|
|
return nil, fmt.Errorf("no published content for module %s", module.ModuleCode)
|
|
}
|
|
|
|
if g.ttsClient == nil {
|
|
return nil, fmt.Errorf("TTS client not configured")
|
|
}
|
|
|
|
// Create media record (processing)
|
|
media := &TrainingMedia{
|
|
ModuleID: module.ID,
|
|
ContentID: &content.ID,
|
|
MediaType: MediaTypeAudio,
|
|
Status: MediaStatusProcessing,
|
|
Bucket: "compliance-training-audio",
|
|
ObjectKey: fmt.Sprintf("audio/%s/%s.mp3", module.ID.String(), content.ID.String()),
|
|
MimeType: "audio/mpeg",
|
|
VoiceModel: "de_DE-thorsten-high",
|
|
Language: "de",
|
|
GeneratedBy: "tts_piper",
|
|
}
|
|
|
|
if err := g.store.CreateMedia(ctx, media); err != nil {
|
|
return nil, fmt.Errorf("failed to create media record: %w", err)
|
|
}
|
|
|
|
// Call TTS service
|
|
ttsResp, err := g.ttsClient.Synthesize(ctx, &TTSSynthesizeRequest{
|
|
Text: content.ContentBody,
|
|
Language: "de",
|
|
Voice: "thorsten-high",
|
|
ModuleID: module.ID.String(),
|
|
ContentID: content.ID.String(),
|
|
})
|
|
|
|
if err != nil {
|
|
g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusFailed, 0, 0, err.Error())
|
|
return nil, fmt.Errorf("TTS synthesis failed: %w", err)
|
|
}
|
|
|
|
// Update media record
|
|
media.Status = MediaStatusCompleted
|
|
media.FileSizeBytes = ttsResp.SizeBytes
|
|
media.DurationSeconds = ttsResp.DurationSeconds
|
|
media.ObjectKey = ttsResp.ObjectKey
|
|
media.Bucket = ttsResp.Bucket
|
|
|
|
g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, ttsResp.SizeBytes, ttsResp.DurationSeconds, "")
|
|
|
|
// Audit log
|
|
g.store.LogAction(ctx, &AuditLogEntry{
|
|
TenantID: module.TenantID,
|
|
Action: AuditAction("audio_generated"),
|
|
EntityType: AuditEntityModule,
|
|
EntityID: &module.ID,
|
|
Details: map[string]interface{}{
|
|
"module_code": module.ModuleCode,
|
|
"media_id": media.ID.String(),
|
|
"duration_seconds": ttsResp.DurationSeconds,
|
|
"size_bytes": ttsResp.SizeBytes,
|
|
},
|
|
})
|
|
|
|
return media, nil
|
|
}
|
|
|
|
// Types describing the slide script the LLM returns for a presentation video.
type (
	// VideoScript is a structured presentation script: a title plus an
	// ordered list of slides.
	VideoScript struct {
		Title    string               `json:"title"`
		Sections []VideoScriptSection `json:"sections"`
	}

	// VideoScriptSection is one slide of a VideoScript: a heading, a short
	// explanatory text and its bullet points.
	VideoScriptSection struct {
		Heading      string   `json:"heading"`
		Text         string   `json:"text"`
		BulletPoints []string `json:"bullet_points"`
	}
)
|
|
|
|
// GenerateVideoScript generates a structured video script from module content via LLM
|
|
func (g *ContentGenerator) GenerateVideoScript(ctx context.Context, module TrainingModule) (*VideoScript, error) {
|
|
content, err := g.store.GetPublishedContent(ctx, module.ID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get content: %w", err)
|
|
}
|
|
if content == nil {
|
|
return nil, fmt.Errorf("no published content for module %s", module.ModuleCode)
|
|
}
|
|
|
|
prompt := fmt.Sprintf(`Erstelle ein strukturiertes Folien-Script fuer eine Praesentations-Video-Schulung.
|
|
|
|
**Modul:** %s — %s
|
|
**Inhalt:**
|
|
%s
|
|
|
|
Erstelle 5-8 Folien. Jede Folie hat:
|
|
- heading: Kurze Ueberschrift (max 60 Zeichen)
|
|
- text: Erklaerungstext (1-2 Saetze)
|
|
- bullet_points: 2-4 Kernpunkte
|
|
|
|
Antworte NUR mit einem JSON-Objekt in diesem Format:
|
|
{
|
|
"title": "Titel der Praesentation",
|
|
"sections": [
|
|
{
|
|
"heading": "Folienueberschrift",
|
|
"text": "Erklaerungstext fuer diese Folie.",
|
|
"bullet_points": ["Punkt 1", "Punkt 2", "Punkt 3"]
|
|
}
|
|
]
|
|
}`, module.ModuleCode, module.Title, truncateText(content.ContentBody, 3000))
|
|
|
|
resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
|
|
Messages: []llm.Message{
|
|
{Role: "system", Content: "Du bist ein Experte fuer Compliance-Schulungspraesentationen. Erstelle strukturierte Folien-Scripts als JSON. Antworte NUR mit dem JSON-Objekt."},
|
|
{Role: "user", Content: prompt},
|
|
},
|
|
Temperature: 0.15,
|
|
MaxTokens: 4096,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("LLM video script generation failed: %w", err)
|
|
}
|
|
|
|
// Parse JSON response
|
|
var script VideoScript
|
|
jsonStr := resp.Message.Content
|
|
start := strings.Index(jsonStr, "{")
|
|
end := strings.LastIndex(jsonStr, "}")
|
|
if start >= 0 && end > start {
|
|
jsonStr = jsonStr[start : end+1]
|
|
}
|
|
|
|
if err := json.Unmarshal([]byte(jsonStr), &script); err != nil {
|
|
return nil, fmt.Errorf("failed to parse video script JSON: %w", err)
|
|
}
|
|
|
|
if len(script.Sections) == 0 {
|
|
return nil, fmt.Errorf("video script has no sections")
|
|
}
|
|
|
|
return &script, nil
|
|
}
|
|
|
|
// GenerateVideo generates a presentation video for a module
|
|
func (g *ContentGenerator) GenerateVideo(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
|
|
if g.ttsClient == nil {
|
|
return nil, fmt.Errorf("TTS client not configured")
|
|
}
|
|
|
|
// Check for published audio, generate if missing
|
|
audio, _ := g.store.GetPublishedAudio(ctx, module.ID)
|
|
if audio == nil {
|
|
// Try to generate audio first
|
|
var err error
|
|
audio, err = g.GenerateAudio(ctx, module)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("audio generation required but failed: %w", err)
|
|
}
|
|
// Auto-publish the audio
|
|
g.store.PublishMedia(ctx, audio.ID, true)
|
|
}
|
|
|
|
// Generate video script via LLM
|
|
script, err := g.GenerateVideoScript(ctx, module)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("video script generation failed: %w", err)
|
|
}
|
|
|
|
// Create media record
|
|
media := &TrainingMedia{
|
|
ModuleID: module.ID,
|
|
MediaType: MediaTypeVideo,
|
|
Status: MediaStatusProcessing,
|
|
Bucket: "compliance-training-video",
|
|
ObjectKey: fmt.Sprintf("video/%s/presentation.mp4", module.ID.String()),
|
|
MimeType: "video/mp4",
|
|
Language: "de",
|
|
GeneratedBy: "tts_ffmpeg",
|
|
}
|
|
|
|
if err := g.store.CreateMedia(ctx, media); err != nil {
|
|
return nil, fmt.Errorf("failed to create media record: %w", err)
|
|
}
|
|
|
|
// Build script map for TTS service
|
|
scriptMap := map[string]interface{}{
|
|
"title": script.Title,
|
|
"module_code": module.ModuleCode,
|
|
"sections": script.Sections,
|
|
}
|
|
|
|
// Call TTS service video generation
|
|
videoResp, err := g.ttsClient.GenerateVideo(ctx, &TTSGenerateVideoRequest{
|
|
Script: scriptMap,
|
|
AudioObjectKey: audio.ObjectKey,
|
|
ModuleID: module.ID.String(),
|
|
})
|
|
|
|
if err != nil {
|
|
g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusFailed, 0, 0, err.Error())
|
|
return nil, fmt.Errorf("video generation failed: %w", err)
|
|
}
|
|
|
|
// Update media record
|
|
media.Status = MediaStatusCompleted
|
|
media.FileSizeBytes = videoResp.SizeBytes
|
|
media.DurationSeconds = videoResp.DurationSeconds
|
|
media.ObjectKey = videoResp.ObjectKey
|
|
media.Bucket = videoResp.Bucket
|
|
|
|
g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, videoResp.SizeBytes, videoResp.DurationSeconds, "")
|
|
|
|
// Audit log
|
|
g.store.LogAction(ctx, &AuditLogEntry{
|
|
TenantID: module.TenantID,
|
|
Action: AuditAction("video_generated"),
|
|
EntityType: AuditEntityModule,
|
|
EntityID: &module.ID,
|
|
Details: map[string]interface{}{
|
|
"module_code": module.ModuleCode,
|
|
"media_id": media.ID.String(),
|
|
"duration_seconds": videoResp.DurationSeconds,
|
|
"size_bytes": videoResp.SizeBytes,
|
|
"slides": len(script.Sections),
|
|
},
|
|
})
|
|
|
|
return media, nil
|
|
}
|
|
|
|
// truncateText returns text unchanged when it is at most maxLen bytes long.
// Otherwise it returns a prefix of at most maxLen bytes followed by "...".
//
// The cut never lands inside a multi-byte UTF-8 sequence: it is moved back to
// the start of the rune it would have split, so valid UTF-8 input always
// yields valid UTF-8 output. A negative maxLen is treated as 0.
func truncateText(text string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(text) <= maxLen {
		return text
	}

	cut := maxLen
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes. While the byte
	// at the cut is one, the cut sits inside a rune and has to move left.
	for cut > 0 && text[cut]&0xC0 == 0x80 {
		cut--
	}
	return text[:cut] + "..."
}
|
|
|
|
// ============================================================================
|
|
// Interactive Video Pipeline
|
|
// ============================================================================
|
|
|
|
// narratorSystemPrompt is the German system prompt for narrator script
// generation. It sets the "AI Teacher" persona and asks for a bare JSON object
// in NarratorScript format, with a checkpoint of quiz questions after every
// section.
const narratorSystemPrompt = `Du bist ein professioneller AI Teacher fuer Compliance-Schulungen.
Dein Stil ist foermlich aber freundlich, klar und paedagogisch wertvoll.
Du sprichst die Lernenden direkt an ("Sie") und fuehrst sie durch die Schulung.
Du erzeugst IMMER deutschsprachige Inhalte.

Dein Output ist ein JSON-Objekt im Format NarratorScript.
Jede Section sollte etwa 3 Minuten Sprechzeit haben (~450 Woerter Narrator-Text).
Nach jeder Section kommt ein Checkpoint mit 3-5 Quiz-Fragen.
Die Fragen testen das Verstaendnis des gerade Gelernten.
Jede Frage hat genau 4 Antwortmoeglichkeiten, wobei correct_index (0-basiert) die richtige Antwort angibt.

Antworte NUR mit dem JSON-Objekt, ohne Markdown-Codeblock-Wrapper.`
|
|
|
|
// GenerateNarratorScript generates a narrator-style video script with checkpoints via LLM
|
|
func (g *ContentGenerator) GenerateNarratorScript(ctx context.Context, module TrainingModule) (*NarratorScript, error) {
|
|
content, err := g.store.GetPublishedContent(ctx, module.ID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get content: %w", err)
|
|
}
|
|
|
|
contentContext := ""
|
|
if content != nil {
|
|
contentContext = fmt.Sprintf("\n\n**Vorhandener Schulungsinhalt (als Basis):**\n%s", truncateText(content.ContentBody, 4000))
|
|
}
|
|
|
|
prompt := fmt.Sprintf(`Erstelle ein interaktives Schulungsvideo-Skript mit Erzaehlerpersona und Checkpoints.
|
|
|
|
**Modul:** %s — %s
|
|
**Verordnung:** %s
|
|
**Beschreibung:** %s
|
|
**Dauer:** ca. %d Minuten
|
|
%s
|
|
|
|
Erstelle ein NarratorScript-JSON mit:
|
|
- "title": Titel der Schulung
|
|
- "intro": Begruessungstext ("Hallo, ich bin Ihr AI Teacher. Heute lernen Sie...")
|
|
- "sections": Array mit 3-4 Abschnitten, jeder mit:
|
|
- "heading": Abschnittsueberschrift
|
|
- "narrator_text": Fliesstext im Erzaehlstil (~450 Woerter, ~3 Min Sprechzeit)
|
|
- "bullet_points": 3-5 Kernpunkte fuer die Folie
|
|
- "transition": Ueberleitung zum naechsten Abschnitt oder Checkpoint
|
|
- "checkpoint": Quiz-Block mit:
|
|
- "title": Checkpoint-Titel
|
|
- "questions": Array mit 3-5 Fragen, je:
|
|
- "question": Fragetext
|
|
- "options": Array mit 4 Antworten
|
|
- "correct_index": Index der richtigen Antwort (0-basiert)
|
|
- "explanation": Erklaerung der richtigen Antwort
|
|
- "outro": Abschlussworte
|
|
- "total_duration_estimate": geschaetzte Gesamtdauer in Sekunden
|
|
|
|
Antworte NUR mit dem JSON-Objekt.`,
|
|
module.ModuleCode, module.Title,
|
|
string(module.RegulationArea),
|
|
module.Description,
|
|
module.DurationMinutes,
|
|
contentContext,
|
|
)
|
|
|
|
resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
|
|
Messages: []llm.Message{
|
|
{Role: "system", Content: narratorSystemPrompt},
|
|
{Role: "user", Content: prompt},
|
|
},
|
|
Temperature: 0.2,
|
|
MaxTokens: 8192,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("LLM narrator script generation failed: %w", err)
|
|
}
|
|
|
|
return parseNarratorScript(resp.Message.Content)
|
|
}
|
|
|
|
// parseNarratorScript extracts a NarratorScript from LLM output
|
|
func parseNarratorScript(content string) (*NarratorScript, error) {
|
|
// Find JSON object in response
|
|
start := strings.Index(content, "{")
|
|
end := strings.LastIndex(content, "}")
|
|
if start < 0 || end <= start {
|
|
return nil, fmt.Errorf("no JSON object found in LLM response")
|
|
}
|
|
jsonStr := content[start : end+1]
|
|
|
|
var script NarratorScript
|
|
if err := json.Unmarshal([]byte(jsonStr), &script); err != nil {
|
|
return nil, fmt.Errorf("failed to parse narrator script JSON: %w", err)
|
|
}
|
|
|
|
if len(script.Sections) == 0 {
|
|
return nil, fmt.Errorf("narrator script has no sections")
|
|
}
|
|
|
|
return &script, nil
|
|
}
|
|
|
|
// GenerateInteractiveVideo orchestrates the full interactive video pipeline:
|
|
// NarratorScript → TTS Audio → Slides+Video → DB Checkpoints + Quiz Questions
|
|
func (g *ContentGenerator) GenerateInteractiveVideo(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
|
|
if g.ttsClient == nil {
|
|
return nil, fmt.Errorf("TTS client not configured")
|
|
}
|
|
|
|
// 1. Generate NarratorScript via LLM
|
|
script, err := g.GenerateNarratorScript(ctx, module)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("narrator script generation failed: %w", err)
|
|
}
|
|
|
|
// 2. Synthesize audio per section via TTS service
|
|
sections := make([]SectionAudio, len(script.Sections))
|
|
for i, s := range script.Sections {
|
|
// Combine narrator text with intro/outro for first/last section
|
|
text := s.NarratorText
|
|
if i == 0 && script.Intro != "" {
|
|
text = script.Intro + "\n\n" + text
|
|
}
|
|
if i == len(script.Sections)-1 && script.Outro != "" {
|
|
text = text + "\n\n" + script.Outro
|
|
}
|
|
sections[i] = SectionAudio{
|
|
Text: text,
|
|
Heading: s.Heading,
|
|
}
|
|
}
|
|
|
|
audioResp, err := g.ttsClient.SynthesizeSections(ctx, &SynthesizeSectionsRequest{
|
|
Sections: sections,
|
|
Voice: "de_DE-thorsten-high",
|
|
ModuleID: module.ID.String(),
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("section audio synthesis failed: %w", err)
|
|
}
|
|
|
|
// 3. Generate interactive video via TTS service
|
|
videoResp, err := g.ttsClient.GenerateInteractiveVideo(ctx, &GenerateInteractiveVideoRequest{
|
|
Script: script,
|
|
Audio: audioResp,
|
|
ModuleID: module.ID.String(),
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("interactive video generation failed: %w", err)
|
|
}
|
|
|
|
// 4. Save TrainingMedia record
|
|
scriptJSON, _ := json.Marshal(script)
|
|
media := &TrainingMedia{
|
|
ModuleID: module.ID,
|
|
MediaType: MediaTypeInteractiveVideo,
|
|
Status: MediaStatusProcessing,
|
|
Bucket: "compliance-training-video",
|
|
ObjectKey: fmt.Sprintf("video/%s/interactive.mp4", module.ID.String()),
|
|
MimeType: "video/mp4",
|
|
Language: "de",
|
|
GeneratedBy: "tts_ffmpeg_interactive",
|
|
Metadata: scriptJSON,
|
|
}
|
|
|
|
if err := g.store.CreateMedia(ctx, media); err != nil {
|
|
return nil, fmt.Errorf("failed to create media record: %w", err)
|
|
}
|
|
|
|
// Update media with video result
|
|
media.Status = MediaStatusCompleted
|
|
media.FileSizeBytes = videoResp.SizeBytes
|
|
media.DurationSeconds = videoResp.DurationSeconds
|
|
media.ObjectKey = videoResp.ObjectKey
|
|
media.Bucket = videoResp.Bucket
|
|
g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, videoResp.SizeBytes, videoResp.DurationSeconds, "")
|
|
|
|
// Auto-publish
|
|
g.store.PublishMedia(ctx, media.ID, true)
|
|
|
|
// 5. Create Checkpoints + Quiz Questions in DB
|
|
// Clear old checkpoints first
|
|
g.store.DeleteCheckpointsForModule(ctx, module.ID)
|
|
|
|
for i, section := range script.Sections {
|
|
if section.Checkpoint == nil {
|
|
continue
|
|
}
|
|
|
|
// Calculate timestamp from cumulative audio durations
|
|
var timestamp float64
|
|
if i < len(audioResp.Sections) {
|
|
// Checkpoint timestamp = end of this section's audio
|
|
timestamp = audioResp.Sections[i].StartTimestamp + audioResp.Sections[i].Duration
|
|
}
|
|
|
|
cp := &Checkpoint{
|
|
ModuleID: module.ID,
|
|
CheckpointIndex: i,
|
|
Title: section.Checkpoint.Title,
|
|
TimestampSeconds: timestamp,
|
|
}
|
|
if err := g.store.CreateCheckpoint(ctx, cp); err != nil {
|
|
return nil, fmt.Errorf("failed to create checkpoint %d: %w", i, err)
|
|
}
|
|
|
|
// Save quiz questions for this checkpoint
|
|
for j, q := range section.Checkpoint.Questions {
|
|
question := &QuizQuestion{
|
|
ModuleID: module.ID,
|
|
Question: q.Question,
|
|
Options: q.Options,
|
|
CorrectIndex: q.CorrectIndex,
|
|
Explanation: q.Explanation,
|
|
Difficulty: DifficultyMedium,
|
|
SortOrder: j,
|
|
}
|
|
if err := g.store.CreateCheckpointQuizQuestion(ctx, question, cp.ID); err != nil {
|
|
return nil, fmt.Errorf("failed to create checkpoint question: %w", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// 6. Audit log
|
|
g.store.LogAction(ctx, &AuditLogEntry{
|
|
TenantID: module.TenantID,
|
|
Action: AuditAction("interactive_video_generated"),
|
|
EntityType: AuditEntityModule,
|
|
EntityID: &module.ID,
|
|
Details: map[string]interface{}{
|
|
"module_code": module.ModuleCode,
|
|
"media_id": media.ID.String(),
|
|
"duration_seconds": videoResp.DurationSeconds,
|
|
"sections": len(script.Sections),
|
|
"checkpoints": countCheckpoints(script),
|
|
},
|
|
})
|
|
|
|
return media, nil
|
|
}
|
|
|
|
func countCheckpoints(script *NarratorScript) int {
|
|
count := 0
|
|
for _, s := range script.Sections {
|
|
if s.Checkpoint != nil {
|
|
count++
|
|
}
|
|
}
|
|
return count
|
|
}
|