feat(training+controls): interactive video pipeline, training blocks, control generator, CE libraries
Some checks failed
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Failing after 37s
CI/CD / test-python-backend-compliance (push) Successful in 39s
CI/CD / test-python-document-crawler (push) Successful in 26s
CI/CD / test-python-dsms-gateway (push) Successful in 23s
CI/CD / validate-canonical-controls (push) Successful in 12s
CI/CD / Deploy (push) Has been skipped

Interactive Training Videos (CP-TRAIN):
- DB migration 022: training_checkpoints + checkpoint_progress tables
- NarratorScript generation via Anthropic (AI Teacher persona, German)
- TTS batch synthesis + interactive video pipeline (slides + checkpoint slides + FFmpeg)
- 4 new API endpoints: generate-interactive, interactive-manifest, checkpoint submit, checkpoint progress
- InteractiveVideoPlayer component (HTML5 Video, quiz overlay, seek protection, progress tracking)
- Learner portal integration with automatic completion once all checkpoints are passed
- 30 new tests (handler validation + grading logic + manifest/progress + seek protection)

Training Blocks:
- Block generator, block store, block config CRUD + preview/generate endpoints
- Migration 021: training_blocks schema

Control Generator + Canonical Library:
- Control generator routes + service enhancements
- Canonical control library helpers, sidebar entry
- Citation backfill service + tests
- CE libraries data (hazard, protection, evidence, lifecycle, components)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-03-16 21:41:48 +01:00
parent d2133dbfa2
commit 4f6bc8f6f6
50 changed files with 17299 additions and 198 deletions

View File

@@ -294,6 +294,133 @@ func parseQuizResponse(response string, moduleID uuid.UUID) ([]QuizQuestion, err
return questions, nil
}
// GenerateBlockContent generates training content for a module based on its
// linked canonical controls.
//
// The prompt built from module and controls is sent to the LLM registry, every
// PII finding reported by the detector is replaced with "[REDACTED]", and the
// result is stored as unpublished Markdown module content. An empty language
// defaults to "de". The stored content is returned; an error is returned when
// the LLM call or the store write fails.
func (g *ContentGenerator) GenerateBlockContent(
	ctx context.Context,
	module TrainingModule,
	controls []CanonicalControlSummary,
	language string,
) (*ModuleContent, error) {
	if language == "" {
		language = "de"
	}

	prompt := buildBlockContentPrompt(module, controls, language)

	resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: getContentSystemPrompt(language)},
			{Role: "user", Content: prompt},
		},
		Temperature: 0.15,
		MaxTokens:   8192,
	})
	if err != nil {
		return nil, fmt.Errorf("LLM block content generation failed: %w", err)
	}

	contentBody := resp.Message.Content

	// PII check
	if g.piiDetector != nil && g.piiDetector.ContainsPII(contentBody) {
		for _, f := range g.piiDetector.FindPII(contentBody) {
			// An empty match would make ReplaceAll insert the marker between
			// every character of the content, so it is skipped.
			if f.Match == "" {
				continue
			}
			contentBody = strings.ReplaceAll(contentBody, f.Match, "[REDACTED]")
		}
	}

	// The summary is at most the first 200 bytes of the content. The cut is
	// moved back past UTF-8 continuation bytes (10xxxxxx) so a multi-byte
	// character such as an umlaut is never split into invalid bytes.
	summary := contentBody
	if len(summary) > 200 {
		cut := 200
		for cut > 0 && summary[cut]&0xC0 == 0x80 {
			cut--
		}
		summary = summary[:cut] + "..."
	}

	content := &ModuleContent{
		ModuleID:      module.ID,
		ContentFormat: ContentFormatMarkdown,
		ContentBody:   contentBody,
		Summary:       summary,
		GeneratedBy:   "llm_block_" + resp.Provider,
		LLMModel:      resp.Model,
		IsPublished:   false,
	}

	if err := g.store.CreateModuleContent(ctx, content); err != nil {
		return nil, fmt.Errorf("failed to save block content: %w", err)
	}

	// Audit log.
	// NOTE(review): the result of LogAction is not checked here — confirm
	// whether it can fail and whether that should be surfaced.
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditActionContentGenerated,
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details: map[string]interface{}{
			"module_code":    module.ModuleCode,
			"provider":       resp.Provider,
			"model":          resp.Model,
			"content_id":     content.ID.String(),
			"version":        content.Version,
			"tokens_used":    resp.Usage.TotalTokens,
			"controls_count": len(controls),
			"source":         "block_generator",
		},
	})

	return content, nil
}
// buildBlockContentPrompt creates the user prompt for block content generation.
//
// The prompt consists of a module header (code, title, duration, number of
// controls), one section per canonical control with its objective and, when
// present, its requirements, and a closing instruction describing the expected
// Markdown structure. The English variant is used when language is "en"; any
// other value produces the German variant. The per-control labels follow the
// same language as the rest of the prompt.
func buildBlockContentPrompt(module TrainingModule, controls []CanonicalControlSummary, language string) string {
	english := language == "en"

	var sb strings.Builder

	if english {
		sb.WriteString("Create training material for the following compliance module:\n\n")
		fmt.Fprintf(&sb, "**Module Code:** %s\n", module.ModuleCode)
		fmt.Fprintf(&sb, "**Title:** %s\n", module.Title)
		fmt.Fprintf(&sb, "**Duration:** %d minutes\n\n", module.DurationMinutes)
		fmt.Fprintf(&sb, "This module is based on %d security controls:\n\n", len(controls))
	} else {
		sb.WriteString("Erstelle Schulungsmaterial fuer folgendes Compliance-Modul:\n\n")
		fmt.Fprintf(&sb, "**Modulcode:** %s\n", module.ModuleCode)
		fmt.Fprintf(&sb, "**Titel:** %s\n", module.Title)
		fmt.Fprintf(&sb, "**Dauer:** %d Minuten\n\n", module.DurationMinutes)
		fmt.Fprintf(&sb, "Dieses Modul basiert auf %d Sicherheits-Controls:\n\n", len(controls))
	}

	// Labels for the control details; previously these were German even in
	// the English prompt.
	objectiveLabel, requirementsLabel := "**Ziel:**", "**Anforderungen:**"
	if english {
		objectiveLabel, requirementsLabel = "**Objective:**", "**Requirements:**"
	}

	for i, ctrl := range controls {
		fmt.Fprintf(&sb, "### Control %d: %s — %s\n", i+1, ctrl.ControlID, ctrl.Title)
		fmt.Fprintf(&sb, "%s %s\n", objectiveLabel, ctrl.Objective)
		if len(ctrl.Requirements) > 0 {
			sb.WriteString(requirementsLabel)
			sb.WriteString("\n")
			for _, req := range ctrl.Requirements {
				fmt.Fprintf(&sb, "- %s\n", req)
			}
		}
		sb.WriteString("\n")
	}

	if english {
		sb.WriteString(`Create the material as Markdown:
1. Introduction: Why are these controls important?
2. Per control: Explanation, practical tips, examples
3. Summary + action items
4. Checklist for daily work
Use clear, understandable language. Target audience: employees in companies (50-1,500 employees).`)
	} else {
		sb.WriteString(`Erstelle das Material als Markdown:
1. Einfuehrung: Warum sind diese Controls wichtig?
2. Pro Control: Erklaerung, praktische Hinweise, Beispiele
3. Zusammenfassung + Handlungsanweisungen
4. Checkliste fuer den Alltag
Verwende klare, verstaendliche Sprache. Zielgruppe sind Mitarbeiter in Unternehmen (50-1.500 MA).
Formatiere den Inhalt als Markdown mit Ueberschriften, Aufzaehlungen und Hervorhebungen.`)
	}

	return sb.String()
}
// GenerateAllModuleContent generates text content for all modules that don't have published content yet
func (g *ContentGenerator) GenerateAllModuleContent(ctx context.Context, tenantID uuid.UUID, language string) (*BulkResult, error) {
if language == "" {
@@ -600,3 +727,252 @@ func truncateText(text string, maxLen int) string {
}
return text[:maxLen] + "..."
}
// ============================================================================
// Interactive Video Pipeline
// ============================================================================
// narratorSystemPrompt is the German system prompt sent with the narrator
// script request in GenerateNarratorScript. It sets the "AI Teacher" persona
// and instructs the model to answer with a bare NarratorScript JSON object in
// which every section is followed by a checkpoint of 3-5 quiz questions, each
// with exactly four options and a 0-based correct_index.
const narratorSystemPrompt = `Du bist ein professioneller AI Teacher fuer Compliance-Schulungen.
Dein Stil ist foermlich aber freundlich, klar und paedagogisch wertvoll.
Du sprichst die Lernenden direkt an ("Sie") und fuehrst sie durch die Schulung.
Du erzeugst IMMER deutschsprachige Inhalte.
Dein Output ist ein JSON-Objekt im Format NarratorScript.
Jede Section sollte etwa 3 Minuten Sprechzeit haben (~450 Woerter Narrator-Text).
Nach jeder Section kommt ein Checkpoint mit 3-5 Quiz-Fragen.
Die Fragen testen das Verstaendnis des gerade Gelernten.
Jede Frage hat genau 4 Antwortmoeglichkeiten, wobei correct_index (0-basiert) die richtige Antwort angibt.
Antworte NUR mit dem JSON-Objekt, ohne Markdown-Codeblock-Wrapper.`
// GenerateNarratorScript asks the LLM for a narrator-style video script with
// checkpoints for the given module and returns the parsed script.
//
// When the store returns published content for the module, its body (cut to
// 4000 via truncateText) is appended to the prompt as a basis. Errors from the
// store lookup, the LLM call, or the parsing of the answer are returned.
func (g *ContentGenerator) GenerateNarratorScript(ctx context.Context, module TrainingModule) (*NarratorScript, error) {
	published, err := g.store.GetPublishedContent(ctx, module.ID)
	if err != nil {
		return nil, fmt.Errorf("failed to get content: %w", err)
	}

	// Optional block with the existing training content; stays empty when
	// nothing has been published yet.
	var basis string
	if published != nil {
		basis = "\n\n**Vorhandener Schulungsinhalt (als Basis):**\n" + truncateText(published.ContentBody, 4000)
	}

	const scriptPromptTemplate = `Erstelle ein interaktives Schulungsvideo-Skript mit Erzaehlerpersona und Checkpoints.
**Modul:** %s — %s
**Verordnung:** %s
**Beschreibung:** %s
**Dauer:** ca. %d Minuten
%s
Erstelle ein NarratorScript-JSON mit:
- "title": Titel der Schulung
- "intro": Begruessungstext ("Hallo, ich bin Ihr AI Teacher. Heute lernen Sie...")
- "sections": Array mit 3-4 Abschnitten, jeder mit:
- "heading": Abschnittsueberschrift
- "narrator_text": Fliesstext im Erzaehlstil (~450 Woerter, ~3 Min Sprechzeit)
- "bullet_points": 3-5 Kernpunkte fuer die Folie
- "transition": Ueberleitung zum naechsten Abschnitt oder Checkpoint
- "checkpoint": Quiz-Block mit:
- "title": Checkpoint-Titel
- "questions": Array mit 3-5 Fragen, je:
- "question": Fragetext
- "options": Array mit 4 Antworten
- "correct_index": Index der richtigen Antwort (0-basiert)
- "explanation": Erklaerung der richtigen Antwort
- "outro": Abschlussworte
- "total_duration_estimate": geschaetzte Gesamtdauer in Sekunden
Antworte NUR mit dem JSON-Objekt.`

	userPrompt := fmt.Sprintf(
		scriptPromptTemplate,
		module.ModuleCode,
		module.Title,
		string(module.RegulationArea),
		module.Description,
		module.DurationMinutes,
		basis,
	)

	request := &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: narratorSystemPrompt},
			{Role: "user", Content: userPrompt},
		},
		Temperature: 0.2,
		MaxTokens:   8192,
	}

	answer, err := g.registry.Chat(ctx, request)
	if err != nil {
		return nil, fmt.Errorf("LLM narrator script generation failed: %w", err)
	}

	return parseNarratorScript(answer.Message.Content)
}
// parseNarratorScript extracts a NarratorScript from LLM output.
//
// The span from the first "{" to the last "}" of content is decoded as JSON,
// so any text surrounding the object is ignored. An error is returned when no
// such span exists, when the JSON does not decode, or when the decoded script
// has no sections.
func parseNarratorScript(content string) (*NarratorScript, error) {
	first := strings.Index(content, "{")
	if first < 0 {
		return nil, fmt.Errorf("no JSON object found in LLM response")
	}
	last := strings.LastIndex(content, "}")
	if last <= first {
		return nil, fmt.Errorf("no JSON object found in LLM response")
	}

	parsed := new(NarratorScript)
	payload := []byte(content[first : last+1])
	if err := json.Unmarshal(payload, parsed); err != nil {
		return nil, fmt.Errorf("failed to parse narrator script JSON: %w", err)
	}

	if len(parsed.Sections) < 1 {
		return nil, fmt.Errorf("narrator script has no sections")
	}
	return parsed, nil
}
// GenerateInteractiveVideo orchestrates the full interactive video pipeline:
// NarratorScript → TTS Audio → Slides+Video → DB Checkpoints + Quiz Questions
//
// Steps, in order:
//  1. generate the narrator script via the LLM,
//  2. synthesize one audio section per script section through the TTS client,
//  3. let the TTS client render the interactive video,
//  4. store a TrainingMedia record carrying the script JSON as metadata, mark
//     it completed and publish it,
//  5. replace the module's checkpoints and their quiz questions,
//  6. write an audit log entry.
//
// It returns the media record, or an error when the TTS client is not
// configured or any of the checked steps fails. A failure in step 5 is
// returned after the media record has already been created and published.
func (g *ContentGenerator) GenerateInteractiveVideo(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
	if g.ttsClient == nil {
		return nil, fmt.Errorf("TTS client not configured")
	}

	// 1. Generate NarratorScript via LLM
	script, err := g.GenerateNarratorScript(ctx, module)
	if err != nil {
		return nil, fmt.Errorf("narrator script generation failed: %w", err)
	}

	// 2. Synthesize audio per section via TTS service
	sections := make([]SectionAudio, len(script.Sections))
	for i, s := range script.Sections {
		// Combine narrator text with intro/outro for first/last section
		text := s.NarratorText
		if i == 0 && script.Intro != "" {
			text = script.Intro + "\n\n" + text
		}
		if i == len(script.Sections)-1 && script.Outro != "" {
			text = text + "\n\n" + script.Outro
		}
		sections[i] = SectionAudio{
			Text:    text,
			Heading: s.Heading,
		}
	}

	audioResp, err := g.ttsClient.SynthesizeSections(ctx, &SynthesizeSectionsRequest{
		Sections: sections,
		Voice:    "de_DE-thorsten-high",
		ModuleID: module.ID.String(),
	})
	if err != nil {
		return nil, fmt.Errorf("section audio synthesis failed: %w", err)
	}

	// 3. Generate interactive video via TTS service
	videoResp, err := g.ttsClient.GenerateInteractiveVideo(ctx, &GenerateInteractiveVideoRequest{
		Script:   script,
		Audio:    audioResp,
		ModuleID: module.ID.String(),
	})
	if err != nil {
		return nil, fmt.Errorf("interactive video generation failed: %w", err)
	}

	// 4. Save TrainingMedia record
	// A marshalling failure is reported instead of silently storing empty
	// metadata.
	scriptJSON, err := json.Marshal(script)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal narrator script: %w", err)
	}
	media := &TrainingMedia{
		ModuleID:    module.ID,
		MediaType:   MediaTypeInteractiveVideo,
		Status:      MediaStatusProcessing,
		Bucket:      "compliance-training-video",
		ObjectKey:   fmt.Sprintf("video/%s/interactive.mp4", module.ID.String()),
		MimeType:    "video/mp4",
		Language:    "de",
		GeneratedBy: "tts_ffmpeg_interactive",
		Metadata:    scriptJSON,
	}
	if err := g.store.CreateMedia(ctx, media); err != nil {
		return nil, fmt.Errorf("failed to create media record: %w", err)
	}

	// Update media with video result.
	// Only status, size and duration are passed to UpdateMediaStatus; the
	// object key and bucket from the video response are set on the returned
	// struct only.
	// NOTE(review): confirm whether ObjectKey/Bucket need to be persisted too,
	// and whether the unchecked results of UpdateMediaStatus and PublishMedia
	// should abort the pipeline.
	media.Status = MediaStatusCompleted
	media.FileSizeBytes = videoResp.SizeBytes
	media.DurationSeconds = videoResp.DurationSeconds
	media.ObjectKey = videoResp.ObjectKey
	media.Bucket = videoResp.Bucket
	g.store.UpdateMediaStatus(ctx, media.ID, MediaStatusCompleted, videoResp.SizeBytes, videoResp.DurationSeconds, "")

	// Auto-publish
	g.store.PublishMedia(ctx, media.ID, true)

	// 5. Create Checkpoints + Quiz Questions in DB
	// Clear old checkpoints first.
	// NOTE(review): the result of DeleteCheckpointsForModule is not checked —
	// confirm whether a failed delete can leave stale checkpoints behind.
	g.store.DeleteCheckpointsForModule(ctx, module.ID)

	for i, section := range script.Sections {
		if section.Checkpoint == nil {
			continue
		}

		// Calculate timestamp from cumulative audio durations.
		// Stays 0 when the audio response has no entry for this section.
		var timestamp float64
		if i < len(audioResp.Sections) {
			// Checkpoint timestamp = end of this section's audio
			timestamp = audioResp.Sections[i].StartTimestamp + audioResp.Sections[i].Duration
		}

		// CheckpointIndex is the section index, so it has gaps when a section
		// carries no checkpoint.
		cp := &Checkpoint{
			ModuleID:         module.ID,
			CheckpointIndex:  i,
			Title:            section.Checkpoint.Title,
			TimestampSeconds: timestamp,
		}
		if err := g.store.CreateCheckpoint(ctx, cp); err != nil {
			return nil, fmt.Errorf("failed to create checkpoint %d: %w", i, err)
		}

		// Save quiz questions for this checkpoint
		for j, q := range section.Checkpoint.Questions {
			question := &QuizQuestion{
				ModuleID:     module.ID,
				Question:     q.Question,
				Options:      q.Options,
				CorrectIndex: q.CorrectIndex,
				Explanation:  q.Explanation,
				Difficulty:   DifficultyMedium,
				SortOrder:    j,
			}
			if err := g.store.CreateCheckpointQuizQuestion(ctx, question, cp.ID); err != nil {
				return nil, fmt.Errorf("failed to create checkpoint question: %w", err)
			}
		}
	}

	// 6. Audit log (result not checked)
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditAction("interactive_video_generated"),
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details: map[string]interface{}{
			"module_code":      module.ModuleCode,
			"media_id":         media.ID.String(),
			"duration_seconds": videoResp.DurationSeconds,
			"sections":         len(script.Sections),
			"checkpoints":      countCheckpoints(script),
		},
	})

	return media, nil
}
// countCheckpoints returns the number of sections in script that carry a
// checkpoint (a non-nil Checkpoint field).
func countCheckpoints(script *NarratorScript) int {
	total := 0
	for i := range script.Sections {
		if script.Sections[i].Checkpoint == nil {
			continue
		}
		total++
	}
	return total
}