// breakpilot-compliance/ai-compliance-sdk/internal/training/content_generator.go

package training

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"

	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
	"github.com/google/uuid"
)

// ContentGenerator generates training content and quiz questions via LLM.
// Its methods also create audio and video media for a module through the TTS
// client and record the results in the store.
type ContentGenerator struct {
	// registry is the LLM entry point; the generation methods call registry.Chat.
	registry    *llm.ProviderRegistry
	// piiDetector is optional: when nil, generated content is stored without
	// PII redaction.
	piiDetector *llm.PIIDetector
	// store persists content, quiz questions, media records and audit entries.
	store       *Store
	// ttsClient is optional: GenerateAudio and GenerateVideo return an error
	// when it is nil.
	ttsClient   *TTSClient
}

// NewContentGenerator wires the given dependencies into a ContentGenerator.
// The arguments are stored exactly as passed; nothing is validated here.
func NewContentGenerator(registry *llm.ProviderRegistry, piiDetector *llm.PIIDetector, store *Store, ttsClient *TTSClient) *ContentGenerator {
	gen := new(ContentGenerator)
	gen.registry = registry
	gen.piiDetector = piiDetector
	gen.store = store
	gen.ttsClient = ttsClient
	return gen
}

// GenerateModuleContent generates training content for a module via LLM,
// stores it as unpublished Markdown content and writes an audit log entry.
//
// An empty language defaults to "de". When a PII detector is configured, every
// non-empty PII match found in the LLM output is replaced with "[REDACTED]"
// before the content is stored. The summary is the first 200 bytes of the
// (redacted) body, shortened to the nearest UTF-8 character boundary and
// followed by "...".
//
// It returns an error when the LLM call or the store write fails.
func (g *ContentGenerator) GenerateModuleContent(ctx context.Context, module TrainingModule, language string) (*ModuleContent, error) {
	if language == "" {
		language = "de"
	}

	prompt := buildContentPrompt(module, language)

	resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: getContentSystemPrompt(language)},
			{Role: "user", Content: prompt},
		},
		Temperature: 0.15,
		MaxTokens:   4096,
	})
	if err != nil {
		return nil, fmt.Errorf("LLM content generation failed: %w", err)
	}

	contentBody := resp.Message.Content

	// PII check on generated content
	if g.piiDetector != nil && g.piiDetector.ContainsPII(contentBody) {
		for _, f := range g.piiDetector.FindPII(contentBody) {
			// An empty match would make ReplaceAll insert the marker between
			// every character of the body, so skip it.
			if f.Match == "" {
				continue
			}
			contentBody = strings.ReplaceAll(contentBody, f.Match, "[REDACTED]")
		}
	}

	// Create summary (first 200 bytes, never cutting inside a character)
	const summaryLimit = 200
	summary := contentBody
	if len(summary) > summaryLimit {
		cut := summaryLimit
		// UTF-8 continuation bytes look like 10xxxxxx; step back until the cut
		// sits on the first byte of a character so the summary stays valid.
		for cut > 0 && summary[cut]&0xC0 == 0x80 {
			cut--
		}
		summary = summary[:cut] + "..."
	}

	content := &ModuleContent{
		ModuleID:      module.ID,
		ContentFormat: ContentFormatMarkdown,
		ContentBody:   contentBody,
		Summary:       summary,
		GeneratedBy:   "llm_" + resp.Provider,
		LLMModel:      resp.Model,
		IsPublished:   false,
	}

	if err := g.store.CreateModuleContent(ctx, content); err != nil {
		return nil, fmt.Errorf("failed to save content: %w", err)
	}

	// Audit log
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditActionContentGenerated,
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details: map[string]interface{}{
			"module_code": module.ModuleCode,
			"provider":    resp.Provider,
			"model":       resp.Model,
			"content_id":  content.ID.String(),
			"version":     content.Version,
			"tokens_used": resp.Usage.TotalTokens,
		},
	})

	return content, nil
}

// GenerateQuizQuestions generates quiz questions for a module via LLM and
// saves them to the store.
//
// A count of zero or less defaults to 5. When the module has published
// content, its body is passed to the LLM as context; otherwise the prompt is
// built from the module metadata alone. The returned questions are numbered
// 1..n in SortOrder.
//
// It returns an error when the content lookup, the LLM call, the response
// parsing or any store write fails. Questions are saved one at a time and no
// rollback is done here, so a failure midway leaves the earlier ones stored.
func (g *ContentGenerator) GenerateQuizQuestions(ctx context.Context, module TrainingModule, count int) ([]QuizQuestion, error) {
	if count <= 0 {
		count = 5
	}

	// Get the published content for context
	content, err := g.store.GetPublishedContent(ctx, module.ID)
	if err != nil {
		// Wrapped like the other content lookups in this file so callers can
		// tell which step failed.
		return nil, fmt.Errorf("failed to get content: %w", err)
	}

	contentContext := ""
	if content != nil {
		contentContext = content.ContentBody
	}

	prompt := buildQuizPrompt(module, contentContext, count)

	resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: getQuizSystemPrompt()},
			{Role: "user", Content: prompt},
		},
		Temperature: 0.2,
		MaxTokens:   4096,
	})
	if err != nil {
		return nil, fmt.Errorf("LLM quiz generation failed: %w", err)
	}

	// Parse the JSON response
	questions, err := parseQuizResponse(resp.Message.Content, module.ID)
	if err != nil {
		return nil, fmt.Errorf("failed to parse quiz response: %w", err)
	}

	// Save questions to store
	for i := range questions {
		questions[i].SortOrder = i + 1
		if err := g.store.CreateQuizQuestion(ctx, &questions[i]); err != nil {
			return nil, fmt.Errorf("failed to save question %d: %w", i+1, err)
		}
	}

	return questions, nil
}

// ============================================================================
// Prompt Templates
// ============================================================================

// getContentSystemPrompt returns the system prompt for content generation.
// Only "en" selects the English prompt; every other value, including the
// empty string, gets the German one.
func getContentSystemPrompt(language string) string {
	const (
		englishPrompt = "You are a compliance training content expert. Generate professional, accurate training material in Markdown format. Focus on practical relevance and legal accuracy. Do not include any personal data or fictional names."
		germanPrompt  = "Du bist ein Experte fuer Compliance-Schulungsinhalte. Erstelle professionelle, praezise Schulungsmaterialien im Markdown-Format. Fokussiere dich auf praktische Relevanz und rechtliche Genauigkeit. Verwende keine personenbezogenen Daten oder fiktiven Namen."
	)

	switch language {
	case "en":
		return englishPrompt
	default:
		return germanPrompt
	}
}

// getQuizSystemPrompt returns the German system prompt for quiz generation.
// It asks for a bare JSON array of multiple-choice questions and shows the
// expected object layout; parseQuizResponse decodes the same field names.
func getQuizSystemPrompt() string {
	// Instructions, ending with the blank line that precedes the example.
	const instructions = "Du bist ein Experte fuer Compliance-Pruefungsfragen. Erstelle Multiple-Choice-Fragen als JSON-Array.\n" +
		"Jede Frage hat genau 4 Antwortoptionen, davon genau eine richtige.\n" +
		"Antworte NUR mit dem JSON-Array, ohne zusaetzlichen Text.\n" +
		"\n"

	// Example of the expected answer format.
	const example = `Format:
[
  {
    "question": "Frage hier?",
    "options": ["Option A", "Option B", "Option C", "Option D"],
    "correct_index": 0,
    "explanation": "Erklaerung warum Option A richtig ist.",
    "difficulty": "medium"
  }
]`

	return instructions + example
}

// buildContentPrompt renders the German user prompt for content generation
// from the module metadata. Known regulation areas are shown with a readable
// label; any other area is shown as its raw value. The language parameter is
// accepted but not used by this function.
func buildContentPrompt(module TrainingModule, language string) string {
	const promptTemplate = `Erstelle Schulungsmaterial fuer folgendes Compliance-Modul:

**Modulcode:** %s
**Titel:** %s
**Beschreibung:** %s
**Regulierungsbereich:** %s
**Dauer:** %d Minuten
**NIS2-relevant:** %v

Das Material soll:
1. Eine kurze Einfuehrung in das Thema geben
2. Die wichtigsten rechtlichen Grundlagen erklaeren
3. Praktische Handlungsanweisungen fuer den Arbeitsalltag enthalten
4. Typische Fehler und Risiken aufzeigen
5. Eine Zusammenfassung der Kernpunkte bieten

Verwende klare, verstaendliche Sprache. Zielgruppe sind Mitarbeiter in Unternehmen (50-1.500 MA).
Formatiere den Inhalt als Markdown mit Ueberschriften, Aufzaehlungen und Hervorhebungen.`

	var label string
	switch module.RegulationArea {
	case RegulationDSGVO:
		label = "Datenschutz-Grundverordnung (DSGVO)"
	case RegulationNIS2:
		label = "NIS-2-Richtlinie"
	case RegulationISO27001:
		label = "ISO 27001 / ISMS"
	case RegulationAIAct:
		label = "EU AI Act / KI-Verordnung"
	case RegulationGeschGehG:
		label = "Geschaeftsgeheimnisgesetz (GeschGehG)"
	case RegulationHinSchG:
		label = "Hinweisgeberschutzgesetz (HinSchG)"
	default:
		// Unknown area: fall back to the raw identifier.
		label = string(module.RegulationArea)
	}

	return fmt.Sprintf(
		promptTemplate,
		module.ModuleCode,
		module.Title,
		module.Description,
		label,
		module.DurationMinutes,
		module.NIS2Relevant,
	)
}

// buildQuizPrompt renders the German user prompt for quiz generation.
//
// module supplies the code, title and regulation area shown in the header.
// contentContext is optional training text; when non-empty it is appended as
// context, limited to 3000 bytes. The cut is moved back to the nearest UTF-8
// character boundary so the prompt never contains half of a multi-byte
// character, and "..." marks the truncation. count is the number of questions
// requested.
func buildQuizPrompt(module TrainingModule, contentContext string, count int) string {
	// Upper bound for the embedded training text, to avoid the token limit.
	const maxContextBytes = 3000

	prompt := fmt.Sprintf(`Erstelle %d Multiple-Choice-Pruefungsfragen fuer das Compliance-Modul:

**Modulcode:** %s
**Titel:** %s
**Regulierungsbereich:** %s`, count, module.ModuleCode, module.Title, string(module.RegulationArea))

	if contentContext != "" {
		if len(contentContext) > maxContextBytes {
			cut := maxContextBytes
			// UTF-8 continuation bytes look like 10xxxxxx; step back until the
			// cut sits on the first byte of a character.
			for cut > 0 && contentContext[cut]&0xC0 == 0x80 {
				cut--
			}
			contentContext = contentContext[:cut] + "..."
		}
		prompt += fmt.Sprintf(`

**Schulungsinhalt als Kontext:**
%s`, contentContext)
	}

	prompt += fmt.Sprintf(`

Erstelle genau %d Fragen mit je 4 Antwortoptionen.
Verteile die Schwierigkeitsgrade: easy, medium, hard.
Antworte NUR mit dem JSON-Array.`, count)

	return prompt
}

// parseQuizResponse parses the LLM's JSON response into QuizQuestion structs
// for the given module.
//
// Text before the first "[" and after the last "]" is ignored, because the LLM
// may wrap the array in extra prose. Entries are skipped, not treated as
// errors, when the question text is blank, when there are not exactly four
// options, or when correct_index does not point at one of them. The difficulty
// is matched against the known levels ignoring case and surrounding
// whitespace; anything else becomes DifficultyMedium.
//
// It returns an error only when the extracted text is not valid JSON for the
// expected array. The returned slice is never nil.
func parseQuizResponse(response string, moduleID uuid.UUID) ([]QuizQuestion, error) {
	// Try to extract JSON from the response (LLM might add text around it)
	jsonStr := response
	start := strings.Index(response, "[")
	end := strings.LastIndex(response, "]")
	if start >= 0 && end > start {
		jsonStr = response[start : end+1]
	}

	type rawQuestion struct {
		Question     string   `json:"question"`
		Options      []string `json:"options"`
		CorrectIndex int      `json:"correct_index"`
		Explanation  string   `json:"explanation"`
		Difficulty   string   `json:"difficulty"`
	}

	var rawQuestions []rawQuestion
	if err := json.Unmarshal([]byte(jsonStr), &rawQuestions); err != nil {
		return nil, fmt.Errorf("invalid JSON from LLM: %w", err)
	}

	// Pre-sized and non-nil, so an all-invalid response yields an empty slice.
	questions := make([]QuizQuestion, 0, len(rawQuestions))
	for _, rq := range rawQuestions {
		// Skip malformed questions
		if strings.TrimSpace(rq.Question) == "" || len(rq.Options) != 4 {
			continue
		}
		if rq.CorrectIndex < 0 || rq.CorrectIndex >= len(rq.Options) {
			continue
		}

		q := QuizQuestion{
			ModuleID:     moduleID,
			Question:     rq.Question,
			Options:      rq.Options,
			CorrectIndex: rq.CorrectIndex,
			Explanation:  rq.Explanation,
			Difficulty:   DifficultyMedium, // default for missing or unknown levels
			IsActive:     true,
		}

		// LLMs do not reliably keep the requested casing ("Easy", " hard "),
		// so compare case-insensitively after trimming.
		level := strings.TrimSpace(rq.Difficulty)
		for _, known := range []Difficulty{DifficultyEasy, DifficultyMedium, DifficultyHard} {
			if strings.EqualFold(level, string(known)) {
				q.Difficulty = known
				break
			}
		}

		questions = append(questions, q)
	}

	return questions, nil
}

// GenerateAllModuleContent generates text content for all of the tenant's
// modules that don't have published content yet.
//
// An empty language defaults to "de". Modules are fetched with a single
// ListModules call limited to 100 entries. Modules with published content
// count as Skipped. A failed content lookup or a failed generation is recorded
// in Errors as "<module code>: <error>" and processing continues with the next
// module. Everything else counts as Generated.
//
// It returns an error only when the module list cannot be loaded.
func (g *ContentGenerator) GenerateAllModuleContent(ctx context.Context, tenantID uuid.UUID, language string) (*BulkResult, error) {
	if language == "" {
		language = "de"
	}

	modules, _, err := g.store.ListModules(ctx, tenantID, &ModuleFilters{Limit: 100})
	if err != nil {
		return nil, fmt.Errorf("failed to list modules: %w", err)
	}

	result := &BulkResult{}
	for _, module := range modules {
		// Check if module already has published content. A failed lookup must
		// not be read as "no content", or a store outage would trigger an LLM
		// generation for every module.
		content, err := g.store.GetPublishedContent(ctx, module.ID)
		if err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", module.ModuleCode, err))
			continue
		}
		if content != nil {
			result.Skipped++
			continue
		}

		if _, err := g.GenerateModuleContent(ctx, module, language); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", module.ModuleCode, err))
			continue
		}
		result.Generated++
	}

	return result, nil
}

// GenerateAllQuizQuestions generates quiz questions for all of the tenant's
// modules that don't have questions yet.
//
// A count of zero or less defaults to 5. Modules are fetched with a single
// ListModules call limited to 100 entries. Modules that already have questions
// count as Skipped. A failed question lookup or a failed generation is
// recorded in Errors as "<module code>: <error>" and processing continues with
// the next module. Everything else counts as Generated.
//
// It returns an error only when the module list cannot be loaded.
func (g *ContentGenerator) GenerateAllQuizQuestions(ctx context.Context, tenantID uuid.UUID, count int) (*BulkResult, error) {
	if count <= 0 {
		count = 5
	}

	modules, _, err := g.store.ListModules(ctx, tenantID, &ModuleFilters{Limit: 100})
	if err != nil {
		return nil, fmt.Errorf("failed to list modules: %w", err)
	}

	result := &BulkResult{}
	for _, module := range modules {
		// Check if module already has quiz questions. A failed lookup must not
		// be read as "no questions", or a second set would be generated on top
		// of the existing one.
		questions, err := g.store.ListQuizQuestions(ctx, module.ID)
		if err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", module.ModuleCode, err))
			continue
		}
		if len(questions) > 0 {
			result.Skipped++
			continue
		}

		if _, err := g.GenerateQuizQuestions(ctx, module, count); err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", module.ModuleCode, err))
			continue
		}
		result.Generated++
	}

	return result, nil
}

// GenerateAudio synthesizes the module's published content into audio via the
// TTS service and tracks the result as a TrainingMedia record.
//
// The record is created in processing state before synthesis starts. If
// synthesis fails it is marked failed with the error text; otherwise it is
// marked completed with the size and duration reported by the TTS service.
// The returned media carries the bucket and object key from the TTS response.
// Voice and language are fixed to German here.
//
// It returns an error when the content lookup fails, no published content
// exists, no TTS client is configured, the media record cannot be created, or
// synthesis fails.
func (g *ContentGenerator) GenerateAudio(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
	// Audio is generated from published content only.
	published, err := g.store.GetPublishedContent(ctx, module.ID)
	switch {
	case err != nil:
		return nil, fmt.Errorf("failed to get content: %w", err)
	case published == nil:
		return nil, fmt.Errorf("no published content for module %s", module.ModuleCode)
	case g.ttsClient == nil:
		return nil, fmt.Errorf("TTS client not configured")
	}

	// Register the media record first so the run is visible while processing.
	record := &TrainingMedia{
		ModuleID:    module.ID,
		ContentID:   &published.ID,
		MediaType:   MediaTypeAudio,
		Status:      MediaStatusProcessing,
		Bucket:      "compliance-training-audio",
		ObjectKey:   fmt.Sprintf("audio/%s/%s.mp3", module.ID.String(), published.ID.String()),
		MimeType:    "audio/mpeg",
		VoiceModel:  "de_DE-thorsten-high",
		Language:    "de",
		GeneratedBy: "tts_piper",
	}
	if createErr := g.store.CreateMedia(ctx, record); createErr != nil {
		return nil, fmt.Errorf("failed to create media record: %w", createErr)
	}

	// Hand the text to the TTS service.
	request := &TTSSynthesizeRequest{
		Text:      published.ContentBody,
		Language:  "de",
		Voice:     "thorsten-high",
		ModuleID:  module.ID.String(),
		ContentID: published.ID.String(),
	}
	synth, synthErr := g.ttsClient.Synthesize(ctx, request)
	if synthErr != nil {
		g.store.UpdateMediaStatus(ctx, record.ID, MediaStatusFailed, 0, 0, synthErr.Error())
		return nil, fmt.Errorf("TTS synthesis failed: %w", synthErr)
	}

	// Copy the TTS result onto the in-memory record returned to the caller.
	record.Status = MediaStatusCompleted
	record.FileSizeBytes = synth.SizeBytes
	record.DurationSeconds = synth.DurationSeconds
	record.ObjectKey = synth.ObjectKey
	record.Bucket = synth.Bucket

	g.store.UpdateMediaStatus(ctx, record.ID, MediaStatusCompleted, synth.SizeBytes, synth.DurationSeconds, "")

	// Audit trail for the finished audio.
	auditDetails := map[string]interface{}{
		"module_code":      module.ModuleCode,
		"media_id":         record.ID.String(),
		"duration_seconds": synth.DurationSeconds,
		"size_bytes":       synth.SizeBytes,
	}
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditAction("audio_generated"),
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details:    auditDetails,
	})

	return record, nil
}

// Slide-script types decoded from the LLM's JSON answer in GenerateVideoScript
// and passed on to the TTS service by GenerateVideo.
type (
	// VideoScript is a structured presentation script: a title plus an
	// ordered list of slides.
	VideoScript struct {
		Title    string               `json:"title"`
		Sections []VideoScriptSection `json:"sections"`
	}

	// VideoScriptSection is a single slide of the presentation.
	VideoScriptSection struct {
		Heading      string   `json:"heading"`
		Text         string   `json:"text"`
		BulletPoints []string `json:"bullet_points"`
	}
)

// GenerateVideoScript asks the LLM to turn the module's published content into
// a slide script and decodes the JSON answer into a VideoScript.
//
// The content body is passed through truncateText with a limit of 3000 before
// it is embedded in the prompt. Text around the outermost "{" ... "}" of the
// answer is ignored.
//
// It returns an error when the content lookup fails, no published content
// exists, the LLM call fails, the answer is not valid JSON, or the script has
// no sections.
func (g *ContentGenerator) GenerateVideoScript(ctx context.Context, module TrainingModule) (*VideoScript, error) {
	const systemPrompt = "Du bist ein Experte fuer Compliance-Schulungspraesentationen. Erstelle strukturierte Folien-Scripts als JSON. Antworte NUR mit dem JSON-Objekt."

	const userPromptTemplate = `Erstelle ein strukturiertes Folien-Script fuer eine Praesentations-Video-Schulung.

**Modul:** %s — %s
**Inhalt:**
%s

Erstelle 5-8 Folien. Jede Folie hat:
- heading: Kurze Ueberschrift (max 60 Zeichen)
- text: Erklaerungstext (1-2 Saetze)
- bullet_points: 2-4 Kernpunkte

Antworte NUR mit einem JSON-Objekt in diesem Format:
{
  "title": "Titel der Praesentation",
  "sections": [
    {
      "heading": "Folienueberschrift",
      "text": "Erklaerungstext fuer diese Folie.",
      "bullet_points": ["Punkt 1", "Punkt 2", "Punkt 3"]
    }
  ]
}`

	published, err := g.store.GetPublishedContent(ctx, module.ID)
	switch {
	case err != nil:
		return nil, fmt.Errorf("failed to get content: %w", err)
	case published == nil:
		return nil, fmt.Errorf("no published content for module %s", module.ModuleCode)
	}

	userPrompt := fmt.Sprintf(userPromptTemplate, module.ModuleCode, module.Title, truncateText(published.ContentBody, 3000))

	request := &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: systemPrompt},
			{Role: "user", Content: userPrompt},
		},
		Temperature: 0.15,
		MaxTokens:   4096,
	}
	resp, err := g.registry.Chat(ctx, request)
	if err != nil {
		return nil, fmt.Errorf("LLM video script generation failed: %w", err)
	}

	// Keep only the outermost JSON object; the LLM may add text around it.
	payload := resp.Message.Content
	if open, end := strings.Index(payload, "{"), strings.LastIndex(payload, "}"); open >= 0 && end > open {
		payload = payload[open : end+1]
	}

	script := new(VideoScript)
	if decodeErr := json.Unmarshal([]byte(payload), script); decodeErr != nil {
		return nil, fmt.Errorf("failed to parse video script JSON: %w", decodeErr)
	}
	if len(script.Sections) == 0 {
		return nil, fmt.Errorf("video script has no sections")
	}

	return script, nil
}

// GenerateVideo produces a presentation video for a module and tracks it as a
// TrainingMedia record.
//
// Steps, in order: reuse the module's published audio or generate and publish
// a new one; generate the slide script via LLM; create the media record in
// processing state; ask the TTS service to render the video from the script
// and the audio object. If rendering fails the record is marked failed with
// the error text; otherwise it is marked completed with the reported size and
// duration. The returned media carries the bucket and object key from the
// service response.
//
// It returns an error when no TTS client is configured, or when audio
// generation, script generation, record creation or rendering fails. An error
// from the published-audio lookup is not inspected; a nil result is handled as
// "no audio yet".
func (g *ContentGenerator) GenerateVideo(ctx context.Context, module TrainingModule) (*TrainingMedia, error) {
	if g.ttsClient == nil {
		return nil, fmt.Errorf("TTS client not configured")
	}

	// The video needs a narration track: reuse published audio if available.
	audio, _ := g.store.GetPublishedAudio(ctx, module.ID)
	if audio == nil {
		generated, audioErr := g.GenerateAudio(ctx, module)
		if audioErr != nil {
			return nil, fmt.Errorf("audio generation required but failed: %w", audioErr)
		}
		audio = generated
		// Publish the freshly generated audio right away.
		g.store.PublishMedia(ctx, audio.ID, true)
	}

	// Slide script from the LLM.
	script, err := g.GenerateVideoScript(ctx, module)
	if err != nil {
		return nil, fmt.Errorf("video script generation failed: %w", err)
	}

	// Register the media record before rendering starts.
	record := &TrainingMedia{
		ModuleID:    module.ID,
		MediaType:   MediaTypeVideo,
		Status:      MediaStatusProcessing,
		Bucket:      "compliance-training-video",
		ObjectKey:   fmt.Sprintf("video/%s/presentation.mp4", module.ID.String()),
		MimeType:    "video/mp4",
		Language:    "de",
		GeneratedBy: "tts_ffmpeg",
	}
	if createErr := g.store.CreateMedia(ctx, record); createErr != nil {
		return nil, fmt.Errorf("failed to create media record: %w", createErr)
	}

	// Render through the TTS service; the script is sent as a generic map.
	request := &TTSGenerateVideoRequest{
		Script: map[string]interface{}{
			"title":       script.Title,
			"module_code": module.ModuleCode,
			"sections":    script.Sections,
		},
		AudioObjectKey: audio.ObjectKey,
		ModuleID:       module.ID.String(),
	}
	rendered, renderErr := g.ttsClient.GenerateVideo(ctx, request)
	if renderErr != nil {
		g.store.UpdateMediaStatus(ctx, record.ID, MediaStatusFailed, 0, 0, renderErr.Error())
		return nil, fmt.Errorf("video generation failed: %w", renderErr)
	}

	// Copy the render result onto the in-memory record returned to the caller.
	record.Status = MediaStatusCompleted
	record.FileSizeBytes = rendered.SizeBytes
	record.DurationSeconds = rendered.DurationSeconds
	record.ObjectKey = rendered.ObjectKey
	record.Bucket = rendered.Bucket

	g.store.UpdateMediaStatus(ctx, record.ID, MediaStatusCompleted, rendered.SizeBytes, rendered.DurationSeconds, "")

	// Audit trail for the finished video.
	auditDetails := map[string]interface{}{
		"module_code":      module.ModuleCode,
		"media_id":         record.ID.String(),
		"duration_seconds": rendered.DurationSeconds,
		"size_bytes":       rendered.SizeBytes,
		"slides":           len(script.Sections),
	}
	g.store.LogAction(ctx, &AuditLogEntry{
		TenantID:   module.TenantID,
		Action:     AuditAction("video_generated"),
		EntityType: AuditEntityModule,
		EntityID:   &module.ID,
		Details:    auditDetails,
	})

	return record, nil
}

// truncateText limits text to at most maxLen bytes and appends "..." when
// something was cut off. Text that already fits is returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 character: it is moved back to
// the start of that character, so the result may be slightly shorter than
// maxLen bytes. A negative maxLen is treated as 0.
func truncateText(text string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(text) <= maxLen {
		return text
	}

	cut := maxLen
	// UTF-8 continuation bytes look like 10xxxxxx; step back until the cut
	// sits on the first byte of a character.
	for cut > 0 && text[cut]&0xC0 == 0x80 {
		cut--
	}
	return text[:cut] + "..."
}