package usecase import ( "context" "encoding/json" "fmt" "strings" "time" "github.com/breakpilot/ai-compliance-sdk/internal/llm" ) // LLMQuestionGenerator uses an LLM to create questions from MC metadata // when no pre-defined questions or doc_check_controls exist (Mode B). type LLMQuestionGenerator struct { registry *llm.ProviderRegistry } // NewLLMQuestionGenerator creates a new LLM-based generator. func NewLLMQuestionGenerator(registry *llm.ProviderRegistry) *LLMQuestionGenerator { return &LLMQuestionGenerator{registry: registry} } // llmQuestion is the JSON structure we expect from the LLM. type llmQuestion struct { MCName string `json:"mc_name"` Question string `json:"question"` PassCriteria []string `json:"pass_criteria"` FailCriteria []string `json:"fail_criteria"` Severity string `json:"severity"` } // maxLLMMCs limits how many MCs we send to the LLM in one batch. const maxLLMMCs = 5 // GenerateQuestions generates questions for MCs using a single batched LLM call. func (g *LLMQuestionGenerator) GenerateQuestions(mcs []MCInfo, regulations []string) ([]Question, error) { if g.registry == nil { return nil, fmt.Errorf("no LLM provider configured") } // Limit batch size batch := mcs if len(batch) > maxLLMMCs { batch = batch[:maxLLMMCs] } ctx, cancel := context.WithTimeout(context.Background(), 45*time.Second) defer cancel() prompt := buildBatchPrompt(batch, regulations) resp, err := g.registry.Chat(ctx, &llm.ChatRequest{ Messages: []llm.Message{ {Role: "system", Content: systemPrompt}, {Role: "user", Content: prompt}, }, Temperature: 0.3, MaxTokens: 2000, }) if err != nil { return nil, fmt.Errorf("LLM call failed: %w", err) } parsed := parseLLMResponse(resp.Message.Content) if len(parsed) == 0 { return nil, fmt.Errorf("LLM returned no valid questions") } // Map parsed questions back to MCs mcByName := make(map[string]MCInfo) for _, mc := range batch { mcByName[mc.CanonicalName] = mc } var questions []Question for _, lq := range parsed { mc, ok := mcByName[lq.MCName] if !ok { // Try fuzzy match for name, m := range mcByName { if strings.Contains(lq.MCName, name) || strings.Contains(name, lq.MCName) { mc = m ok = true break } } } q := Question{ Text: lq.Question, QuestionType: "yes_no", Severity: normalizeSeverity(lq.Severity), PassCriteria: lq.PassCriteria, FailCriteria: lq.FailCriteria, } if ok { q.MCID = mc.MasterControlID q.MCName = mc.CanonicalName q.Regulation = mc.RegSource } questions = append(questions, q) } return questions, nil } const systemPrompt = `Du bist ein Compliance-Experte. Generiere praezise Prueffragen fuer Compliance-Audits. Antworte NUR mit einem JSON-Array. Jedes Element hat: - "mc_name": Der canonical_name des Master Controls (exakt wie im Input) - "question": Eine klare Ja/Nein-Frage auf Deutsch - "pass_criteria": Array mit 1-2 Kriterien fuer "bestanden" - "fail_criteria": Array mit 1-2 Kriterien fuer "nicht bestanden" - "severity": "HIGH", "MEDIUM" oder "LOW" Generiere 1 Frage pro Master Control. Keine Erklaerungen, nur das JSON-Array.` func buildBatchPrompt(mcs []MCInfo, regulations []string) string { regStr := strings.Join(regulations, ", ") var sb strings.Builder sb.WriteString(fmt.Sprintf("Regulierungen: %s\n\nMaster Controls:\n", regStr)) for i, mc := range mcs { readable := strings.ReplaceAll(mc.CanonicalName, "_", " ") sb.WriteString(fmt.Sprintf("%d. mc_name=%q (%d Controls, Quelle: %s)\n", i+1, mc.CanonicalName, mc.TotalControls, mc.RegSource)) _ = readable } sb.WriteString("\nGeneriere je 1 Prueffrage pro Master Control.") return sb.String() } func buildPrompt(mc MCInfo, regulations []string) string { readable := strings.ReplaceAll(mc.CanonicalName, "_", " ") regStr := strings.Join(regulations, ", ") return fmt.Sprintf( `Master Control: "%s" (%d Atomic Controls) Regulierungen: %s Regulation Source: %s Generiere 1-2 praezise Prueffragen fuer diesen Master Control.`, readable, mc.TotalControls, regStr, mc.RegSource) } func parseLLMResponse(content string) []llmQuestion { content = strings.TrimSpace(content) // Try to find JSON array in the response start := strings.Index(content, "[") end := strings.LastIndex(content, "]") if start >= 0 && end > start { content = content[start : end+1] } var questions []llmQuestion if err := json.Unmarshal([]byte(content), &questions); err != nil { return nil } // Validate var valid []llmQuestion for _, q := range questions { if q.Question != "" && len(q.PassCriteria) > 0 { valid = append(valid, q) } } return valid }