package usecase import ( "context" "encoding/json" "fmt" "strings" "time" "github.com/breakpilot/ai-compliance-sdk/internal/llm" ) // LLMQuestionGenerator uses an LLM to create questions from MC metadata // when no pre-defined questions or doc_check_controls exist (Mode B). type LLMQuestionGenerator struct { registry *llm.ProviderRegistry } // NewLLMQuestionGenerator creates a new LLM-based generator. func NewLLMQuestionGenerator(registry *llm.ProviderRegistry) *LLMQuestionGenerator { return &LLMQuestionGenerator{registry: registry} } // llmQuestion is the JSON structure we expect from the LLM. type llmQuestion struct { Question string `json:"question"` PassCriteria []string `json:"pass_criteria"` FailCriteria []string `json:"fail_criteria"` Severity string `json:"severity"` } // GenerateQuestions generates questions for a list of MCs using the LLM. func (g *LLMQuestionGenerator) GenerateQuestions(mcs []MCInfo, regulations []string) ([]Question, error) { if g.registry == nil { return nil, fmt.Errorf("no LLM provider configured") } ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) defer cancel() var questions []Question qNum := 1 for _, mc := range mcs { prompt := buildPrompt(mc, regulations) resp, err := g.registry.Chat(ctx, &llm.ChatRequest{ Messages: []llm.Message{ {Role: "system", Content: systemPrompt}, {Role: "user", Content: prompt}, }, Temperature: 0.3, MaxTokens: 500, }) if err != nil { // Fallback to deterministic generation questions = append(questions, GenerateFromMC(mc)...) qNum += len(GenerateFromMC(mc)) continue } parsed := parseLLMResponse(resp.Message.Content) for _, lq := range parsed { q := Question{ ID: fmt.Sprintf("Q%d", qNum), MCID: mc.MasterControlID, MCName: mc.CanonicalName, Text: lq.Question, QuestionType: "yes_no", Severity: normalizeSeverity(lq.Severity), Regulation: mc.RegSource, PassCriteria: lq.PassCriteria, FailCriteria: lq.FailCriteria, } questions = append(questions, q) qNum++ } // Cap total questions if qNum > 50 { break } } return questions, nil } const systemPrompt = `Du bist ein Compliance-Experte. Generiere praezise Prueffragen fuer Compliance-Audits. Antworte NUR mit einem JSON-Array. Jedes Element hat: - "question": Eine klare Ja/Nein-Frage auf Deutsch - "pass_criteria": Array mit 1-2 Kriterien fuer "bestanden" - "fail_criteria": Array mit 1-2 Kriterien fuer "nicht bestanden" - "severity": "HIGH", "MEDIUM" oder "LOW" Keine Erklaerungen, nur das JSON-Array.` func buildPrompt(mc MCInfo, regulations []string) string { readable := strings.ReplaceAll(mc.CanonicalName, "_", " ") regStr := strings.Join(regulations, ", ") return fmt.Sprintf( `Master Control: "%s" (%d Atomic Controls) Regulierungen: %s Regulation Source: %s Generiere 1-2 praezise Prueffragen fuer diesen Master Control.`, readable, mc.TotalControls, regStr, mc.RegSource) } func parseLLMResponse(content string) []llmQuestion { content = strings.TrimSpace(content) // Try to find JSON array in the response start := strings.Index(content, "[") end := strings.LastIndex(content, "]") if start >= 0 && end > start { content = content[start : end+1] } var questions []llmQuestion if err := json.Unmarshal([]byte(content), &questions); err != nil { return nil } // Validate var valid []llmQuestion for _, q := range questions { if q.Question != "" && len(q.PassCriteria) > 0 { valid = append(valid, q) } } return valid }