package usecase import ( "fmt" "log" "strings" "golang.org/x/text/cases" "golang.org/x/text/language" ) // Compiler turns Master Controls into audit questionnaires. type Compiler struct { store *Store llmGen *LLMQuestionGenerator } // NewCompiler creates a Compiler with optional LLM generator. func NewCompiler(store *Store, llmGen *LLMQuestionGenerator) *Compiler { return &Compiler{store: store, llmGen: llmGen} } // Compile generates questions for a template. // // Flow (per Plan): // 1. Fetch MCs matching template filters from DB // 2. For each MC: check doc_check_controls → Mode A (deterministic) // 3. For remaining MCs: use LLM → Mode B // 4. For remaining MCs: derive from MC name → Mode A fallback // 5. Template hardcoded questions = absolute fallback if DB returns nothing func (c *Compiler) Compile(tmpl *Template) ([]Question, error) { // 1. Fetch MCs matching the template filters mcs, err := c.store.FetchMCsByFilters(tmpl.MCFilters) if err != nil { log.Printf("usecase: MC fetch failed: %v, falling back to template questions", err) return c.templateFallback(tmpl), nil } if len(mcs) == 0 { // No MCs in DB for these filters → use hardcoded template questions if len(tmpl.Questions) > 0 { return tmpl.Questions, nil } return nil, fmt.Errorf("no Master Controls found for filters %v", tmpl.MCFilters) } // 2. Check for existing doc_check_controls mcIDs := make([]string, len(mcs)) for i, mc := range mcs { mcIDs[i] = mc.MasterControlID } checkQuestions, _ := c.store.FetchCheckQuestions(mcIDs) // 3. 
Build questions: doc_check → LLM → deterministic var questions []Question var mcsWithoutQuestions []MCInfo qNum := 1 for _, mc := range mcs { // Mode A: existing doc_check_controls if cqs, ok := checkQuestions[mc.MasterControlID]; ok && len(cqs) > 0 { for _, cq := range cqs { questions = append(questions, Question{ ID: fmt.Sprintf("Q%d", qNum), MCID: mc.MasterControlID, MCName: mc.CanonicalName, Text: cq.Question, QuestionType: "yes_no", Severity: normalizeSeverity(cq.Severity), Regulation: mc.RegSource, PassCriteria: splitCriteria(cq.PassCriteria), FailCriteria: splitCriteria(cq.FailCriteria), }) qNum++ } continue } mcsWithoutQuestions = append(mcsWithoutQuestions, mc) } // Mode B: LLM for MCs without doc_check_controls if len(mcsWithoutQuestions) > 0 && c.llmGen != nil { llmQuestions, err := c.llmGen.GenerateQuestions(mcsWithoutQuestions, tmpl.Regulations) if err == nil && len(llmQuestions) > 0 { // Renumber for i := range llmQuestions { llmQuestions[i].ID = fmt.Sprintf("Q%d", qNum) qNum++ } questions = append(questions, llmQuestions...) 
mcsWithoutQuestions = nil // all handled } else if err != nil { log.Printf("usecase: LLM generation failed: %v, using deterministic fallback", err) } } // Mode A fallback: deterministic derivation for remaining MCs for _, mc := range mcsWithoutQuestions { questions = append(questions, Question{ ID: fmt.Sprintf("Q%d", qNum), MCID: mc.MasterControlID, MCName: mc.CanonicalName, Text: deriveQuestion(mc.CanonicalName), QuestionType: "yes_no", Severity: inferMCSeverity(mc.CanonicalName), Regulation: mc.RegSource, PassCriteria: []string{"Anforderung erfuellt und dokumentiert"}, FailCriteria: []string{"Nicht implementiert oder nicht nachweisbar"}, }) qNum++ if qNum > 50 { break } } // Merge: add template hardcoded questions that cover topics not yet covered if len(tmpl.Questions) > 0 { questions = mergeTemplateQuestions(questions, tmpl.Questions, qNum) } if len(questions) == 0 { return c.templateFallback(tmpl), nil } return questions, nil } // templateFallback returns hardcoded template questions or an error. func (c *Compiler) templateFallback(tmpl *Template) []Question { if len(tmpl.Questions) > 0 { return tmpl.Questions } return nil } // mergeTemplateQuestions adds template questions that aren't already // covered by MC-compiled questions (matched by keyword overlap). 
func mergeTemplateQuestions(compiled, template []Question, nextNum int) []Question { // Build set of covered MC topics coveredTopics := make(map[string]bool) for _, q := range compiled { if q.MCName != "" { coveredTopics[q.MCName] = true } // Also index key words from the question text for _, w := range extractKeywords(q.Text) { coveredTopics[w] = true } } qNum := nextNum for _, tq := range template { // Check if this template question's topic is already covered keywords := extractKeywords(tq.Text) covered := false for _, kw := range keywords { if coveredTopics[kw] { covered = true break } } if covered { continue } tq.ID = fmt.Sprintf("Q%d", qNum) compiled = append(compiled, tq) qNum++ } return compiled } // extractKeywords pulls significant words from a question for dedup. func extractKeywords(text string) []string { stopwords := map[string]bool{ "ist": true, "hat": true, "gibt": true, "es": true, "ein": true, "eine": true, "der": true, "die": true, "das": true, "den": true, "dem": true, "des": true, "oder": true, "und": true, "fuer": true, "nach": true, "mit": true, "von": true, "zu": true, "auf": true, "in": true, "an": true, "bei": true, "werden": true, "wird": true, "sind": true, "nicht": true, "nur": true, "auch": true, } words := strings.Fields(strings.ToLower(text)) var keywords []string for _, w := range words { w = strings.Trim(w, "?.,;:!\"'()") if len(w) > 3 && !stopwords[w] { keywords = append(keywords, w) } } return keywords } // deriveQuestion generates a human-readable question from an MC name. func deriveQuestion(canonicalName string) string { readable := strings.ReplaceAll(canonicalName, "_", " ") readable = cases.Title(language.German).String(readable) return fmt.Sprintf("Ist '%s' implementiert und dokumentiert?", readable) } // splitCriteria splits a pipe-separated criteria string. 
func splitCriteria(s string) []string {
	// Each segment is trimmed and empty segments are dropped. The result is
	// nil when nothing remains, including for input made up only of
	// separators or whitespace, so no junk criterion such as "|" is produced.
	if s == "" {
		return nil
	}
	parts := strings.Split(s, "|")
	result := make([]string, 0, len(parts))
	for _, p := range parts {
		if p = strings.TrimSpace(p); p != "" {
			result = append(result, p)
		}
	}
	if len(result) == 0 {
		return nil
	}
	return result
}

// normalizeSeverity maps doc_check severity to our format.
// Matching ignores case and surrounding whitespace. "HIGH" and "CRITICAL"
// map to "HIGH", "LOW" maps to "LOW", and everything else, including empty
// or unknown values, maps to "MEDIUM".
func normalizeSeverity(s string) string {
	switch strings.ToUpper(strings.TrimSpace(s)) {
	case "HIGH", "CRITICAL":
		return "HIGH"
	case "LOW":
		return "LOW"
	default:
		return "MEDIUM"
	}
}

// inferMCSeverity guesses severity from the MC topic name.
// It returns "HIGH" when the name contains one of the high-risk topic
// markers and "MEDIUM" otherwise. Matching ignores case and treats spaces
// and hyphens like underscores.
func inferMCSeverity(name string) string {
	high := []string{
		"encryption", "access_control", "incident", "vulnerability",
		"authentication", "key_management", "data_breach",
		"personal_data", "consent", "data_transfer",
	}

	// Normalize to lower-case snake_case. The markers contain neither spaces
	// nor hyphens, so every name that matched before still matches.
	normalized := strings.NewReplacer(" ", "_", "-", "_").Replace(strings.ToLower(name))

	for _, h := range high {
		if strings.Contains(normalized, h) {
			return "HIGH"
		}
	}
	return "MEDIUM"
}