Files
breakpilot-compliance/ai-compliance-sdk/internal/usecase/compiler.go
T
Benjamin Admin e785b6d695
Build + Deploy / build-admin-compliance (push) Successful in 14s
Build + Deploy / build-developer-portal (push) Successful in 10s
Build + Deploy / build-tts (push) Successful in 11s
Build + Deploy / build-document-crawler (push) Successful in 20s
Build + Deploy / build-dsms-gateway (push) Successful in 13s
Build + Deploy / build-dsms-node (push) Successful in 13s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 18s
Build + Deploy / trigger-orca (push) Successful in 2m26s
Build + Deploy / build-backend-compliance (push) Successful in 13s
Build + Deploy / build-ai-sdk (push) Successful in 11s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m50s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 43s
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 25s
CI / validate-canonical-controls (push) Successful in 16s
fix(use-case-compiler): compile questions from MCs, not hardcoded
Changes the compile flow to always query Master Controls from DB first:
1. doc_check_controls → Mode A (deterministic)
2. LLM generation via Ollama/Claude → Mode B
3. Derive from MC name → fallback
4. Template hardcoded questions → absolute fallback

Previously, templates with pre-defined questions just returned those
without ever hitting the DB. Now MC-compiled questions take priority
and template questions fill gaps for uncovered topics.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-12 17:34:41 +02:00

251 lines
6.9 KiB
Go

package usecase
import (
"fmt"
"log"
"strings"
"golang.org/x/text/cases"
"golang.org/x/text/language"
)
// Compiler turns Master Controls into audit questionnaires.
//
// It combines questions already stored as doc_check_controls, questions
// produced by an optional LLM generator, and questions derived from the
// Master Control name; see Compile for the exact order.
type Compiler struct {
	// store is queried for Master Controls and their check questions.
	store *Store
	// llmGen generates questions for Master Controls that have no check
	// questions. It may be nil, in which case Compile skips LLM generation.
	llmGen *LLMQuestionGenerator
}
// NewCompiler builds a Compiler backed by the given store.
//
// llmGen is optional: pass nil to compile without LLM-generated questions.
func NewCompiler(store *Store, llmGen *LLMQuestionGenerator) *Compiler {
	c := new(Compiler)
	c.store = store
	c.llmGen = llmGen
	return c
}
// Compile generates the audit questions for a template.
//
// Flow:
//  1. Fetch the Master Controls (MCs) matching the template filters.
//  2. For each MC that has doc_check_controls: emit those questions
//     (Mode A, deterministic).
//  3. For the remaining MCs: ask the LLM generator, if one is configured
//     (Mode B).
//  4. For MCs still without questions: derive a question from the MC name
//     (Mode A fallback).
//  5. Append hardcoded template questions whose keywords are not yet covered.
//
// The hardcoded template questions are the absolute fallback when the MC
// fetch fails or yields nothing. An error is returned only when neither
// Master Controls nor template questions are available.
func (c *Compiler) Compile(tmpl *Template) ([]Question, error) {
	// maxQuestions bounds the numbering reached by the deterministic fallback.
	const maxQuestions = 50

	// 1. Fetch MCs matching the template filters.
	mcs, err := c.store.FetchMCsByFilters(tmpl.MCFilters)
	if err != nil {
		// With no template questions there is nothing to fall back to, so
		// surface the failure instead of returning an empty questionnaire.
		if len(tmpl.Questions) == 0 {
			return nil, fmt.Errorf("fetching Master Controls for filters %v: %w", tmpl.MCFilters, err)
		}
		log.Printf("usecase: MC fetch failed: %v, falling back to template questions", err)
		return c.templateFallback(tmpl), nil
	}
	if len(mcs) == 0 {
		// No MCs in DB for these filters: use hardcoded template questions.
		if len(tmpl.Questions) > 0 {
			return tmpl.Questions, nil
		}
		return nil, fmt.Errorf("no Master Controls found for filters %v", tmpl.MCFilters)
	}

	// 2. Look up existing doc_check_controls for the fetched MCs.
	mcIDs := make([]string, len(mcs))
	for i, mc := range mcs {
		mcIDs[i] = mc.MasterControlID
	}
	checkQuestions, err := c.store.FetchCheckQuestions(mcIDs)
	if err != nil {
		// Best effort: MCs without usable check questions are still covered
		// by the LLM or the deterministic fallback below.
		log.Printf("usecase: check question fetch failed: %v, continuing without doc_check_controls", err)
	}

	// 3. Build questions: doc_check -> LLM -> deterministic.
	var questions []Question
	var mcsWithoutQuestions []MCInfo
	qNum := 1 // next question number; IDs are "Q1", "Q2", ...
	for _, mc := range mcs {
		cqs := checkQuestions[mc.MasterControlID]
		if len(cqs) == 0 {
			mcsWithoutQuestions = append(mcsWithoutQuestions, mc)
			continue
		}
		// Mode A: existing doc_check_controls.
		for _, cq := range cqs {
			questions = append(questions, Question{
				ID:           fmt.Sprintf("Q%d", qNum),
				MCID:         mc.MasterControlID,
				MCName:       mc.CanonicalName,
				Text:         cq.Question,
				QuestionType: "yes_no",
				Severity:     normalizeSeverity(cq.Severity),
				Regulation:   mc.RegSource,
				PassCriteria: splitCriteria(cq.PassCriteria),
				FailCriteria: splitCriteria(cq.FailCriteria),
			})
			qNum++
		}
	}

	// Mode B: LLM for MCs without doc_check_controls.
	if len(mcsWithoutQuestions) > 0 && c.llmGen != nil {
		llmQuestions, genErr := c.llmGen.GenerateQuestions(mcsWithoutQuestions, tmpl.Regulations)
		switch {
		case genErr != nil:
			log.Printf("usecase: LLM generation failed: %v, using deterministic fallback", genErr)
		case len(llmQuestions) > 0:
			// Renumber so IDs continue after the Mode A questions.
			for i := range llmQuestions {
				llmQuestions[i].ID = fmt.Sprintf("Q%d", qNum)
				qNum++
			}
			questions = append(questions, llmQuestions...)
			// Any non-empty LLM result is treated as covering all remaining MCs.
			mcsWithoutQuestions = nil
		}
	}

	// Mode A fallback: deterministic derivation for remaining MCs.
	for _, mc := range mcsWithoutQuestions {
		questions = append(questions, Question{
			ID:           fmt.Sprintf("Q%d", qNum),
			MCID:         mc.MasterControlID,
			MCName:       mc.CanonicalName,
			Text:         deriveQuestion(mc.CanonicalName),
			QuestionType: "yes_no",
			Severity:     inferMCSeverity(mc.CanonicalName),
			Regulation:   mc.RegSource,
			PassCriteria: []string{"Anforderung erfuellt und dokumentiert"},
			FailCriteria: []string{"Nicht implementiert oder nicht nachweisbar"},
		})
		qNum++
		// The cap is checked after appending and only in this loop, so
		// Mode A/B output and merged template questions are not limited by it.
		if qNum > maxQuestions {
			break
		}
	}

	// Merge: add template hardcoded questions that cover topics not yet covered.
	if len(tmpl.Questions) > 0 {
		questions = mergeTemplateQuestions(questions, tmpl.Questions, qNum)
	}
	if len(questions) == 0 {
		return c.templateFallback(tmpl), nil
	}
	return questions, nil
}
// templateFallback returns the template's hardcoded questions, or nil when
// the template defines none. It never reports an error.
func (c *Compiler) templateFallback(tmpl *Template) []Question {
	if len(tmpl.Questions) == 0 {
		return nil
	}
	return tmpl.Questions
}
// mergeTemplateQuestions appends to compiled every template question that
// shares no keyword with the already compiled questions.
//
// The covered set holds the MC names of the compiled questions plus the
// keywords of their texts. A template question is skipped as soon as one of
// its own keywords is found in that set. Appended questions are renumbered
// "Q<n>" starting at nextNum; they do not extend the covered set.
func mergeTemplateQuestions(compiled, template []Question, nextNum int) []Question {
	seen := make(map[string]bool)
	for i := range compiled {
		if name := compiled[i].MCName; name != "" {
			seen[name] = true
		}
		for _, kw := range extractKeywords(compiled[i].Text) {
			seen[kw] = true
		}
	}

	// overlaps reports whether any keyword of text is already covered.
	overlaps := func(text string) bool {
		for _, kw := range extractKeywords(text) {
			if seen[kw] {
				return true
			}
		}
		return false
	}

	for _, candidate := range template {
		if overlaps(candidate.Text) {
			continue
		}
		// candidate is a copy, so the template's own question keeps its ID.
		candidate.ID = fmt.Sprintf("Q%d", nextNum)
		nextNum++
		compiled = append(compiled, candidate)
	}
	return compiled
}
// keywordStopwords lists lowercase German function words that are never
// reported as keywords. It is built once and only read afterwards.
var keywordStopwords = map[string]bool{
	"ist": true, "hat": true, "gibt": true, "es": true, "ein": true,
	"eine": true, "der": true, "die": true, "das": true, "den": true,
	"dem": true, "des": true, "oder": true, "und": true, "fuer": true,
	"nach": true, "mit": true, "von": true, "zu": true, "auf": true,
	"in": true, "an": true, "bei": true, "werden": true, "wird": true,
	"sind": true, "nicht": true, "nur": true, "auch": true,
}

// extractKeywords pulls the significant words from a question for dedup.
//
// The text is lowercased and split on whitespace, and surrounding
// punctuation is trimmed from each word. A word is kept when it is longer
// than three characters and is not a stopword. The result is nil when no
// word qualifies.
func extractKeywords(text string) []string {
	var keywords []string
	for _, w := range strings.Fields(strings.ToLower(text)) {
		w = strings.Trim(w, "?.,;:!\"'()")
		// Measure in runes, not bytes: a short word with an umlaut such as
		// "für" is four bytes long and would otherwise slip past the filter.
		if len([]rune(w)) > 3 && !keywordStopwords[w] {
			keywords = append(keywords, w)
		}
	}
	return keywords
}
// deriveQuestion turns an MC canonical name into a German yes/no question.
//
// Underscores become spaces and the words are title-cased with the German
// caser before being placed into the fixed question sentence.
func deriveQuestion(canonicalName string) string {
	titleCaser := cases.Title(language.German)
	spaced := strings.Join(strings.Split(canonicalName, "_"), " ")
	return fmt.Sprintf("Ist '%s' implementiert und dokumentiert?", titleCaser.String(spaced))
}
// splitCriteria splits a pipe-separated criteria string.
//
// Each part is trimmed of surrounding whitespace and empty parts are
// dropped. The result is nil when no non-empty part remains, which covers
// the empty string as well as inputs made only of separators and whitespace.
func splitCriteria(s string) []string {
	var result []string
	for _, p := range strings.Split(s, "|") {
		if p = strings.TrimSpace(p); p != "" {
			result = append(result, p)
		}
	}
	return result
}
// normalizeSeverity maps a doc_check severity onto HIGH, MEDIUM or LOW.
//
// Matching ignores case and surrounding whitespace. CRITICAL is folded into
// HIGH; anything unrecognised, including the empty string, becomes MEDIUM.
func normalizeSeverity(s string) string {
	switch strings.ToUpper(strings.TrimSpace(s)) {
	case "LOW":
		return "LOW"
	case "CRITICAL", "HIGH":
		return "HIGH"
	}
	return "MEDIUM"
}
// inferMCSeverity guesses a severity from the MC topic name.
//
// It returns "HIGH" when the name contains one of the security- or
// privacy-critical markers below (case-sensitive substring match) and
// "MEDIUM" otherwise.
func inferMCSeverity(name string) string {
	criticalMarkers := [...]string{
		"encryption",
		"access_control",
		"incident",
		"vulnerability",
		"authentication",
		"key_management",
		"data_breach",
		"personal_data",
		"consent",
		"data_transfer",
	}
	severity := "MEDIUM"
	for i := 0; i < len(criticalMarkers); i++ {
		if strings.Contains(name, criticalMarkers[i]) {
			severity = "HIGH"
			break
		}
	}
	return severity
}