Files
breakpilot-compliance/ai-compliance-sdk/internal/usecase/compiler_llm.go
T
Benjamin Admin 979fe20ea5
Build + Deploy / build-admin-compliance (push) Successful in 15s
Build + Deploy / build-dsms-node (push) Successful in 14s
CI / branch-name (push) Has been skipped
Build + Deploy / build-backend-compliance (push) Successful in 13s
Build + Deploy / build-ai-sdk (push) Successful in 11s
Build + Deploy / build-developer-portal (push) Successful in 12s
Build + Deploy / build-tts (push) Successful in 17s
Build + Deploy / build-document-crawler (push) Successful in 15s
Build + Deploy / build-dsms-gateway (push) Successful in 11s
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 24s
CI / validate-canonical-controls (push) Successful in 14s
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 16s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m46s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 44s
Build + Deploy / trigger-orca (push) Successful in 2m16s
fix(use-case-compiler): increase LLM timeout to 45s, reduce batch to 5
Mac Mini M4 needs more time for qwen3:30b. Reduced batch from 10→5
MCs and increased timeout from 20→45s to give LLM a fair chance.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-12 18:02:05 +02:00

173 lines
4.6 KiB
Go

package usecase
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
"github.com/breakpilot/ai-compliance-sdk/internal/llm"
)
// LLMQuestionGenerator uses an LLM to create questions from MC metadata
// when no pre-defined questions or doc_check_controls exist (Mode B).
type LLMQuestionGenerator struct {
	// registry supplies the chat provider; GenerateQuestions errors if nil.
	registry *llm.ProviderRegistry
}
// NewLLMQuestionGenerator creates a new LLM-based generator.
func NewLLMQuestionGenerator(registry *llm.ProviderRegistry) *LLMQuestionGenerator {
	g := new(LLMQuestionGenerator)
	g.registry = registry
	return g
}
// llmQuestion is the JSON structure we expect from the LLM.
type llmQuestion struct {
	// MCName echoes the canonical_name of the Master Control, ideally
	// verbatim; GenerateQuestions falls back to fuzzy matching otherwise.
	MCName string `json:"mc_name"`
	// Question is the generated yes/no audit question (German, per systemPrompt).
	Question string `json:"question"`
	// PassCriteria lists conditions under which the check passes;
	// entries with an empty list are discarded by parseLLMResponse.
	PassCriteria []string `json:"pass_criteria"`
	// FailCriteria lists conditions under which the check fails.
	FailCriteria []string `json:"fail_criteria"`
	// Severity is the raw model output; normalized via normalizeSeverity.
	Severity string `json:"severity"`
}
// maxLLMMCs limits how many MCs we send to the LLM in one batch.
// Kept small (10→5 per commit history) so a local model answers within
// the 45s timeout used by GenerateQuestions.
const maxLLMMCs = 5
// GenerateQuestions generates questions for MCs using a single batched LLM call.
// At most maxLLMMCs entries of mcs are sent; any beyond that are silently
// dropped. regulations is passed through to the prompt for context.
// It returns an error when no provider is configured, the chat call fails,
// or the response contains no parseable questions.
func (g *LLMQuestionGenerator) GenerateQuestions(mcs []MCInfo, regulations []string) ([]Question, error) {
	if g.registry == nil {
		return nil, fmt.Errorf("no LLM provider configured")
	}
	// Limit batch size so a local model stays within the timeout below.
	batch := mcs
	if len(batch) > maxLLMMCs {
		batch = batch[:maxLLMMCs]
	}
	// Generous 45s budget: per commit history, a local qwen3:30b on a
	// Mac Mini M4 needs more than the previous 20s.
	ctx, cancel := context.WithTimeout(context.Background(), 45*time.Second)
	defer cancel()
	prompt := buildBatchPrompt(batch, regulations)
	resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
		Messages: []llm.Message{
			{Role: "system", Content: systemPrompt},
			{Role: "user", Content: prompt},
		},
		Temperature: 0.3,
		MaxTokens:   2000,
	})
	if err != nil {
		return nil, fmt.Errorf("LLM call failed: %w", err)
	}
	parsed := parseLLMResponse(resp.Message.Content)
	if len(parsed) == 0 {
		return nil, fmt.Errorf("LLM returned no valid questions")
	}
	// Index the batch by exact canonical name for the primary lookup.
	mcByName := make(map[string]MCInfo, len(batch))
	for _, mc := range batch {
		mcByName[mc.CanonicalName] = mc
	}
	questions := make([]Question, 0, len(parsed))
	for _, lq := range parsed {
		mc, ok := mcByName[lq.MCName]
		if !ok {
			// Fuzzy fallback: iterate the batch slice — NOT the map — so
			// the chosen MC is deterministic when several names would
			// match (map iteration order is randomized in Go).
			for _, candidate := range batch {
				name := candidate.CanonicalName
				if strings.Contains(lq.MCName, name) || strings.Contains(name, lq.MCName) {
					mc = candidate
					ok = true
					break
				}
			}
		}
		q := Question{
			Text:         lq.Question,
			QuestionType: "yes_no",
			Severity:     normalizeSeverity(lq.Severity),
			PassCriteria: lq.PassCriteria,
			FailCriteria: lq.FailCriteria,
		}
		// Unmatched questions are kept but carry no MC linkage.
		if ok {
			q.MCID = mc.MasterControlID
			q.MCName = mc.CanonicalName
			q.Regulation = mc.RegSource
		}
		questions = append(questions, q)
	}
	return questions, nil
}
// systemPrompt instructs the model (in German) to act as a compliance
// expert and answer with ONLY a JSON array whose elements match the
// llmQuestion schema: mc_name (echoed verbatim), question (yes/no,
// German), pass_criteria, fail_criteria, and severity (HIGH/MEDIUM/LOW).
// One question per Master Control, no surrounding explanation.
const systemPrompt = `Du bist ein Compliance-Experte. Generiere praezise Prueffragen fuer Compliance-Audits.
Antworte NUR mit einem JSON-Array. Jedes Element hat:
- "mc_name": Der canonical_name des Master Controls (exakt wie im Input)
- "question": Eine klare Ja/Nein-Frage auf Deutsch
- "pass_criteria": Array mit 1-2 Kriterien fuer "bestanden"
- "fail_criteria": Array mit 1-2 Kriterien fuer "nicht bestanden"
- "severity": "HIGH", "MEDIUM" oder "LOW"
Generiere 1 Frage pro Master Control. Keine Erklaerungen, nur das JSON-Array.`
// buildBatchPrompt renders the user prompt listing every MC in the batch.
// Each MC's canonical name is quoted verbatim (via %q) so the model can
// echo it back exactly in the "mc_name" field for re-matching.
func buildBatchPrompt(mcs []MCInfo, regulations []string) string {
	regStr := strings.Join(regulations, ", ")
	var sb strings.Builder
	sb.WriteString(fmt.Sprintf("Regulierungen: %s\n\nMaster Controls:\n", regStr))
	for i, mc := range mcs {
		// Dead "readable" variable (underscores→spaces) removed: it was
		// computed but never used in the prompt.
		sb.WriteString(fmt.Sprintf("%d. mc_name=%q (%d Controls, Quelle: %s)\n",
			i+1, mc.CanonicalName, mc.TotalControls, mc.RegSource))
	}
	sb.WriteString("\nGeneriere je 1 Prueffrage pro Master Control.")
	return sb.String()
}
// buildPrompt renders a prompt for a single Master Control, asking for
// 1-2 audit questions. The canonical name is shown with underscores
// replaced by spaces for readability.
func buildPrompt(mc MCInfo, regulations []string) string {
	var b strings.Builder
	fmt.Fprintf(&b, "Master Control: \"%s\" (%d Atomic Controls)\n",
		strings.ReplaceAll(mc.CanonicalName, "_", " "), mc.TotalControls)
	fmt.Fprintf(&b, "Regulierungen: %s\n", strings.Join(regulations, ", "))
	fmt.Fprintf(&b, "Regulation Source: %s\n", mc.RegSource)
	b.WriteString("Generiere 1-2 praezise Prueffragen fuer diesen Master Control.")
	return b.String()
}
// parseLLMResponse extracts and decodes the JSON array from an LLM reply.
// Prose or code fences around the array are stripped by slicing from the
// first '[' to the last ']'. Entries without a question text or without
// pass criteria are dropped. Returns nil on malformed JSON or when no
// usable entry remains (best-effort: the caller treats nil as "no
// valid questions").
func parseLLMResponse(content string) []llmQuestion {
	trimmed := strings.TrimSpace(content)
	start, stop := strings.Index(trimmed, "["), strings.LastIndex(trimmed, "]")
	if start >= 0 && stop > start {
		trimmed = trimmed[start : stop+1]
	}
	var decoded []llmQuestion
	if err := json.Unmarshal([]byte(trimmed), &decoded); err != nil {
		return nil
	}
	// Keep only well-formed entries; a nil result signals "nothing usable".
	var valid []llmQuestion
	for _, q := range decoded {
		if q.Question == "" || len(q.PassCriteria) == 0 {
			continue
		}
		valid = append(valid, q)
	}
	return valid
}