de808190dd
Build + Deploy / build-admin-compliance (push) Successful in 14s
CI / secret-scan (push) Has been skipped
Build + Deploy / build-backend-compliance (push) Successful in 12s
Build + Deploy / build-ai-sdk (push) Successful in 48s
Build + Deploy / build-developer-portal (push) Successful in 13s
Build + Deploy / build-tts (push) Successful in 17s
Build + Deploy / build-document-crawler (push) Successful in 13s
Build + Deploy / build-dsms-gateway (push) Successful in 12s
Build + Deploy / build-dsms-node (push) Successful in 14s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 16s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m48s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-backend (push) Successful in 39s
CI / test-python-document-crawler (push) Successful in 29s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 18s
CI / test-go (push) Successful in 45s
Build + Deploy / trigger-orca (push) Successful in 2m21s
Single LLM calls per MC caused 2min+ timeouts. Now batches up to 10 MCs in one prompt with 20s timeout. LLM failure falls through to deterministic derivation gracefully. Proxy timeout increased to 60s. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
173 lines
4.6 KiB
Go
173 lines
4.6 KiB
Go
package usecase
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/breakpilot/ai-compliance-sdk/internal/llm"
|
|
)
|
|
|
|
// LLMQuestionGenerator uses an LLM to create questions from MC metadata
|
|
// when no pre-defined questions or doc_check_controls exist (Mode B).
|
|
type LLMQuestionGenerator struct {
|
|
registry *llm.ProviderRegistry
|
|
}
|
|
|
|
// NewLLMQuestionGenerator creates a new LLM-based generator.
|
|
func NewLLMQuestionGenerator(registry *llm.ProviderRegistry) *LLMQuestionGenerator {
|
|
return &LLMQuestionGenerator{registry: registry}
|
|
}
|
|
|
|
// llmQuestion mirrors one element of the JSON array the LLM is asked to
// return. The JSON keys match the field list spelled out in systemPrompt.
type llmQuestion struct {
	// MCName is the master-control name echoed back by the model; it is used
	// to map the question to its originating control.
	MCName string `json:"mc_name"`
	// Question is the audit question text.
	Question string `json:"question"`
	// PassCriteria lists the conditions under which the check passes.
	PassCriteria []string `json:"pass_criteria"`
	// FailCriteria lists the conditions under which the check fails.
	FailCriteria []string `json:"fail_criteria"`
	// Severity is the raw severity label as returned by the model.
	Severity string `json:"severity"`
}
|
|
|
|
// maxLLMMCs is the upper bound on the number of master controls included in
// a single batched LLM prompt.
const maxLLMMCs = 10
|
|
|
|
// GenerateQuestions generates questions for MCs using a single batched LLM call.
|
|
func (g *LLMQuestionGenerator) GenerateQuestions(mcs []MCInfo, regulations []string) ([]Question, error) {
|
|
if g.registry == nil {
|
|
return nil, fmt.Errorf("no LLM provider configured")
|
|
}
|
|
|
|
// Limit batch size
|
|
batch := mcs
|
|
if len(batch) > maxLLMMCs {
|
|
batch = batch[:maxLLMMCs]
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
|
|
defer cancel()
|
|
|
|
prompt := buildBatchPrompt(batch, regulations)
|
|
|
|
resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
|
|
Messages: []llm.Message{
|
|
{Role: "system", Content: systemPrompt},
|
|
{Role: "user", Content: prompt},
|
|
},
|
|
Temperature: 0.3,
|
|
MaxTokens: 2000,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("LLM call failed: %w", err)
|
|
}
|
|
|
|
parsed := parseLLMResponse(resp.Message.Content)
|
|
if len(parsed) == 0 {
|
|
return nil, fmt.Errorf("LLM returned no valid questions")
|
|
}
|
|
|
|
// Map parsed questions back to MCs
|
|
mcByName := make(map[string]MCInfo)
|
|
for _, mc := range batch {
|
|
mcByName[mc.CanonicalName] = mc
|
|
}
|
|
|
|
var questions []Question
|
|
for _, lq := range parsed {
|
|
mc, ok := mcByName[lq.MCName]
|
|
if !ok {
|
|
// Try fuzzy match
|
|
for name, m := range mcByName {
|
|
if strings.Contains(lq.MCName, name) || strings.Contains(name, lq.MCName) {
|
|
mc = m
|
|
ok = true
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
q := Question{
|
|
Text: lq.Question,
|
|
QuestionType: "yes_no",
|
|
Severity: normalizeSeverity(lq.Severity),
|
|
PassCriteria: lq.PassCriteria,
|
|
FailCriteria: lq.FailCriteria,
|
|
}
|
|
if ok {
|
|
q.MCID = mc.MasterControlID
|
|
q.MCName = mc.CanonicalName
|
|
q.Regulation = mc.RegSource
|
|
}
|
|
questions = append(questions, q)
|
|
}
|
|
|
|
return questions, nil
|
|
}
|
|
|
|
// systemPrompt is the system message sent with every question-generation
// request. It instructs the model (in German) to answer with nothing but a
// JSON array whose elements carry the keys decoded into llmQuestion.
const systemPrompt = `Du bist ein Compliance-Experte. Generiere praezise Prueffragen fuer Compliance-Audits.

Antworte NUR mit einem JSON-Array. Jedes Element hat:
- "mc_name": Der canonical_name des Master Controls (exakt wie im Input)
- "question": Eine klare Ja/Nein-Frage auf Deutsch
- "pass_criteria": Array mit 1-2 Kriterien fuer "bestanden"
- "fail_criteria": Array mit 1-2 Kriterien fuer "nicht bestanden"
- "severity": "HIGH", "MEDIUM" oder "LOW"

Generiere 1 Frage pro Master Control. Keine Erklaerungen, nur das JSON-Array.`
|
|
|
|
func buildBatchPrompt(mcs []MCInfo, regulations []string) string {
|
|
regStr := strings.Join(regulations, ", ")
|
|
|
|
var sb strings.Builder
|
|
sb.WriteString(fmt.Sprintf("Regulierungen: %s\n\nMaster Controls:\n", regStr))
|
|
|
|
for i, mc := range mcs {
|
|
readable := strings.ReplaceAll(mc.CanonicalName, "_", " ")
|
|
sb.WriteString(fmt.Sprintf("%d. mc_name=%q (%d Controls, Quelle: %s)\n",
|
|
i+1, mc.CanonicalName, mc.TotalControls, mc.RegSource))
|
|
_ = readable
|
|
}
|
|
|
|
sb.WriteString("\nGeneriere je 1 Prueffrage pro Master Control.")
|
|
return sb.String()
|
|
}
|
|
|
|
func buildPrompt(mc MCInfo, regulations []string) string {
|
|
readable := strings.ReplaceAll(mc.CanonicalName, "_", " ")
|
|
regStr := strings.Join(regulations, ", ")
|
|
|
|
return fmt.Sprintf(
|
|
`Master Control: "%s" (%d Atomic Controls)
|
|
Regulierungen: %s
|
|
Regulation Source: %s
|
|
|
|
Generiere 1-2 praezise Prueffragen fuer diesen Master Control.`,
|
|
readable, mc.TotalControls, regStr, mc.RegSource)
|
|
}
|
|
|
|
func parseLLMResponse(content string) []llmQuestion {
|
|
content = strings.TrimSpace(content)
|
|
|
|
// Try to find JSON array in the response
|
|
start := strings.Index(content, "[")
|
|
end := strings.LastIndex(content, "]")
|
|
if start >= 0 && end > start {
|
|
content = content[start : end+1]
|
|
}
|
|
|
|
var questions []llmQuestion
|
|
if err := json.Unmarshal([]byte(content), &questions); err != nil {
|
|
return nil
|
|
}
|
|
|
|
// Validate
|
|
var valid []llmQuestion
|
|
for _, q := range questions {
|
|
if q.Question != "" && len(q.PassCriteria) > 0 {
|
|
valid = append(valid, q)
|
|
}
|
|
}
|
|
return valid
|
|
}
|