breakpilot-compliance/ai-compliance-sdk/internal/usecase/compiler_llm.go
Benjamin Admin 06bfbd1dca
Build + Deploy / build-admin-compliance (push) Successful in 2m46s
Build + Deploy / build-backend-compliance (push) Successful in 26s
Build + Deploy / build-ai-sdk (push) Successful in 52s
Build + Deploy / build-developer-portal (push) Successful in 22s
Build + Deploy / build-tts (push) Successful in 16s
Build + Deploy / build-document-crawler (push) Successful in 12s
Build + Deploy / build-dsms-gateway (push) Successful in 20s
Build + Deploy / build-dsms-node (push) Successful in 16s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 18s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m16s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 1m0s
CI / test-python-backend (push) Successful in 41s
CI / test-python-document-crawler (push) Successful in 29s
CI / test-python-dsms-gateway (push) Successful in 23s
CI / validate-canonical-controls (push) Successful in 16s
Build + Deploy / trigger-orca (push) Successful in 2m36s
feat(use-case-compiler): MC-based compliance questionnaires with scoring
Implements the Use-Case Compiler that turns Master Controls into
interactive compliance audits. 5 templates (Vendor Check, SAST/DAST,
DSGVO, NIS2, CRA), deterministic + LLM question generation, scoring
engine with regulation/severity breakdown, and gap detection.

- Backend: 9 API endpoints, 22 unit tests (all pass)
- Frontend: Template selector, questionnaire, result dashboard
- Migration 027: usecase_audits + usecase_answers tables

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-12 13:49:16 +02:00
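
The scoring engine and gap detection mentioned above live in the backend and are not part of the file below. As a rough illustration of a regulation/severity breakdown, a tally could look like the following sketch (every identifier here is hypothetical; nothing in it is taken from the repository):

// Hypothetical sketch: tally pass rates per regulation and per severity.
// None of these identifiers exist in the actual repository.
type answer struct {
	Regulation string // e.g. "DSGVO", "NIS2", "CRA"
	Severity   string // "HIGH", "MEDIUM", "LOW"
	Passed     bool
}

type breakdown struct{ Passed, Total int }

func score(answers []answer) (byReg, bySev map[string]breakdown) {
	byReg = make(map[string]breakdown)
	bySev = make(map[string]breakdown)
	for _, a := range answers {
		r, s := byReg[a.Regulation], bySev[a.Severity]
		r.Total++
		s.Total++
		if a.Passed {
			r.Passed++
			s.Passed++
		}
		byReg[a.Regulation], bySev[a.Severity] = r, s
	}
	return byReg, bySev
}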

135 lines
3.5 KiB
Go

package usecase

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"
	"time"

	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
)
// LLMQuestionGenerator uses an LLM to create questions from MC metadata
// when no pre-defined questions or doc_check_controls exist (Mode B).
type LLMQuestionGenerator struct {
	registry *llm.ProviderRegistry
}

// NewLLMQuestionGenerator creates a new LLM-based generator.
func NewLLMQuestionGenerator(registry *llm.ProviderRegistry) *LLMQuestionGenerator {
	return &LLMQuestionGenerator{registry: registry}
}
// llmQuestion is the JSON structure we expect from the LLM.
type llmQuestion struct {
	Question     string   `json:"question"`
	PassCriteria []string `json:"pass_criteria"`
	FailCriteria []string `json:"fail_criteria"`
	Severity     string   `json:"severity"`
}
// GenerateQuestions generates questions for a list of MCs using the LLM.
// On a per-MC LLM failure it falls back to deterministic generation, and
// the total number of questions is capped at 50.
func (g *LLMQuestionGenerator) GenerateQuestions(mcs []MCInfo, regulations []string) ([]Question, error) {
	if g.registry == nil {
		return nil, fmt.Errorf("no LLM provider configured")
	}
	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
	defer cancel()

	var questions []Question
	qNum := 1
	for _, mc := range mcs {
		prompt := buildPrompt(mc, regulations)
		resp, err := g.registry.Chat(ctx, &llm.ChatRequest{
			Messages: []llm.Message{
				{Role: "system", Content: systemPrompt},
				{Role: "user", Content: prompt},
			},
			Temperature: 0.3,
			MaxTokens:   500,
		})
		if err != nil {
			// Fall back to deterministic generation for this MC.
			fallback := GenerateFromMC(mc)
			questions = append(questions, fallback...)
			qNum += len(fallback)
			continue
		}
		parsed := parseLLMResponse(resp.Message.Content)
		for _, lq := range parsed {
			q := Question{
				ID:           fmt.Sprintf("Q%d", qNum),
				MCID:         mc.MasterControlID,
				MCName:       mc.CanonicalName,
				Text:         lq.Question,
				QuestionType: "yes_no",
				Severity:     normalizeSeverity(lq.Severity),
				Regulation:   mc.RegSource,
				PassCriteria: lq.PassCriteria,
				FailCriteria: lq.FailCriteria,
			}
			questions = append(questions, q)
			qNum++
		}
		// Cap total questions at 50.
		if qNum > 50 {
			break
		}
	}
	return questions, nil
}
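
GenerateFromMC, the deterministic fallback used above, is defined elsewhere in this package. A minimal sketch of what such a Mode A generator might look like, assuming it derives one generic yes/no question per Master Control (the wording, severity, and ID handling here are assumptions, not the repository's implementation):

// Hypothetical sketch only: the real GenerateFromMC lives elsewhere in this
// package, and its actual question wording and severity are not shown here.
func generateFromMCSketch(mc MCInfo) []Question {
	readable := strings.ReplaceAll(mc.CanonicalName, "_", " ")
	return []Question{{
		MCID:         mc.MasterControlID,
		MCName:       mc.CanonicalName,
		Text:         fmt.Sprintf("Ist %q umgesetzt und dokumentiert?", readable),
		QuestionType: "yes_no",
		Severity:     "MEDIUM",
		Regulation:   mc.RegSource,
	}}
}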
// systemPrompt (German) instructs the model to answer only with a JSON array
// of yes/no audit questions, each carrying pass/fail criteria and a severity.
const systemPrompt = `Du bist ein Compliance-Experte. Generiere präzise Prüffragen für Compliance-Audits.
Antworte NUR mit einem JSON-Array. Jedes Element hat:
- "question": Eine klare Ja/Nein-Frage auf Deutsch
- "pass_criteria": Array mit 1-2 Kriterien für "bestanden"
- "fail_criteria": Array mit 1-2 Kriterien für "nicht bestanden"
- "severity": "HIGH", "MEDIUM" oder "LOW"
Keine Erklärungen, nur das JSON-Array.`
// buildPrompt renders the per-MC user prompt (German), naming the Master
// Control, its Atomic Control count, and the regulations in scope.
func buildPrompt(mc MCInfo, regulations []string) string {
	readable := strings.ReplaceAll(mc.CanonicalName, "_", " ")
	regStr := strings.Join(regulations, ", ")
	return fmt.Sprintf(
		`Master Control: "%s" (%d Atomic Controls)
Regulierungen: %s
Regulation Source: %s
Generiere 1-2 präzise Prüffragen für diesen Master Control.`,
		readable, mc.TotalControls, regStr, mc.RegSource)
}
// parseLLMResponse extracts the first JSON array from the model output,
// tolerating surrounding prose, and drops entries that lack a question
// text or pass criteria.
func parseLLMResponse(content string) []llmQuestion {
	content = strings.TrimSpace(content)
	// Try to find a JSON array in the response.
	start := strings.Index(content, "[")
	end := strings.LastIndex(content, "]")
	if start >= 0 && end > start {
		content = content[start : end+1]
	}
	var questions []llmQuestion
	if err := json.Unmarshal([]byte(content), &questions); err != nil {
		return nil
	}
	// Validate: keep only entries with a question and at least one pass criterion.
	var valid []llmQuestion
	for _, q := range questions {
		if q.Question != "" && len(q.PassCriteria) > 0 {
			valid = append(valid, q)
		}
	}
	return valid
}
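
A usage sketch, assuming a populated llm.ProviderRegistry (its construction is provider-specific and not shown here) and with MCInfo field types inferred from their use in this file:

// Hypothetical usage sketch; the sample MCInfo values are illustrative only.
func exampleGenerateQuestions(registry *llm.ProviderRegistry) {
	gen := NewLLMQuestionGenerator(registry)
	mcs := []MCInfo{{
		MasterControlID: "MC-001",             // hypothetical ID (type assumed string)
		CanonicalName:   "encryption_at_rest", // hypothetical MC name
		RegSource:       "DSGVO",
		TotalControls:   4,
	}}
	questions, err := gen.GenerateQuestions(mcs, []string{"DSGVO", "NIS2"})
	if err != nil {
		fmt.Println("generation failed:", err)
		return
	}
	for _, q := range questions {
		fmt.Printf("%s [%s/%s] %s\n", q.ID, q.Regulation, q.Severity, q.Text)
	}
}

Note the design choice: a per-MC LLM failure does not fail the call, it silently degrades to the deterministic questions, so callers always get a questionnaire back.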