package iace

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"

	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
)

// Semantic judgement over RECALL-SAFE dedup candidates (P2 slice 2). DEV-TIME,
// propose-only. The deterministic GT wall (proposer_screen.go) has already
// removed candidates that would drop recall or that credit different GT entries;
// the judge only adds an opinion on whether the survivors are truly the same
// hazard, plus a rationale, for the human review queue. It NEVER mutates anything.
//
// The judge is pluggable behind CandidateJudge so the runtime/tests stay
// deterministic (HeuristicJudge) while the dev-time CLI can plug in the
// non-deterministic LLM (LLMJudge over the shared llm.ProviderRegistry).

// Verdict values a CandidateJudge can return for a candidate pair.
const (
	// VerdictDuplicate: both patterns describe the same hazard.
	VerdictDuplicate = "duplicate"
	// VerdictDistinct: the patterns describe different hazards.
	VerdictDistinct = "distinct"
	// VerdictUncertain: the judge could not decide; also the fallback the
	// judges in this file use on errors and unparseable model output.
	VerdictUncertain = "uncertain"
)

// JudgedProposal is one candidate with its GT-wall result and the judge's opinion.
type JudgedProposal struct {
	// Candidate is the near-duplicate pair under review.
	Candidate DedupCandidate `json:"candidate"`
	// Screen is the GT-wall result recorded for the candidate.
	Screen ScreenResult `json:"screen"`
	// Verdict is expected to be one of the Verdict* constants.
	Verdict string `json:"verdict"`
	// Confidence follows the judge prompt's scale: "high", "medium" or "low".
	Confidence string `json:"confidence"`
	// Rationale is the judge's free-text explanation.
	Rationale string `json:"rationale"`
	// Judge names the judge that produced the opinion — presumably the value
	// of CandidateJudge.Name(); confirm where JudgedProposal is constructed.
	Judge string `json:"judge"`
}

// CandidateJudge decides whether two near-duplicate patterns are the same hazard.
type CandidateJudge interface {
	// Name returns a short identifier for the judge implementation.
	Name() string
	// Judge gives an opinion on candidate c, whose two patterns are a and b.
	// It returns the verdict, a confidence level and a human-readable rationale.
	Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (verdict, confidence, rationale string)
}

// HeuristicJudge is the deterministic default/fallback. It only ever returns "low"
// confidence — it is a placeholder for the LLM, and it deliberately punts to
// "uncertain" on the hard cases (low text overlap, shared measures) so the queue
// makes clear exactly where the LLM earns its keep.
type HeuristicJudge struct{} func (HeuristicJudge) Name() string { return "heuristic" } func (HeuristicJudge) Judge(_ context.Context, c DedupCandidate, _, _ PatternMatch) (string, string, string) { switch { case c.ScenarioJaccard >= 0.5 || (c.ZoneJaccard >= 0.5 && c.MeasureJaccard >= 0.5): return VerdictDuplicate, "low", "structural: high scenario, or combined zone+measure, overlap" case c.MeasureJaccard >= 0.99 && c.ZoneJaccard == 0 && c.ScenarioJaccard < 0.3: return VerdictDistinct, "low", "structural: identical measures but no zone/scenario overlap — likely distinct hazards sharing generic measures" default: return VerdictUncertain, "low", "structural signal inconclusive — needs the LLM judge" } } // LLMJudge asks an offline model to make the semantic call. Non-deterministic, so // it lives only in the dev-time tool, never in tests or the runtime. It degrades // to "uncertain" on any transport or parse error — it must never break the run. type LLMJudge struct { Completer LLMCompleter MachineClass string } func (LLMJudge) Name() string { return "llm" } func (j LLMJudge) Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (string, string, string) { system, user := BuildJudgePrompt(j.MachineClass, a, b) raw, err := j.Completer.Complete(ctx, system, user) if err != nil { return VerdictUncertain, "low", "LLM error: " + err.Error() } return parseJudgeJSON(raw) } // BuildJudgePrompt is the real LLM artifact — built and unit-tested deterministically // even though the call itself is not. It frames the ISO 12100 same-vs-distinct // question and forces a JSON answer. func BuildJudgePrompt(machineClass string, a, b PatternMatch) (system, user string) { system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " + "Entscheide, ob zwei generierte Gefaehrdungen fuer DIESE Maschine DIESELBE Gefaehrdung " + "beschreiben (Dublette) oder fachlich VERSCHIEDENE Gefaehrdungen sind, die nur zufaellig " + "dieselben Schutzmassnahmen teilen. 
Verschieden, wenn Wirkort, Ausloeser oder " + "Schadensmechanismus abweichen — auch bei gleicher Kategorie und gleichen Massnahmen. " + "Antworte AUSSCHLIESSLICH als JSON: " + `{"verdict":"duplicate|distinct|uncertain","confidence":"high|medium|low","rationale":"..."}.` user = fmt.Sprintf(`Maschinenklasse: %s Gefaehrdung A (%s): Name: %s Kategorie: %s Zone: %s Szenario: %s Ausloeser: %s Schaden: %s Massnahmen: %s Gefaehrdung B (%s): Name: %s Kategorie: %s Zone: %s Szenario: %s Ausloeser: %s Schaden: %s Massnahmen: %s Sind A und B dieselbe Gefaehrdung fuer diese Maschine?`, machineClass, a.PatternID, a.PatternName, primaryCat(a), a.ZoneDE, a.ScenarioDE, a.TriggerDE, a.HarmDE, strings.Join(a.SuggestedMeasureIDs, ", "), b.PatternID, b.PatternName, primaryCat(b), b.ZoneDE, b.ScenarioDE, b.TriggerDE, b.HarmDE, strings.Join(b.SuggestedMeasureIDs, ", ")) return system, user } func parseJudgeJSON(raw string) (verdict, confidence, rationale string) { start, end := strings.Index(raw, "{"), strings.LastIndex(raw, "}") if start < 0 || end <= start { return VerdictUncertain, "low", "unparseable LLM output" } var v struct { Verdict string `json:"verdict"` Confidence string `json:"confidence"` Rationale string `json:"rationale"` } if err := json.Unmarshal([]byte(raw[start:end+1]), &v); err != nil { return VerdictUncertain, "low", "unparseable LLM JSON: " + err.Error() } switch v.Verdict { case VerdictDuplicate, VerdictDistinct, VerdictUncertain: default: v.Verdict = VerdictUncertain } if v.Confidence == "" { v.Confidence = "low" } return v.Verdict, v.Confidence, v.Rationale } // LLMCompleter is the minimal text-in/text-out the LLM judge needs. Tests pass a // stub; the dev-time tool passes a registry-backed adapter (NewRegistryCompleter). 
type LLMCompleter interface { Complete(ctx context.Context, system, user string) (string, error) } type registryCompleter struct { reg *llm.ProviderRegistry model string } // NewRegistryCompleter adapts the shared llm.ProviderRegistry to LLMCompleter so // the proposer can reuse the platform's offline model wiring (e.g. self-hosted qwen). func NewRegistryCompleter(reg *llm.ProviderRegistry, model string) LLMCompleter { return ®istryCompleter{reg: reg, model: model} } func (rc *registryCompleter) Complete(ctx context.Context, system, user string) (string, error) { resp, err := rc.reg.Chat(ctx, &llm.ChatRequest{ Model: rc.model, Messages: []llm.Message{ {Role: "system", Content: system}, {Role: "user", Content: user}, }, Temperature: 0, }) if err != nil { return "", err } return resp.Message.Content, nil }