Merge pull request 'feat(ai-sdk): IACE hazard-engine quality + offline proposer (Session 4)' (#2) from feat/iace-gt-warewashing into main
CI / detect-changes (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 7s
CI / validate-canonical-controls (push) Successful in 8s
CI / loc-budget (push) Successful in 21s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Successful in 1m1s
CI / iace-gt-coverage (push) Successful in 19s
CI / test-python-backend (push) Successful in 24s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped

This commit is contained in:
pilotadmin
2026-06-26 11:48:09 +02:00
26 changed files with 1591 additions and 78 deletions
+3 -1
View File
@@ -34,6 +34,8 @@ func main() {
cmdEcho(os.Args[2:])
case "hierarchy":
cmdHierarchy(os.Args[2:])
case "propose":
cmdPropose(os.Args[2:])
default:
usage()
os.Exit(2)
@@ -41,7 +43,7 @@ func main() {
}
func usage() {
fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy> [args]")
fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy|propose> [args]")
}
func cmdReachability(_ []string) {
+188
View File
@@ -0,0 +1,188 @@
package main
import (
"context"
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
"github.com/breakpilot/ai-compliance-sdk/internal/iace/audit"
"github.com/breakpilot/ai-compliance-sdk/internal/llm"
)
type narrativeInput struct {
MachineType string `json:"machine_type"`
Narrative string `json:"narrative"`
MachineTypes []string `json:"machine_types,omitempty"`
}
// cmdPropose — Method P: offline, propose-only review-queue generator.
//
// iace-audit propose <narrative.json> [<ground-truth.json>]
//
// Runs four passes over one narrative and writes each result as a markdown and
// a JSON file under audit-reports/:
//
// proposals.{md,json} near-duplicate pattern pairs; screened against the ground
// truth (if given), then judged (heuristic by default, LLM when enabled)
// framing.{md,json} foreign-framing candidates
// vocab.{md,json} vocabulary suggestions that carry at least one suggested tag
// coverage.{md,json} ISO 12100 groups without an engine hazard; the markdown also
// lists LLM-proposed missing hazards, the JSON holds the gaps only
//
// Propose-only — it writes reports and never mutates the pattern library.
// Exits with status 2 when the narrative path is missing or the narrative is
// empty.
//
// Env:
//
// IACE_PROPOSE_THRESHOLD candidate score threshold (default 0.30)
// IACE_FRAMING_MIN_ORPHAN threshold passed to FindFramingCandidates (default 0.6)
// IACE_PROPOSE_LLM=1 use the offline LLM judge instead of the heuristic
// OLLAMA_URL ollama base URL (default http://localhost:11434)
// SELF_HOSTED_LLM_MODEL model name (default qwen2.5:32b-instruct)
func cmdPropose(args []string) {
if len(args) < 1 {
fmt.Fprintln(os.Stderr, "propose: usage: iace-audit propose <narrative.json> [<ground-truth.json>]")
os.Exit(2)
}
var in narrativeInput
// NOTE(review): must is defined elsewhere; presumably it aborts the run on a
// non-nil error — confirm.
must(readJSONFile(args[0], &in))
if in.Narrative == "" {
fmt.Fprintln(os.Stderr, "propose: narrative is empty")
os.Exit(2)
}
// The ground truth is optional; gt stays nil when no second argument is given
// and the recall screen below is skipped entirely.
var gt *iace.GroundTruth
if len(args) >= 2 {
var g iace.GroundTruth
must(readJSONFile(args[1], &g))
gt = &g
}
threshold := envFloat("IACE_PROPOSE_THRESHOLD", 0.30)
hazards, mits, fired := iace.BuildProposerInput(in.Narrative, in.MachineType, in.MachineTypes)
candidates := iace.FindDedupCandidates(fired, threshold)
// Index the fired patterns by ID so the judge receives the full keep/drop
// pattern for each candidate.
byID := make(map[string]iace.PatternMatch, len(fired))
for _, pm := range fired {
byID[pm.PatternID] = pm
}
judge := selectJudge(in.MachineType)
ctx := context.Background()
var proposals []iace.JudgedProposal
blocked := 0
for _, c := range candidates {
// Without a ground truth sr stays the zero ScreenResult and is recorded
// as such on the proposal.
var sr iace.ScreenResult
if gt != nil {
sr = iace.ScreenSupersession(gt, hazards, mits, c.KeepHazardName, c.DropName)
// GT wall: a candidate that lowers recall or whose pair is flagged
// DistinctGT is counted as blocked and never reaches the judge.
if sr.RecallAfter < sr.RecallBefore || sr.DistinctGT {
blocked++
continue
}
}
v, conf, rat := judge.Judge(ctx, c, byID[c.KeepPattern], byID[c.DropPattern])
proposals = append(proposals, iace.JudgedProposal{
Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
})
}
writeText("audit-reports/proposals.md", iace.RenderProposalQueue(in.MachineType, proposals))
writeJSON("audit-reports/proposals.json", proposals)
// Type 2: foreign-framing candidates (zone terms with no narrative echo).
framing := iace.FindFramingCandidates(fired, in.Narrative, envFloat("IACE_FRAMING_MIN_ORPHAN", 0.6))
writeText("audit-reports/framing.md", iace.RenderFramingQueue(in.MachineType, framing))
writeJSON("audit-reports/framing.json", framing)
// Type 3: vocab->tag proposals (unknown narrative tokens that pattern text
// names as a whole word, with a dominant shared required tag).
vocab := audit.RunVocabulary(map[string]any{"narrative": in.Narrative})
// Keep only suggestions that actually carry a tag; the rest are not queued.
var vgaps []audit.DictionarySuggestion
for _, s := range vocab.SuggestedDictionaryEntries {
if len(s.SuggestedTags) > 0 {
vgaps = append(vgaps, s)
}
}
writeText("audit-reports/vocab.md", renderVocabQueue(in.MachineType, vgaps))
writeJSON("audit-reports/vocab.json", vgaps)
// Type 4: coverage blind-spots (empty ISO 12100 groups A-G) + LLM expansion.
gaps := iace.FindCoverageGaps(hazards)
// The LLM expansion only runs when the selected judge is the LLM judge; with
// the heuristic judge missing stays nil.
var missing []iace.MissingHazard
if lj, ok := judge.(iace.LLMJudge); ok {
missing = iace.ProposeMissingHazards(ctx, lj.Completer, in.MachineType, in.Narrative, hazards, gaps)
}
writeText("audit-reports/coverage.md", iace.RenderCoverageQueue(in.MachineType, gaps, missing))
// Only the deterministic gaps go into the JSON; missing appears in the
// markdown report alone.
writeJSON("audit-reports/coverage.json", gaps)
printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{
"fired_patterns": len(fired),
"candidates": len(candidates),
"in_queue": len(proposals),
"gt_blocked": blocked,
"framing_flags": len(framing),
"vocab_gaps": len(vgaps),
"coverage_gaps": len(gaps),
})
if gt == nil {
fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)")
}
}
// selectJudge returns the judge used to assess dedup candidates.
//
// With IACE_PROPOSE_LLM=1 it registers an Ollama adapter (OLLAMA_URL and
// SELF_HOSTED_LLM_MODEL, each with a built-in default), announces the choice on
// stdout and returns an LLM judge bound to machineClass. With any other value
// it returns the heuristic judge.
func selectJudge(machineClass string) iace.CandidateJudge {
	if os.Getenv("IACE_PROPOSE_LLM") == "1" {
		ollamaURL := envStr("OLLAMA_URL", "http://localhost:11434")
		modelName := envStr("SELF_HOSTED_LLM_MODEL", "qwen2.5:32b-instruct")

		registry := llm.NewProviderRegistry("ollama", "")
		registry.Register(llm.NewOllamaAdapter(ollamaURL, modelName))
		fmt.Printf("using LLM judge (ollama %s, model %s)\n", ollamaURL, modelName)

		return iace.LLMJudge{
			Completer:    iace.NewRegistryCompleter(registry, modelName),
			MachineClass: machineClass,
		}
	}
	return iace.HeuristicJudge{}
}
// readJSONFile reads the file at path and decodes its JSON content into v,
// which must be a pointer as required by json.Unmarshal.
//
// A read failure is returned unchanged, since the os error already names the
// path. A decode failure is wrapped with the path, because the raw json error
// carries no file name and callers read more than one input file. The wrapped
// error still matches the underlying one via errors.Is / errors.As.
func readJSONFile(path string, v any) error {
	raw, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	if err := json.Unmarshal(raw, v); err != nil {
		return fmt.Errorf("parsing %s: %w", path, err)
	}
	return nil
}
// writeText writes content to path and reports the outcome: a confirmation on
// stdout on success, a warning on stderr on failure. It never aborts the run.
func writeText(path, content string) {
	// Best-effort: if the report directory cannot be created, the write below
	// fails and produces the warning.
	_ = os.MkdirAll("audit-reports", 0o755)

	err := os.WriteFile(path, []byte(content), 0o644)
	if err == nil {
		fmt.Println("→ wrote", path)
		return
	}
	fmt.Fprintln(os.Stderr, "warn: could not write", path, err)
}
// envStr returns the value of the environment variable key, or def when the
// variable is unset or empty.
func envStr(key, def string) string {
	val := os.Getenv(key)
	if val == "" {
		return def
	}
	return val
}
// envFloat returns the environment variable key parsed as a float64.
//
// It returns def when the variable is unset or empty. When the variable is set
// but does not parse, it also returns def, and prints a warning on stderr so a
// mistyped threshold does not silently change which candidates are proposed.
func envFloat(key string, def float64) float64 {
	raw := os.Getenv(key)
	if raw == "" {
		return def
	}
	f, err := strconv.ParseFloat(raw, 64)
	if err != nil {
		fmt.Fprintf(os.Stderr, "warn: ignoring %s=%q (not a number), using default %v\n", key, raw, def)
		return def
	}
	return f
}
// renderVocabQueue renders the vocab→tag suggestions for machine as a markdown
// review queue: a heading, a summary line with the entry count, then one
// numbered section per entry listing its suggested tags, the patterns naming
// the token and the proposed dictionary action. The action line uses the first
// suggested tag, or the placeholder "<tag>" when an entry has none.
func renderVocabQueue(machine string, entries []audit.DictionarySuggestion) string {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# Vocab→tag review queue — %s\n\n", machine)
	fmt.Fprintf(&sb, "%d unknown token(s) appear in pattern text but map to no dictionary tag. Propose-only — a human (or the LLM) confirms the tag, then adds a keyword_dictionary entry and pins a GT case.\n\n", len(entries))

	for idx := range entries {
		entry := entries[idx]

		primary := "<tag>"
		if len(entry.SuggestedTags) != 0 {
			primary = entry.SuggestedTags[0]
		}
		allTags := strings.Join(entry.SuggestedTags, ", ")
		namedBy := strings.Join(entry.PatternIDs, ", ")

		fmt.Fprintf(&sb, "## %d. \"%s\" → suggested tag(s): %s\n", idx+1, entry.Token, allTags)
		fmt.Fprintf(&sb, "- named by %d pattern(s): %s\n", len(entry.PatternIDs), namedBy)
		fmt.Fprintf(&sb, "- suggested action: add keyword_dictionary entry {%q → %s} so narratives mentioning it trigger those patterns; human confirms\n\n", entry.Token, primary)
	}
	return sb.String()
}
@@ -36,6 +36,10 @@ type DictionarySuggestion struct {
Token string `json:"token"`
Field string `json:"field"`
PatternIDs []string `json:"pattern_ids"`
// SuggestedTags are the RequiredComponentTags shared by the naming patterns,
// ranked by frequency — the candidate tags a keyword_dictionary entry for this
// token would emit so narratives mentioning it can trigger those patterns.
SuggestedTags []string `json:"suggested_tags,omitempty"`
}
type VocabularyReport struct {
@@ -66,14 +66,19 @@ func runVocabulary(form map[string]any) VocabularyReport {
// For each unknown token check if any pattern names it
patterns := iace.AllPatterns()
byID := make(map[string]iace.HazardPattern, len(patterns))
for _, p := range patterns {
byID[p.ID] = p
}
for _, tok := range report.UnknownTokens {
hits := patternsMentioning(tok, patterns)
if len(hits) == 0 {
continue
}
report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{
Token: tok,
PatternIDs: hits,
Token: tok,
PatternIDs: hits,
SuggestedTags: suggestTagsFor(hits, byID),
})
}
sort.Slice(report.SuggestedDictionaryEntries, func(i, j int) bool {
@@ -129,18 +134,24 @@ func dictTokenHit(tok string, dict map[string]bool) bool {
return false
}
// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/
// harm/zone text contains the token (case-insensitive substring).
// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/harm/
// zone text names the token as a WHOLE WORD. Whole-word (not substring) matching
// is essential: a substring match flags common fragments like "stehen" inside
// "entstehen", producing spurious hits and nonsensical tag suggestions.
func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
tokLower := strings.ToLower(tok)
seen := map[string]bool{}
var out []string
for _, p := range patterns {
hay := strings.ToLower(p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE)
if !strings.Contains(hay, tokLower) {
continue
matched := false
for _, w := range tokenRE.FindAllString(hay, -1) {
if w == tokLower {
matched = true
break
}
}
if seen[p.ID] {
if !matched || seen[p.ID] {
continue
}
seen[p.ID] = true
@@ -151,3 +162,57 @@ func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
}
return out
}
// suggestTagsFor proposes the tags a dictionary entry for a token should emit.
//
// It counts, over the patterns in ids that exist in byID, how many patterns
// require each RequiredComponentTag (a tag repeated within one pattern counts
// once). Tags are ranked by that count, descending, with ties broken by tag
// name. Only tags required by at least 40% of the known patterns are kept, and
// at most three are returned. IDs missing from byID are ignored; the result is
// nil when no ID is known or no tag reaches the share.
func suggestTagsFor(ids []string, byID map[string]iace.HazardPattern) []string {
	const (
		minShare = 0.4 // diffuse tokens spread over many categories fall below this
		maxTags  = 3
	)

	counts := make(map[string]int)
	known := 0
	for _, id := range ids {
		pattern, found := byID[id]
		if !found {
			continue
		}
		known++
		counted := make(map[string]bool)
		for _, tag := range pattern.RequiredComponentTags {
			if !counted[tag] {
				counted[tag] = true
				counts[tag]++
			}
		}
	}
	if known == 0 {
		return nil
	}

	type tagCount struct {
		tag string
		n   int
	}
	ranking := make([]tagCount, 0, len(counts))
	for tag, n := range counts {
		ranking = append(ranking, tagCount{tag: tag, n: n})
	}
	sort.Slice(ranking, func(a, b int) bool {
		if ranking[a].n == ranking[b].n {
			return ranking[a].tag < ranking[b].tag
		}
		return ranking[a].n > ranking[b].n
	})

	// The ranking is sorted by count, so the first tag below the share ends
	// the selection.
	var picked []string
	for _, rc := range ranking {
		if len(picked) == maxTags || float64(rc.n)/float64(known) < minShare {
			break
		}
		picked = append(picked, rc.tag)
	}
	return picked
}
@@ -0,0 +1,36 @@
package audit
import (
"testing"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
)
// TestSuggestTagsFor_RanksSharedRequiredTags checks that the tag required by
// the most patterns is ranked first.
func TestSuggestTagsFor_RanksSharedRequiredTags(t *testing.T) {
	patterns := map[string]iace.HazardPattern{
		"P1": {ID: "P1", RequiredComponentTags: []string{"backflow_risk", "dom_warewashing"}},
		"P2": {ID: "P2", RequiredComponentTags: []string{"backflow_risk"}},
		"P3": {ID: "P3", RequiredComponentTags: []string{"sharp_edge"}},
	}
	ids := []string{"P1", "P2", "P3"}

	got := suggestTagsFor(ids, patterns)
	if len(got) > 0 && got[0] == "backflow_risk" {
		return
	}
	t.Fatalf("want backflow_risk ranked first (2 patterns), got %v", got)
}
// TestSuggestTagsFor_TopThreeStableAlpha checks that equally frequent tags are
// ordered alphabetically and that the result is capped at three.
func TestSuggestTagsFor_TopThreeStableAlpha(t *testing.T) {
	only := iace.HazardPattern{ID: "P1", RequiredComponentTags: []string{"d", "b", "a", "c"}}
	got := suggestTagsFor([]string{"P1"}, map[string]iace.HazardPattern{"P1": only})

	want := []string{"a", "b", "c"}
	matches := len(got) == len(want)
	for i := 0; matches && i < len(want); i++ {
		matches = got[i] == want[i]
	}
	if !matches {
		t.Fatalf("want stable alpha top-3 [a b c], got %v", got)
	}
}
// TestSuggestTagsFor_UnknownPatternIgnored checks that IDs absent from the
// pattern index yield no suggestions.
func TestSuggestTagsFor_UnknownPatternIgnored(t *testing.T) {
	empty := map[string]iace.HazardPattern{}
	got := suggestTagsFor([]string{"missing"}, empty)
	if len(got) == 0 {
		return
	}
	t.Fatalf("want empty for unknown patterns, got %v", got)
}
@@ -7,8 +7,6 @@ import (
"path/filepath"
"sort"
"testing"
"github.com/google/uuid"
)
// TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
@@ -110,65 +108,6 @@ func TestKistenhub_GTCoverage(t *testing.T) {
// patternsToHazardsAndMitigations turns a pattern match output into the
// Hazard/Mitigation shapes that CompareBenchmark expects. Mirrors what
// iace_handler_init.go does in production but without DB writes.
//
// Each matched pattern yields one hazard with a fresh ID; its name is the
// German scenario text, falling back to the pattern name when the scenario is
// empty. Each suggested measure yields one mitigation per source pattern that
// produced a hazard, named after the protective-measure library entry or, if
// the library has none, after the measure ID itself.
func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
	matched := out.MatchedPatterns
	hazards := make([]Hazard, 0, len(matched))
	hazardIDByPattern := make(map[string]uuid.UUID, len(matched))

	for _, pm := range matched {
		// Only the first category and lifecycle of a pattern are carried over.
		var category, phase string
		if len(pm.HazardCats) > 0 {
			category = pm.HazardCats[0]
		}
		if len(pm.ApplicableLifecycles) > 0 {
			phase = pm.ApplicableLifecycles[0]
		}
		name := pm.ScenarioDE
		if name == "" {
			name = pm.PatternName
		}

		hz := Hazard{
			ID:             uuid.New(),
			Name:           name,
			Category:       category,
			Description:    pm.ScenarioDE,
			Scenario:       pm.ScenarioDE,
			TriggerEvent:   pm.TriggerDE,
			PossibleHarm:   pm.HarmDE,
			AffectedPerson: pm.AffectedDE,
			HazardousZone:  pm.ZoneDE,
			LifecyclePhase: phase,
		}
		hazards = append(hazards, hz)
		hazardIDByPattern[pm.PatternID] = hz.ID
	}

	libraryNames := make(map[string]string)
	for _, measure := range GetProtectiveMeasureLibrary() {
		libraryNames[measure.ID] = measure.Name
	}

	var mitigations []Mitigation
	for _, suggested := range out.SuggestedMeasures {
		label, known := libraryNames[suggested.MeasureID]
		if !known || label == "" {
			label = suggested.MeasureID
		}
		for _, patternID := range suggested.SourcePatterns {
			// Measures whose source pattern produced no hazard are dropped.
			if hazardID, linked := hazardIDByPattern[patternID]; linked {
				mitigations = append(mitigations, Mitigation{
					ID:       uuid.New(),
					HazardID: hazardID,
					Name:     label,
				})
			}
		}
	}
	return hazards, mitigations
}
func abbrev(s string, max int) string {
if len(s) <= max {
return s
@@ -1,6 +1,7 @@
package iace
import (
"context"
"encoding/json"
"os"
"path/filepath"
@@ -45,7 +46,7 @@ var warewashingCyberCategories = map[string]bool{
// warewashingEngineOutput runs the production chain and returns the filtered
// hazards/mitigations the user would see for the UC-M.
func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
func warewashingEngineOutput() ([]Hazard, []Mitigation, []PatternMatch) {
res := ParseNarrative(warewashingNarrative, "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)")
var compIDs, compNames []string
@@ -94,7 +95,7 @@ func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
filtered := *out
filtered.MatchedPatterns = kept
hazards, mitigations := patternsToHazardsAndMitigations(&filtered)
return hazards, mitigations, len(kept)
return hazards, mitigations, kept
}
func TestWarewashing_GTCoverage(t *testing.T) {
@@ -119,8 +120,8 @@ func TestWarewashing_GTCoverage(t *testing.T) {
t.Logf("Parsed components: %v", cn)
}
hazards, mitigations, nPatterns := warewashingEngineOutput()
t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", nPatterns, len(hazards))
hazards, mitigations, keptPatterns := warewashingEngineOutput()
t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", len(keptPatterns), len(hazards))
result := CompareBenchmark(&gt, hazards, mitigations)
precision := 0.0
@@ -180,3 +181,57 @@ func TestWarewashing_GTCoverage(t *testing.T) {
t.Errorf("warewashing recall below 40%% floor: %.1f%%", result.CoverageScore*100)
}
}
// TestWarewashing_DedupProposer exercises the offline dedup-candidate proposer
// end-to-end on the real warewashing engine output: detect candidates, screen
// each against the GT, and log the human-review queue. It asserts the WALL is
// self-consistent — a candidate that passes both block checks must also be
// reported as Safe by the screen. The candidate and blocked counts are only
// logged, not asserted.
func TestWarewashing_DedupProposer(t *testing.T) {
raw, err := os.ReadFile(filepath.Join("testdata", "ground_truth_warewashing.json"))
if err != nil {
t.Fatalf("read GT: %v", err)
}
var gt GroundTruth
if err := json.Unmarshal(raw, &gt); err != nil {
t.Fatalf("parse GT: %v", err)
}
hazards, mits, kept := warewashingEngineOutput()
// Index the kept patterns by ID so the judge gets the full keep/drop pattern.
byID := map[string]PatternMatch{}
for _, pm := range kept {
byID[pm.PatternID] = pm
}
// 0.25 is a deliberately permissive candidate threshold: the proposer is meant
// to over-surface, because the deterministic GT wall below (and a human, and the
// LLM judge) is the precision filter — not the detector.
candidates := FindDedupCandidates(kept, 0.25)
t.Logf("Proposer: %d dedup candidate(s) from %d fired patterns", len(candidates), len(kept))
// Deterministic judge in the test; the dev-time CLI swaps in LLMJudge.
judge := HeuristicJudge{}
var judged []JudgedProposal
blocked := 0
for _, c := range candidates {
sr := ScreenSupersession(&gt, hazards, mits, c.KeepHazardName, c.DropName)
// Case order matters: a recall drop is reported as the block reason even
// when the pair is also flagged DistinctGT.
switch {
case sr.RecallAfter < sr.RecallBefore:
t.Logf("[BLOCK recall-load-bearing] keep %s / drop %s", c.KeepPattern, c.DropPattern)
blocked++
case sr.DistinctGT:
t.Logf("[BLOCK distinct GT %s vs %s] keep %s / drop %s", sr.KeepGT, sr.DropGT, c.KeepPattern, c.DropPattern)
blocked++
default:
// Neither block condition holds, so the screen itself must agree.
if !sr.Safe {
t.Errorf("RECALL-SAFE branch but ScreenResult.Safe=false for drop %s", c.DropPattern)
}
v, conf, rat := judge.Judge(context.Background(), c, byID[c.KeepPattern], byID[c.DropPattern])
judged = append(judged, JudgedProposal{
Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
})
}
}
t.Logf("\n%s", RenderProposalQueue("Gewerbliche Geschirrspuelmaschine (vernetzt)", judged))
t.Logf("Proposer summary: %d candidate(s) in queue (judge=%s), %d BLOCKED by the GT wall — propose-only, nothing auto-applied",
len(judged), judge.Name(), blocked)
}
@@ -0,0 +1,50 @@
package iace
import "sort"
// EN ISO 12100 hazard-group ordering for the hazard log. Without it the log is
// returned in pattern-firing order, which reads as a jumble. This groups the
// hazards top-down by type (A. Mechanisch, B. Elektrisch, C. Thermisch, …),
// matching the frontend CATEGORY_LABELS.
//
// Each group owns one decade of ranks (10s = A, 20s = B, …); a lower rank
// sorts earlier. Categories sharing a rank keep their incoming relative order
// because the sort that uses this table is stable.
var isoCategoryRank = map[string]int{
// A. Mechanisch (mechanical)
"mechanical_hazard": 10, "mechanical": 10, "maintenance_hazard": 11,
// B. Elektrisch (electrical)
"electrical_hazard": 20, "electrical": 20, "emc_hazard": 21,
// C. Thermisch (thermal)
"thermal_hazard": 30, "thermal": 30, "high_temperature": 31, "fire_explosion": 32,
// D. Pneumatik / Hydraulik (pneumatics / hydraulics)
"pneumatic_hydraulic": 40,
// E. Laerm / Vibration (noise / vibration)
"noise_hazard": 50, "noise_vibration": 50, "vibration_hazard": 51,
// F. Ergonomie (ergonomics)
"ergonomic_hazard": 60, "ergonomic": 60,
// G. Stoffe / Umwelt (substances / environment)
"material_environmental": 70, "chemical_risk": 71, "radiation_hazard": 72,
// H. Software / Steuerung (software / control; functional safety)
"software_control": 80, "software_fault": 80, "safety_function_failure": 81,
"configuration_error": 82, "sensor_fault": 83, "hmi_error": 84, "mode_confusion": 85,
"communication_failure": 86, "update_failure": 87,
// I. Cyber / Netzwerk (cyber / network; listed for ordering completeness,
// excluded from the CE log)
"unauthorized_access": 90, "firmware_corruption": 91, "cyber_resilience": 92,
"cyber_network": 93, "logging_audit_failure": 94, "sensor_spoofing": 95,
// J. KI-spezifisch (AI-specific)
"ai_specific": 100, "ai_misclassification": 100, "false_classification": 100,
"model_drift": 100, "data_poisoning": 100, "unintended_bias": 100,
}
// categoryRank returns the ISO 12100 sort rank of a hazard category.
// Categories missing from isoCategoryRank get 999 so they sort last.
func categoryRank(cat string) int {
	rank, known := isoCategoryRank[cat]
	if !known {
		return 999
	}
	return rank
}
// SortHazardsByISO12100 reorders hazards in place by the rank of their
// Category, which groups them by ISO 12100 hazard group. The sort is stable,
// so hazards of equal rank keep the relative order the engine produced
// (creation/priority order).
func SortHazardsByISO12100(hazards []Hazard) {
	ranksBefore := func(a, b int) bool {
		return categoryRank(hazards[a].Category) < categoryRank(hazards[b].Category)
	}
	sort.SliceStable(hazards, ranksBefore)
}
@@ -157,7 +157,7 @@ func GetGTBremseHazardPatterns() []HazardPattern {
// ════════════════════════════════════════════════════════════════
{
ID: "HP1717", NameDE: "Verletzung durch unvermittelt austretende pneumatische Restenergie", NameEN: "Injury from unexpectedly released pneumatic stored energy",
RequiredComponentTags: []string{"stored_energy"},
RequiredComponentTags: []string{"pneumatic_part"},
RequiredEnergyTags: []string{"pneumatic_pressure"},
GeneratedHazardCats: []string{"mechanical_hazard"},
SuggestedMeasureIDs: []string{"M485", "M534", "M527"},
@@ -375,7 +375,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
// ================================================================
{
ID: "HP753", NameDE: "Thermal Runaway bei Lithium-Batterie", NameEN: "Thermal runaway of lithium battery",
RequiredComponentTags: []string{"stored_energy", "high_temperature"},
RequiredComponentTags: []string{"battery", "high_temperature"},
RequiredEnergyTags: []string{"electrical_energy", "thermal"},
GeneratedHazardCats: []string{"thermal_hazard", "electrical_hazard"},
SuggestedMeasureIDs: []string{"M005", "M141"},
@@ -390,7 +390,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
},
{
ID: "HP754", NameDE: "Ausgasung giftiger Daempfe aus Batterie", NameEN: "Toxic gas emission from battery",
RequiredComponentTags: []string{"stored_energy", "chemical_risk"},
RequiredComponentTags: []string{"battery", "chemical_risk"},
RequiredEnergyTags: []string{},
GeneratedHazardCats: []string{"material_environmental"},
SuggestedMeasureIDs: []string{"M005", "M141"},
@@ -405,7 +405,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
},
{
ID: "HP755", NameDE: "Elektrischer Schlag an Hochvolt-Batteriespeicher", NameEN: "Electric shock from high-voltage battery storage",
RequiredComponentTags: []string{"stored_energy", "electrical_part"},
RequiredComponentTags: []string{"battery", "electrical_part"},
RequiredEnergyTags: []string{"electrical_energy"},
GeneratedHazardCats: []string{"electrical_hazard"},
SuggestedMeasureIDs: []string{"M082", "M141"},
@@ -137,7 +137,7 @@ func GetKeywordDictionary() []KeywordEntry {
{Keywords: []string{"kreiselmaeher", "scheibenmaeher", "maehwerk"}, ExtraTags: []string{"agri_mower"}},
{Keywords: []string{"spruehduese", "spritzduese", "spruehkopf"}, ExtraTags: []string{"spray_nozzle"}},
{Keywords: []string{"galvanikbad", "tauchbad", "beizbad", "chemiebad"}, ExtraTags: []string{"chemical_bath"}},
{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie"}, ExtraTags: []string{"battery"}},
{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie", "lithium", "batteriespeicher", "hochvoltbatterie", "lithium-batterie"}, ExtraTags: []string{"battery"}},
{Keywords: []string{"heizelement", "heizpatrone", "heizband"}, ExtraTags: []string{"heating_element"}},
{Keywords: []string{"uv-lampe", "uv-strahler", "uv-c-strahler"}, ExtraTags: []string{"uv_source"}},
{Keywords: []string{"roentgen", "radioaktiv", "strahlenquelle", "gammastrahl", "isotop"}, ExtraTags: []string{"radiation_source"}},
@@ -42,3 +42,29 @@ func guardedLifecycles(p HazardPattern, tagSet map[string]bool) []string {
}
return p.ApplicableLifecycles
}
// Domain-specific supersession.
//
// A generic pattern that fires via a broad tag (e.g. high_temperature) can
// duplicate a domain-specific pattern that describes the same hazard more
// precisely. When the domain is present, the specific pattern wins and the
// generic duplicate is dropped. Scoped to the domain tag, so machines outside
// the domain keep the generic pattern — regression-safe by construction.
//
// HP016 (generic hot surfaces) -> HP2201 (Boiler/Tank/Spuelkammer)
// HP018 (actuator burn) -> HP2201 (same contact-burn hazard)
// HP013 (stored electrical NRG) -> HP144 (residual voltage; HP013's zone is
// framed for Batteriefaecher/USV-Anlagen a
// dishwasher does not have, HP144 is the
// Frequenzumrichter/Zwischenkreis variant)
var genericSupersededByWarewashing = map[string]bool{
"HP016": true,
"HP018": true,
"HP013": true,
}
// supersededByDomainSpecific reports whether pattern p is a generic pattern
// that a more precise domain pattern replaces. That is the case only when the
// tag set contains dom_warewashing and p's ID is listed in
// genericSupersededByWarewashing; without the domain tag no pattern is
// superseded.
func supersededByDomainSpecific(p HazardPattern, tagSet map[string]bool) bool {
	if !tagSet["dom_warewashing"] {
		return false
	}
	return genericSupersededByWarewashing[p.ID]
}
@@ -416,6 +416,11 @@ func patternMatches(p HazardPattern, tagSet map[string]bool, input MatchInput) b
return false
}
// Domain-specific supersession (generic duplicate replaced by a precise one).
if supersededByDomainSpecific(p, tagSet) {
return false
}
return true
}
@@ -0,0 +1,143 @@
package iace
import (
"context"
"encoding/json"
"fmt"
"strings"
)
// Coverage blind-spot proposer (P2 slice 6, type 4). DEV-TIME, propose-only.
//
// Deterministic skeleton: which EN ISO 12100 hazard groups (A-G, the classic CE
// groups; H-J are control/CRA and routinely routed elsewhere) did the engine
// leave with ZERO hazards for this machine? An empty group is a structural
// blind-spot signal — the machine may genuinely lack that hazard, or a pattern
// may be missing. The LLM then expands each gap into specific expected-but-missing
// hazards a safety assessor would name, for a human to confirm into a new pattern
// or GT case. The gaps alone are useful without any model.
// isoGroup describes one EN ISO 12100 hazard group checked for coverage.
type isoGroup struct {
// Key is the machine-readable group key, copied into CoverageGap.Key.
Key string
// Label is the display label, copied into CoverageGap.Group.
Label string
// Cats lists the hazard categories that count as covering this group.
Cats []string
}
// iso12100Groups lists the hazard groups A-G that FindCoverageGaps checks, in
// report order. A group counts as covered as soon as one hazard carries any of
// its categories.
var iso12100Groups = []isoGroup{
{"mechanical", "A. Mechanisch", []string{"mechanical_hazard", "mechanical", "maintenance_hazard"}},
{"electrical", "B. Elektrisch", []string{"electrical_hazard", "electrical", "emc_hazard"}},
{"thermal", "C. Thermisch", []string{"thermal_hazard", "thermal", "high_temperature", "fire_explosion"}},
{"pneumatic_hydraulic", "D. Pneumatik/Hydraulik", []string{"pneumatic_hydraulic"}},
{"noise_vibration", "E. Laerm/Vibration", []string{"noise_hazard", "noise_vibration", "vibration_hazard"}},
{"ergonomic", "F. Ergonomie", []string{"ergonomic_hazard", "ergonomic"}},
{"material", "G. Stoffe/Umwelt", []string{"material_environmental", "chemical_risk", "radiation_hazard"}},
}
// CoverageGap is an ISO 12100 hazard group with no engine hazard.
type CoverageGap struct {
// Group is the display label of the empty group, e.g. "F. Ergonomie".
Group string `json:"group"`
// Key is the machine-readable key of the group, e.g. "ergonomic".
Key string `json:"key"`
// Note is the reviewer hint attached to the gap.
Note string `json:"note"`
}
// FindCoverageGaps returns, in iso12100Groups order, every A-G hazard group
// for which none of the given hazards carries one of the group's categories.
// The result is nil when every group is covered.
func FindCoverageGaps(hazards []Hazard) []CoverageGap {
	seenCats := make(map[string]bool, len(hazards))
	for i := range hazards {
		seenCats[hazards[i].Category] = true
	}

	// hasAny reports whether at least one of cats occurred among the hazards.
	hasAny := func(cats []string) bool {
		for _, cat := range cats {
			if seenCats[cat] {
				return true
			}
		}
		return false
	}

	var gaps []CoverageGap
	for _, grp := range iso12100Groups {
		if hasAny(grp.Cats) {
			continue
		}
		gaps = append(gaps, CoverageGap{
			Group: grp.Label,
			Key:   grp.Key,
			Note:  "no engine hazard in this ISO 12100 group — verify the machine truly lacks it, or a pattern is missing",
		})
	}
	return gaps
}
// MissingHazard is an LLM-proposed hazard a safety assessor would expect.
// Its fields are filled from the JSON array the model is asked to return.
type MissingHazard struct {
// Group is the hazard group the model assigned the hazard to.
Group string `json:"group"`
// Hazard is the proposed hazard in the model's own words.
Hazard string `json:"hazard"`
// Why is the model's justification for expecting the hazard.
Why string `json:"why"`
}
// ProposeMissingHazards asks the LLM to turn the empty hazard groups in gaps
// into specific hazards an assessor would expect for this machine.
//
// It returns nil when completer is nil, when there are no gaps, or when the
// completion fails. Propose-only: a failure here must never break the run, so
// the completion error is deliberately dropped.
func ProposeMissingHazards(ctx context.Context, completer LLMCompleter, machineClass, narrative string, produced []Hazard, gaps []CoverageGap) []MissingHazard {
	if len(gaps) == 0 || completer == nil {
		return nil
	}

	system, user := BuildCoveragePrompt(machineClass, narrative, produced, gaps)
	if reply, err := completer.Complete(ctx, system, user); err == nil {
		return parseMissingHazards(reply)
	}
	return nil
}
// BuildCoveragePrompt builds the system and user prompts for the "what is
// missing?" question. The system prompt fixes the assessor role and demands a
// JSON array reply. The user prompt carries the machine class, the narrative,
// the distinct non-empty categories already produced (in first-seen order) and
// the labels of the empty groups.
func BuildCoveragePrompt(machineClass, narrative string, produced []Hazard, gaps []CoverageGap) (system, user string) {
	listed := map[string]bool{}
	var categories []string
	for _, hz := range produced {
		cat := hz.Category
		if cat == "" || listed[cat] {
			continue
		}
		listed[cat] = true
		categories = append(categories, cat)
	}

	groups := make([]string, len(gaps))
	for i, gap := range gaps {
		groups[i] = gap.Group
	}

	system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
		"Dir werden eine Maschine, die bereits erkannten Gefaehrdungen und Gefaehrdungsgruppen OHNE Eintrag genannt. " +
		"Nenne nur Gefaehrdungen, die ein Sachverstaendiger fuer DIESE Maschine ERWARTET, die aber FEHLEN. " +
		"Erfinde nichts Maschinenfremdes. Antworte AUSSCHLIESSLICH als JSON-Array: " +
		`[{"group":"...","hazard":"...","why":"..."}].`

	user = fmt.Sprintf("Maschinenklasse: %s\n\nBeschreibung:\n%s\n\nBereits erkannte Kategorien: %s\n\nGruppen OHNE Eintrag (Fokus): %s\n\nWelche erwarteten Gefaehrdungen fehlen?",
		machineClass,
		narrative,
		strings.Join(categories, ", "),
		strings.Join(groups, ", "),
	)
	return system, user
}
// parseMissingHazards extracts the first JSON array of MissingHazard objects
// from an LLM reply and returns nil when the reply contains none.
//
// Models wrap the array in prose or code fences, and that prose may itself
// contain brackets ("[Hinweis] [...]", "[...] siehe [1]"). Each '[' in raw is
// therefore tried in turn as the start of the array. A streaming decoder reads
// exactly one JSON value from there, so text after the array's closing bracket
// is ignored. The first position that decodes cleanly wins.
func parseMissingHazards(raw string) []MissingHazard {
	for offset := 0; offset < len(raw); {
		rel := strings.IndexByte(raw[offset:], '[')
		if rel < 0 {
			return nil
		}
		start := offset + rel

		// A fresh slice per attempt keeps a failed decode from leaking partial
		// entries into the result.
		var out []MissingHazard
		if err := json.NewDecoder(strings.NewReader(raw[start:])).Decode(&out); err == nil {
			return out
		}
		offset = start + 1
	}
	return nil
}
// RenderCoverageQueue renders the coverage review queue for machine as
// markdown: a heading, a summary with the gap count, one bullet per gap and —
// only when missing is non-empty — a numbered section with the LLM-proposed
// hazards and the reason given for each.
func RenderCoverageQueue(machine string, gaps []CoverageGap, missing []MissingHazard) string {
	var sb strings.Builder
	fmt.Fprintf(&sb, "# Coverage blind-spot queue — %s\n\n", machine)
	fmt.Fprintf(&sb, "%d ISO 12100 group(s) (A-G) have no engine hazard. Propose-only — a human confirms whether the machine truly lacks it or a pattern/GT case is missing.\n\n", len(gaps))
	for i := range gaps {
		fmt.Fprintf(&sb, "- **%s** — %s\n", gaps[i].Group, gaps[i].Note)
	}

	if len(missing) == 0 {
		return sb.String()
	}

	fmt.Fprintf(&sb, "\n## LLM-proposed expected-but-missing hazards (%d)\n\n", len(missing))
	for i := range missing {
		entry := missing[i]
		fmt.Fprintf(&sb, "%d. [%s] %s\n - why: %s\n", i+1, entry.Group, entry.Hazard, entry.Why)
	}
	return sb.String()
}
@@ -0,0 +1,59 @@
package iace
import (
"context"
"strings"
"testing"
)
// TestFindCoverageGaps checks that groups without any hazard are reported as
// gaps and that groups with a hazard are not.
func TestFindCoverageGaps(t *testing.T) {
	var hazards []Hazard
	for _, cat := range []string{"mechanical_hazard", "thermal_hazard", "electrical_hazard", "material_environmental"} {
		hazards = append(hazards, Hazard{Category: cat})
	}

	reported := make(map[string]bool)
	for _, gap := range FindCoverageGaps(hazards) {
		reported[gap.Key] = true
	}

	missingGroups := []string{"pneumatic_hydraulic", "noise_vibration", "ergonomic"}
	for _, key := range missingGroups {
		if reported[key] {
			continue
		}
		t.Errorf("expected gap %s", key)
	}

	coveredGroups := []string{"mechanical", "thermal", "electrical", "material"}
	for _, key := range coveredGroups {
		if !reported[key] {
			continue
		}
		t.Errorf("did not expect gap %s (covered)", key)
	}
}
func TestBuildCoveragePrompt_ContainsContext(t *testing.T) {
produced := []Hazard{{Category: "thermal_hazard"}}
gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}
system, user := BuildCoveragePrompt("Geschirrspuelmaschine", "Eine Spuelmaschine mit Tank.", produced, gaps)
if !strings.Contains(system, "EN ISO 12100") || !strings.Contains(system, "JSON") {
t.Errorf("system prompt missing framing")
}
for _, want := range []string{"Geschirrspuelmaschine", "thermal_hazard", "F. Ergonomie", "Spuelmaschine mit Tank"} {
if !strings.Contains(user, want) {
t.Errorf("user prompt missing %q", want)
}
}
}
// TestProposeMissingHazards_ParsesAndDegrades checks that a JSON array wrapped
// in prose is parsed, and that a nil completer or a failing completion yields
// nil instead of an error.
func TestProposeMissingHazards_ParsesAndDegrades(t *testing.T) {
	ctx := context.Background()
	gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}

	wrapped := fakeCompleter{out: `Hier: [{"group":"F. Ergonomie","hazard":"Heben schwerer Koerbe","why":"manuelles Beladen"}] fertig`}
	parsed := ProposeMissingHazards(ctx, wrapped, "x", "n", nil, gaps)
	if len(parsed) != 1 || parsed[0].Hazard != "Heben schwerer Koerbe" {
		t.Fatalf("parse: got %+v", parsed)
	}

	if got := ProposeMissingHazards(ctx, nil, "x", "n", nil, gaps); got != nil {
		t.Errorf("nil completer must return nil")
	}

	failing := fakeCompleter{err: context.DeadlineExceeded}
	if got := ProposeMissingHazards(ctx, failing, "x", "n", nil, gaps); got != nil {
		t.Errorf("error must return nil")
	}
}
@@ -0,0 +1,152 @@
package iace
import (
"fmt"
"math"
"regexp"
"sort"
"strings"
)
// Offline dedup-candidate proposer (P2, type 1). DEV-TIME ONLY.
//
// It inspects the patterns that fired for one machine and proposes which look
// like duplicates, so a human (later an LLM) can decide a supersession/merge. It
// NEVER mutates the pattern library or the runtime — it only surfaces candidates.
// The deterministic GT screen (ScreenSupersession, proposer_screen.go) is the
// wall that proves a proposal is safe before a human ever sees it.
//
// Detection here is purely structural (category + zone + measure + scenario
// overlap) and therefore reproducible. Two safety rules bake in what P1 taught
// us about the dishwasher review:
// - only patterns with the SAME primary category are ever compared;
// - a pair with DIFFERENT operational states is NEVER proposed, because
// normal-operation and maintenance are legitimately distinct contexts with
// different protective measures (e.g. HP011 vs HP077). Merging them would
// erase the maintenance view.
// DedupCandidate is a proposed near-duplicate pattern pair for one machine class.
//
// FindDedupCandidates fills it: of the two compared patterns, the one with the
// strictly higher Priority becomes the keeper; on equal priority the pattern
// that came first in the input is kept. The three *Jaccard fields and Score are
// rounded to two decimals before being stored.
type DedupCandidate struct {
KeepPattern string `json:"keep_pattern"` // higher-priority survivor
DropPattern string `json:"drop_pattern"` // supersession target
KeepName string `json:"keep_name"`
KeepHazardName string `json:"keep_hazard_name"` // keep pattern ScenarioDE (for the GT-distinctness screen)
DropName string `json:"drop_name"` // == generated hazard Name (ScenarioDE) of the drop pattern
// Category is the shared primary hazard category (first HazardCats entry).
Category string `json:"category"`
// Overlap of the comma-separated zone terms, the suggested measure IDs and
// the scenario words (>= 4 letters), each as intersection/union.
ZoneJaccard float64 `json:"zone_jaccard"`
MeasureJaccard float64 `json:"measure_jaccard"`
ScenarioJaccard float64 `json:"scenario_jaccard"`
// Score is 0.4*measure + 0.3*zone + 0.3*scenario, computed on the unrounded overlaps.
Score float64 `json:"score"`
// Rationale is a one-line human-readable summary of the overlaps and the keep/drop decision.
Rationale string `json:"rationale"`
}
// FindDedupCandidates walks every unordered pair of fired patterns and reports
// the pairs that look like duplicates.
//
// A pair is only scored when both patterns share the same non-empty primary
// category and the same set of operational states. The score is
// 0.4*measure + 0.3*zone + 0.3*scenario Jaccard overlap; pairs scoring below
// threshold are discarded. Within a pair, the pattern with the strictly higher
// Priority is kept (the earlier one wins a tie).
//
// The result is ordered by (rounded) score descending, then by drop-pattern id;
// remaining ties keep their discovery order. It is nil when nothing qualifies.
func FindDedupCandidates(fired []PatternMatch, threshold float64) []DedupCandidate {
	var candidates []DedupCandidate

	for i, first := range fired {
		category := primaryCat(first)
		if category == "" {
			continue // a pattern without a category is never compared
		}
		for _, second := range fired[i+1:] {
			if primaryCat(second) != category {
				continue
			}
			if !sameOpStateSet(first.OperationalStates, second.OperationalStates) {
				continue // legitimate lifecycle variants — never propose a merge
			}

			zoneOverlap := tokenJaccard(zoneTokenSet(first.ZoneDE), zoneTokenSet(second.ZoneDE))
			measureOverlap := tokenJaccard(toSet(first.SuggestedMeasureIDs), toSet(second.SuggestedMeasureIDs))
			scenarioOverlap := tokenJaccard(wordTokenSet(first.ScenarioDE), wordTokenSet(second.ScenarioDE))

			score := 0.4*measureOverlap + 0.3*zoneOverlap + 0.3*scenarioOverlap
			if score < threshold {
				continue
			}

			keep, drop := first, second
			if first.Priority < second.Priority {
				keep, drop = second, first
			}

			rationale := fmt.Sprintf(
				"same category %q · measure overlap %.0f%% · zone overlap %.0f%% · scenario overlap %.0f%% → keep %s (P%d), supersede %s (P%d)",
				category, measureOverlap*100, zoneOverlap*100, scenarioOverlap*100,
				keep.PatternID, keep.Priority, drop.PatternID, drop.Priority)

			candidates = append(candidates, DedupCandidate{
				KeepPattern:     keep.PatternID,
				DropPattern:     drop.PatternID,
				KeepName:        keep.PatternName,
				KeepHazardName:  keep.ScenarioDE,
				DropName:        drop.ScenarioDE,
				Category:        category,
				ZoneJaccard:     round2(zoneOverlap),
				MeasureJaccard:  round2(measureOverlap),
				ScenarioJaccard: round2(scenarioOverlap),
				Score:           round2(score),
				Rationale:       rationale,
			})
		}
	}

	sort.SliceStable(candidates, func(x, y int) bool {
		left, right := candidates[x], candidates[y]
		if left.Score == right.Score {
			return left.DropPattern < right.DropPattern
		}
		return left.Score > right.Score
	})
	return candidates
}
// primaryCat returns the first hazard category of a pattern, or "" when the
// pattern lists none.
func primaryCat(pm PatternMatch) string {
	if cats := pm.HazardCats; len(cats) > 0 {
		return cats[0]
	}
	return ""
}
// sameOpStateSet reports whether a and b hold the same operational states when
// compared as sets (order and duplicates are ignored; nil equals empty).
func sameOpStateSet(a, b []string) bool {
	left, right := toSet(a), toSet(b)

	// Count how many states of the left set also occur on the right; the sets
	// are equal exactly when they have the same size and every one is shared.
	shared := 0
	for state := range left {
		if right[state] {
			shared++
		}
	}
	return len(left) == len(right) && shared == len(left)
}
// proposerWordSplit matches every run of characters that are not Unicode
// letters; it is used as the separator when tokenising free text.
var proposerWordSplit = regexp.MustCompile(`[^\p{L}]+`)

// zoneTokenSet lowercases a zone string, splits it on commas and returns the
// trimmed terms that are at least three runes long as a set.
func zoneTokenSet(zone string) map[string]bool {
	terms := make(map[string]bool)
	lowered := strings.ToLower(zone)
	for _, raw := range strings.Split(lowered, ",") {
		term := strings.TrimSpace(raw)
		if len([]rune(term)) < 3 {
			continue
		}
		terms[term] = true
	}
	return terms
}

// wordTokenSet lowercases free text, splits it on non-letter runs and returns
// the words of at least four runes as a set (shorter connectives are dropped).
func wordTokenSet(s string) map[string]bool {
	words := make(map[string]bool)
	lowered := strings.ToLower(s)
	for _, word := range proposerWordSplit.Split(lowered, -1) {
		if len([]rune(word)) < 4 {
			continue
		}
		words[word] = true
	}
	return words
}
// tokenJaccard returns |a ∩ b| / |a ∪ b| for two token sets. Keys of a count as
// shared when b maps them to true. Two empty sets yield 0 rather than a
// division by zero.
func tokenJaccard(a, b map[string]bool) float64 {
	shared := 0
	for token := range a {
		if b[token] {
			shared++
		}
	}

	// The union size is zero only when both sets are empty.
	total := len(a) + len(b) - shared
	if total == 0 {
		return 0
	}
	return float64(shared) / float64(total)
}
// round2 rounds x to two decimal places (half away from zero, per math.Round).
func round2(x float64) float64 {
	const scale = 100
	return math.Round(x*scale) / scale
}
@@ -0,0 +1,67 @@
package iace
import "testing"
// mkPM builds a PatternMatch fixture. The id doubles as the pattern name and
// cat becomes the single (primary) hazard category.
func mkPM(id, cat, zone, scenario string, prio int, measures, opstates []string) PatternMatch {
	var pm PatternMatch
	pm.PatternID = id
	pm.PatternName = id
	pm.Priority = prio
	pm.HazardCats = []string{cat}
	pm.ZoneDE = zone
	pm.ScenarioDE = scenario
	pm.SuggestedMeasureIDs = measures
	pm.OperationalStates = opstates
	return pm
}
func TestFindDedupCandidates_FindsOverlappingPair(t *testing.T) {
fired := []PatternMatch{
mkPM("HPa", "update_failure", "Steuerung, SPS", "Software-Update der Steuerung scheitert nach Abbruch", 80,
[]string{"M138", "M146"}, nil),
mkPM("HPb", "update_failure", "Steuerung, Antriebsregler", "Software-Update der Steuerung schlaegt fehl", 75,
[]string{"M138", "M146", "M141"}, nil),
mkPM("HPc", "mechanical_hazard", "Tuer", "Quetschen der Finger an der Tuer", 70,
[]string{"M003"}, nil),
}
got := FindDedupCandidates(fired, 0.4)
if len(got) != 1 {
t.Fatalf("want 1 candidate, got %d: %+v", len(got), got)
}
// Higher-priority pattern survives, lower one is the drop target.
if got[0].KeepPattern != "HPa" || got[0].DropPattern != "HPb" {
t.Errorf("want keep HPa / drop HPb, got keep %s / drop %s", got[0].KeepPattern, got[0].DropPattern)
}
if got[0].DropName != "Software-Update der Steuerung schlaegt fehl" {
t.Errorf("DropName must equal drop pattern ScenarioDE, got %q", got[0].DropName)
}
}
func TestFindDedupCandidates_LifecycleGuard(t *testing.T) {
// Same category, zone and measures — but normal-operation vs maintenance.
// These are legitimate variants (HP011 vs HP077) and must NOT be proposed.
fired := []PatternMatch{
mkPM("HP011", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 95,
[]string{"M481", "M482"}, nil),
mkPM("HP077", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 80,
[]string{"M481", "M482"}, []string{"maintenance"}),
}
if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
t.Fatalf("lifecycle guard failed: want 0 candidates, got %d: %+v", len(got), got)
}
}
func TestFindDedupCandidates_DifferentCategoryIgnored(t *testing.T) {
fired := []PatternMatch{
mkPM("HPa", "thermal_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil),
mkPM("HPb", "mechanical_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil),
}
if got := FindDedupCandidates(fired, 0.3); len(got) != 0 {
t.Fatalf("cross-category pair must not be proposed, got %d", len(got))
}
}
func TestFindDedupCandidates_BelowThresholdDropped(t *testing.T) {
fired := []PatternMatch{
mkPM("HPa", "mechanical_hazard", "Tuer", "Quetschen an der Tuer", 80, []string{"M003"}, nil),
mkPM("HPb", "mechanical_hazard", "Foerderband", "Einzug am Foerderband", 80, []string{"M540"}, nil),
}
if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
t.Fatalf("disjoint pair must be below threshold, got %d: %+v", len(got), got)
}
}
@@ -0,0 +1,154 @@
package iace
import (
"fmt"
"sort"
"strings"
)
// Foreign-framing proposer (P2 slice 4, type 2). DEV-TIME, propose-only.
//
// A pattern can fire for a machine yet describe its hazard with a zone text
// framed for a DIFFERENT machine (e.g. a dishwasher hazard whose zone names
// "Walzen, Transportbaender" or "Bearbeitungszone"). Such foreign framing leaks
// through terms that are NOT yet in domainGateTerms — once a term is a gate term,
// the ghost-pattern invariant already fences the pattern out. So we surface the
// candidates structurally: zone terms a fired pattern names that the machine's
// narrative never mentions (minus generic hazard-location vocabulary). A human
// (or the LLM) then decides: add a dom_* gate term, or re-frame the zone text.
//
// This OVER-surfaces by design — the human/LLM is the precision filter, not the
// detector (same contract as the dedup proposer).
// genericHazardStop lists hazard-LOCATION words that legitimately occur in zone
// texts without being repeated in a narrative. A zone part containing one of
// them is treated as echoed, so it never counts as evidence of foreign framing.
var genericHazardStop = map[string]bool{
	// "-stelle" hazard points
	"quetschstelle": true,
	"einzugstelle":  true,
	"einzugsstelle": true,
	"scherstelle":   true,
	"schneidstelle": true,
	"stossstelle":   true,
	"fangstelle":    true,
	"klemmstelle":   true,

	// danger zones / points
	"gefahrbereich":   true,
	"gefahrenbereich": true,
	"gefahrstelle":    true,
	"gefahrenstelle":  true,

	// generic areas
	"arbeitsbereich": true,
	"wirkbereich":    true,
	"schutzbereich":  true,
	"umgebung":       true,
	"bereich":        true,
	"zugang":         true,

	// generic machine parts
	"oberflaeche":  true,
	"oberflaechen": true,
	"gehaeuse":     true,
	"bauteil":      true,
	"bauteile":     true,
	"komponente":   true,
	"maschine":     true,
}
// FramingCandidate is a fired pattern whose zone text looks foreign for the machine.
// FindFramingCandidates produces one per fired pattern whose share of
// un-echoed zone terms reaches the requested minimum fraction.
type FramingCandidate struct {
// Pattern and Name are the pattern's ID and name.
Pattern string `json:"pattern"`
Name string `json:"name"`
// Category is the pattern's primary (first) hazard category.
Category string `json:"category"`
// Zone is the pattern's zone text as given.
Zone string `json:"zone"`
// OrphanTerms are the lowercased zone terms that have no echo in the narrative.
OrphanTerms []string `json:"orphan_terms"`
// OrphanFraction is len(OrphanTerms) / number of zone terms, rounded to two decimals.
OrphanFraction float64 `json:"orphan_fraction"`
Verdict string `json:"verdict"` // heuristic lean: foreign | plausible
// Evidence is a one-line summary: orphan count, term count and the orphan terms.
Evidence string `json:"evidence"`
}
// FindFramingCandidates reports the fired patterns whose zone text is mostly
// not reflected in the narrative.
//
// For each pattern the zone is split into terms (zoneParts); a term is an
// orphan when partEchoed finds no echo of it in the lowercased narrative. A
// pattern is reported when it has at least one orphan and the orphan share is
// at least minFraction. Patterns whose zone yields no terms are skipped.
//
// The result is sorted by (rounded) orphan fraction descending, then by pattern
// id, so it is deterministic. It is nil when nothing qualifies.
func FindFramingCandidates(fired []PatternMatch, narrative string, minFraction float64) []FramingCandidate {
	lowered := strings.ToLower(narrative)

	// Narrative words of five or more runes serve as stems for compound matching.
	var stems []string
	for _, word := range proposerWordSplit.Split(lowered, -1) {
		if len([]rune(word)) < 5 {
			continue
		}
		stems = append(stems, word)
	}

	var candidates []FramingCandidate
	for _, pm := range fired {
		terms := zoneParts(pm.ZoneDE)
		if len(terms) == 0 {
			continue
		}

		var orphans []string
		for _, term := range terms {
			if partEchoed(term, lowered, stems) {
				continue
			}
			orphans = append(orphans, term)
		}
		if len(orphans) == 0 {
			continue
		}

		share := float64(len(orphans)) / float64(len(terms))
		if share < minFraction {
			continue
		}

		evidence := fmt.Sprintf("%d/%d zone terms have no narrative echo: %s",
			len(orphans), len(terms), strings.Join(orphans, ", "))
		candidates = append(candidates, FramingCandidate{
			Pattern:        pm.PatternID,
			Name:           pm.PatternName,
			Category:       primaryCat(pm),
			Zone:           pm.ZoneDE,
			OrphanTerms:    orphans,
			OrphanFraction: round2(share),
			Verdict:        framingHeuristicVerdict(share),
			Evidence:       evidence,
		})
	}

	sort.SliceStable(candidates, func(x, y int) bool {
		left, right := candidates[x], candidates[y]
		if left.OrphanFraction == right.OrphanFraction {
			return left.Pattern < right.Pattern
		}
		return left.OrphanFraction > right.OrphanFraction
	})
	return candidates
}
// framingHeuristicVerdict maps an orphan fraction to a heuristic lean:
// "foreign" when (practically) nothing in the zone is echoed by the narrative,
// otherwise "plausible" (partial echo — a human confirms).
func framingHeuristicVerdict(frac float64) string {
	verdict := "plausible"
	if frac >= 0.99 {
		verdict = "foreign"
	}
	return verdict
}
// zoneParts lowercases a zone string and cuts it at commas, slashes,
// semicolons and parentheses. Each piece is trimmed; pieces shorter than four
// runes are discarded. The order of the remaining terms follows the input.
func zoneParts(zone string) []string {
	isSeparator := func(r rune) bool {
		return strings.ContainsRune(",/;()", r)
	}

	var terms []string
	for _, piece := range strings.FieldsFunc(strings.ToLower(zone), isSeparator) {
		term := strings.TrimSpace(piece)
		if len([]rune(term)) < 4 {
			continue
		}
		terms = append(terms, term)
	}
	return terms
}
// partEchoed reports whether a zone part is reflected in the narrative. The
// part is examined word by word (whitespace-separated); it counts as echoed as
// soon as one word
//   - is a generic hazard-location term (genericHazardStop), or
//   - has at least four runes and occurs as a substring of the narrative, or
//   - has at least four runes and contains one of the narrative stems.
//
// The last rule makes matching bidirectional so German compounds survive:
// narrative "Steuerung" echoes zone "Steuerungssystem". Callers are expected to
// pass narrative and part already lowercased.
func partEchoed(part, narrative string, narStems []string) bool {
	for _, word := range strings.Fields(part) {
		switch {
		case genericHazardStop[word]:
			return true
		case len([]rune(word)) < 4:
			continue // too short to be a meaningful match
		case strings.Contains(narrative, word):
			return true
		}
		for _, stem := range narStems {
			if strings.Contains(word, stem) {
				return true
			}
		}
	}
	return false
}
// RenderFramingQueue formats foreign-framing candidates as a markdown review
// queue: a title naming the machine, a one-line summary with the candidate
// count, then one numbered section per candidate with its category, zone,
// orphan terms and the suggested action for its verdict.
func RenderFramingQueue(machine string, candidates []FramingCandidate) string {
	var queue strings.Builder

	fmt.Fprintf(&queue, "# Foreign-framing review queue — %s\n\n", machine)
	fmt.Fprintf(&queue, "%d fired pattern(s) name zone terms the narrative never mentions. Propose-only — a human (or the LLM) decides: add a dom_* gate term, or re-frame the zone.\n\n", len(candidates))

	for idx, cand := range candidates {
		number := idx + 1
		percent := cand.OrphanFraction * 100
		orphanList := strings.Join(cand.OrphanTerms, ", ")

		fmt.Fprintf(&queue, "## %d. %s — %s [%s, orphan %.0f%%]\n", number, cand.Pattern, cand.Name, cand.Verdict, percent)
		fmt.Fprintf(&queue, "- category: %s\n- zone: %s\n", cand.Category, cand.Zone)
		fmt.Fprintf(&queue, "- orphan terms (no narrative echo): %s\n", orphanList)
		fmt.Fprintf(&queue, "- suggested action: %s\n\n", framingAction(cand.Verdict))
	}
	return queue.String()
}
// framingAction returns the review hint shown for a framing verdict: a
// gate-term / re-framing proposal for "foreign", and a confirm-only note for
// anything else.
func framingAction(verdict string) string {
	switch verdict {
	case "foreign":
		return "likely foreign-framed — propose a dom_* gate term for the orphan term(s), or re-frame the zone; human confirms + commits + pins a GT case"
	default:
		return "partial echo — likely generic vocabulary; human to confirm whether any orphan term is a foreign-machine component"
	}
}
@@ -0,0 +1,33 @@
package iace
import "testing"
func TestFindFramingCandidates_FlagsForeignZone(t *testing.T) {
narrative := "Gewerbliche Geschirrspuelmaschine mit Boiler und Tank. Die Tuer ist verriegelt."
fired := []PatternMatch{
mkPM("HPforeign", "mechanical_hazard", "Walzen, Transportbaender, Bearbeitungszone", "Einzug", 80, nil, nil),
mkPM("HPlocal", "thermal_hazard", "Boiler, Tank, Tuer", "Verbrennung", 80, nil, nil),
mkPM("HPgeneric", "mechanical_hazard", "Quetschstelle, Gefahrbereich", "Quetschen", 80, nil, nil),
}
got := FindFramingCandidates(fired, narrative, 0.6)
if len(got) != 1 || got[0].Pattern != "HPforeign" {
t.Fatalf("want only HPforeign flagged, got %+v", got)
}
if got[0].Verdict != "foreign" {
t.Errorf("fully-orphan zone should be 'foreign', got %s", got[0].Verdict)
}
}
func TestFindFramingCandidates_PartialEchoIsPlausible(t *testing.T) {
narrative := "Maschine mit Boiler und Tank."
fired := []PatternMatch{
mkPM("HPx", "thermal_hazard", "Boiler, Tank, Auspuffleitung", "x", 80, nil, nil),
}
got := FindFramingCandidates(fired, narrative, 0.3)
if len(got) != 1 {
t.Fatalf("want 1 candidate (1/3 orphan >= 0.3), got %d", len(got))
}
if got[0].Verdict != "plausible" || len(got[0].OrphanTerms) != 1 || got[0].OrphanTerms[0] != "auspuffleitung" {
t.Errorf("want plausible + orphan [auspuffleitung], got %s %v", got[0].Verdict, got[0].OrphanTerms)
}
}
@@ -0,0 +1,123 @@
package iace
import "github.com/google/uuid"
// Non-test plumbing for the offline proposer (P2 slice 3): run the engine for a
// narrative and produce the fired patterns + the engine-built hazards/mitigations
// the dedup proposer and GT screen consume. This is the same pipeline the GT
// benchmark tests use, lifted out of test scope so the dev-time CLI can call it.
// universalLifecyclePhases is appended to the lifecycle phases parsed from the
// narrative, so that patterns gated to one specific lifecycle (maintenance,
// cleaning, setup, fault clearing) still fire: the proposer wants the full
// hazard picture, not only normal-operation hazards.
var universalLifecyclePhases = []string{
	"normal_operation",
	"maintenance",
	"cleaning",
	"setup",
	"fault_clearing",
}
// BuildProposerInput runs the pattern engine for one narrative and returns the
// engine-built hazards, their mitigations and the fired patterns that were kept.
//
// Steps: parse the narrative; collect the IDs/names of all non-negated
// components and the IDs of all energy sources; match patterns with the parsed
// lifecycle phases plus universalLifecyclePhases and with extraMachineTypes
// plus machineType (when non-empty); keep only patterns IsPatternRelevant
// accepts; convert the kept patterns into hazards and mitigations.
//
// NOTE: it does not apply the CE cyber-category skip, so the proposer view may
// include cyber/AI hazards that the CE log excludes — harmless for the GT
// recall screen (they match no CE ground-truth entry).
func BuildProposerInput(narrative, machineType string, extraMachineTypes []string) ([]Hazard, []Mitigation, []PatternMatch) {
	parsed := ParseNarrative(narrative, machineType)

	var componentIDs, componentNames []string
	for _, comp := range parsed.Components {
		if !comp.Negated {
			componentIDs = append(componentIDs, comp.LibraryID)
			componentNames = append(componentNames, comp.NameDE)
		}
	}

	var energyIDs []string
	for _, src := range parsed.EnergySources {
		energyIDs = append(energyIDs, src.SourceID)
	}

	// Copy before appending so the caller's slice is never written to.
	machineTypes := make([]string, 0, len(extraMachineTypes)+1)
	machineTypes = append(machineTypes, extraMachineTypes...)
	if machineType != "" {
		machineTypes = append(machineTypes, machineType)
	}

	lifecycles := make([]string, 0, len(parsed.LifecyclePhases)+len(universalLifecyclePhases))
	lifecycles = append(lifecycles, parsed.LifecyclePhases...)
	lifecycles = append(lifecycles, universalLifecyclePhases...)

	input := MatchInput{
		ComponentLibraryIDs: componentIDs,
		EnergySourceIDs:     energyIDs,
		LifecyclePhases:     lifecycles,
		CustomTags:          parsed.CustomTags,
		OperationalStates:   parsed.OperationalStates,
		StateTransitions:    parsed.StateTransitions,
		HumanRoles:          parsed.Roles,
		MachineTypes:        machineTypes,
	}
	matched := NewPatternEngine().Match(input)

	relevant := make([]PatternMatch, 0, len(matched.MatchedPatterns))
	for _, pm := range matched.MatchedPatterns {
		if !IsPatternRelevant(pm, narrative, componentNames) {
			continue
		}
		relevant = append(relevant, pm)
	}

	// Work on a copy of the engine output so the original stays untouched.
	narrowed := *matched
	narrowed.MatchedPatterns = relevant

	hazards, mitigations := patternsToHazardsAndMitigations(&narrowed)
	return hazards, mitigations, relevant
}
// patternsToHazardsAndMitigations turns engine output into the hazard and
// mitigation entities the benchmark and proposer compare on. It is simplified
// compared with InitializeProject (no risk estimation, no norm refs): only
// category, zone, scenario and measures are needed.
//
// Each matched pattern yields one hazard with a fresh UUID; its name is the
// pattern's ScenarioDE, falling back to the pattern name when that is empty.
// Each suggested measure yields one mitigation per source pattern that produced
// a hazard, named after the protective-measure library entry (or the measure ID
// when the library has no name for it).
func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
	patterns := out.MatchedPatterns
	hazards := make([]Hazard, 0, len(patterns))
	hazardByPattern := make(map[string]uuid.UUID, len(patterns))

	for _, pm := range patterns {
		var category, phase string
		if cats := pm.HazardCats; len(cats) > 0 {
			category = cats[0]
		}
		if phases := pm.ApplicableLifecycles; len(phases) > 0 {
			phase = phases[0]
		}

		title := pm.ScenarioDE
		if title == "" {
			title = pm.PatternName
		}

		hazard := Hazard{
			ID:             uuid.New(),
			Name:           title,
			Category:       category,
			Description:    pm.ScenarioDE,
			Scenario:       pm.ScenarioDE,
			TriggerEvent:   pm.TriggerDE,
			PossibleHarm:   pm.HarmDE,
			AffectedPerson: pm.AffectedDE,
			HazardousZone:  pm.ZoneDE,
			LifecyclePhase: phase,
		}
		hazards = append(hazards, hazard)
		hazardByPattern[pm.PatternID] = hazard.ID
	}

	libraryNames := make(map[string]string)
	for _, measure := range GetProtectiveMeasureLibrary() {
		libraryNames[measure.ID] = measure.Name
	}

	var mitigations []Mitigation
	for _, suggestion := range out.SuggestedMeasures {
		label := suggestion.MeasureID
		if known := libraryNames[suggestion.MeasureID]; known != "" {
			label = known
		}
		for _, patternID := range suggestion.SourcePatterns {
			hazardID, found := hazardByPattern[patternID]
			if !found {
				continue // source pattern produced no hazard in this run
			}
			mitigations = append(mitigations, Mitigation{
				ID:       uuid.New(),
				HazardID: hazardID,
				Name:     label,
			})
		}
	}
	return hazards, mitigations
}
@@ -0,0 +1,25 @@
package iace
import "testing"
// TestBuildProposerInput_WarewashingFires runs the proposer pipeline on the
// warewashing narrative and checks that it yields patterns and hazards,
// including the warewashing-specific pattern HP2201.
func TestBuildProposerInput_WarewashingFires(t *testing.T) {
	const machine = "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)"

	hazards, _, fired := BuildProposerInput(warewashingNarrative, machine, []string{"food_processing"})
	if len(fired) == 0 || len(hazards) == 0 {
		t.Fatalf("want fired patterns + hazards, got %d patterns / %d hazards", len(fired), len(hazards))
	}

	found := false
	for _, pm := range fired {
		if pm.PatternID == "HP2201" {
			found = true
			break
		}
	}
	if !found {
		t.Errorf("warewashing-specific HP2201 must fire via BuildProposerInput")
	}
}
@@ -0,0 +1,174 @@
package iace
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/breakpilot/ai-compliance-sdk/internal/llm"
)
// Semantic judgement over RECALL-SAFE dedup candidates (P2 slice 2). DEV-TIME,
// propose-only. The deterministic GT wall (proposer_screen.go) has already
// removed candidates that would drop recall or that credit different GT entries;
// the judge only adds an opinion on whether the survivors are truly the same
// hazard, plus a rationale, for the human review queue. It NEVER mutates anything.
//
// The judge is pluggable behind CandidateJudge so the runtime/tests stay
// deterministic (HeuristicJudge) while the dev-time CLI can plug in the
// non-deterministic LLM (LLMJudge over the shared llm.ProviderRegistry).
// Verdict values a judge may return. parseJudgeJSON coerces any other verdict
// string coming back from the LLM to VerdictUncertain.
const (
VerdictDuplicate = "duplicate"
VerdictDistinct = "distinct"
VerdictUncertain = "uncertain"
)
// JudgedProposal is one candidate with its GT-wall result and the judge's opinion.
// RenderProposalQueue prints one review-queue section per JudgedProposal.
type JudgedProposal struct {
Candidate DedupCandidate `json:"candidate"`
Screen ScreenResult `json:"screen"`
// Verdict is one of VerdictDuplicate, VerdictDistinct or VerdictUncertain.
Verdict string `json:"verdict"`
// Confidence is the judge's self-reported confidence; HeuristicJudge always reports "low".
Confidence string `json:"confidence"`
Rationale string `json:"rationale"`
// Judge names the judge that produced the verdict — presumably
// CandidateJudge.Name(); confirm at the call site that fills it.
Judge string `json:"judge"`
}
// CandidateJudge decides whether two near-duplicate patterns are the same hazard.
// HeuristicJudge (deterministic) and LLMJudge (model-backed) implement it.
type CandidateJudge interface {
// Name returns a short identifier of the judge ("heuristic", "llm").
Name() string
// Judge returns a verdict (one of the Verdict* constants), a confidence and
// a free-text rationale for candidate c. a and b are the two patterns of
// the pair — NOTE(review): which of them is keep vs. drop is decided by the
// caller and is not visible here.
Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (verdict, confidence, rationale string)
}
// HeuristicJudge is the deterministic default/fallback judge. It looks only at
// the candidate's overlap numbers and always reports "low" confidence: it is a
// placeholder for the LLM and deliberately answers "uncertain" on the hard
// cases (low text overlap, shared measures), so the queue shows exactly where
// the LLM earns its keep.
type HeuristicJudge struct{}

// Name identifies this judge as "heuristic".
func (HeuristicJudge) Name() string { return "heuristic" }

// Judge derives a verdict from the candidate's Jaccard overlaps alone; the
// context and both patterns are ignored.
//
//   - duplicate: scenario overlap >= 0.5, or zone AND measure overlap >= 0.5
//   - distinct:  (near-)identical measures, no zone overlap, scenario overlap < 0.3
//   - uncertain: everything else
func (HeuristicJudge) Judge(_ context.Context, c DedupCandidate, _, _ PatternMatch) (string, string, string) {
	const confidence = "low"

	scenarioHigh := c.ScenarioJaccard >= 0.5
	zoneAndMeasureHigh := c.ZoneJaccard >= 0.5 && c.MeasureJaccard >= 0.5
	if scenarioHigh || zoneAndMeasureHigh {
		return VerdictDuplicate, confidence, "structural: high scenario, or combined zone+measure, overlap"
	}

	measuresOnly := c.MeasureJaccard >= 0.99 && c.ZoneJaccard == 0 && c.ScenarioJaccard < 0.3
	if measuresOnly {
		return VerdictDistinct, confidence, "structural: identical measures but no zone/scenario overlap — likely distinct hazards sharing generic measures"
	}

	return VerdictUncertain, confidence, "structural signal inconclusive — needs the LLM judge"
}
// LLMJudge asks an offline model to make the semantic call. Non-deterministic, so
// it lives only in the dev-time tool, never in tests or the runtime. It degrades
// to "uncertain" on a missing completer and on any transport or parse error —
// it must never break the run.
type LLMJudge struct {
	// Completer is the text-in/text-out model backend. A nil Completer is
	// tolerated: Judge then answers "uncertain" instead of panicking.
	Completer LLMCompleter
	// MachineClass is the machine class named in the prompt.
	MachineClass string
}

// Name identifies this judge as "llm".
func (LLMJudge) Name() string { return "llm" }

// Judge builds the same-vs-distinct prompt for patterns a and b, sends it to
// the completer and parses the JSON answer into (verdict, confidence,
// rationale). The candidate c is not part of the prompt.
//
// Every failure degrades to (VerdictUncertain, "low", reason): a nil Completer
// and a completer error yield a rationale starting with "LLM error", and
// unparseable output is handled by parseJudgeJSON.
func (j LLMJudge) Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (string, string, string) {
	// Guard first: calling a method on a nil interface would panic and abort
	// the whole proposer run.
	if j.Completer == nil {
		return VerdictUncertain, "low", "LLM error: no completer configured"
	}

	system, user := BuildJudgePrompt(j.MachineClass, a, b)
	raw, err := j.Completer.Complete(ctx, system, user)
	if err != nil {
		return VerdictUncertain, "low", "LLM error: " + err.Error()
	}
	return parseJudgeJSON(raw)
}
// BuildJudgePrompt is the real LLM artifact — built and unit-tested deterministically
// even though the call itself is not. It frames the ISO 12100 same-vs-distinct
// question and forces a JSON answer.
//
// The system prompt (German) sets the expert role, states when two hazards
// count as distinct (differing location, trigger or harm mechanism, even with
// equal category and measures) and demands a JSON object with the keys
// verdict, confidence and rationale. The user prompt lists the machine class
// and, for each of the two patterns, its ID, name, primary category, zone,
// scenario, trigger, harm and comma-joined suggested measure IDs.
func BuildJudgePrompt(machineClass string, a, b PatternMatch) (system, user string) {
system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
"Entscheide, ob zwei generierte Gefaehrdungen fuer DIESE Maschine DIESELBE Gefaehrdung " +
"beschreiben (Dublette) oder fachlich VERSCHIEDENE Gefaehrdungen sind, die nur zufaellig " +
"dieselben Schutzmassnahmen teilen. Verschieden, wenn Wirkort, Ausloeser oder " +
"Schadensmechanismus abweichen — auch bei gleicher Kategorie und gleichen Massnahmen. " +
"Antworte AUSSCHLIESSLICH als JSON: " +
`{"verdict":"duplicate|distinct|uncertain","confidence":"high|medium|low","rationale":"..."}.`
// The template below is a raw string: every character inside it, including
// line breaks, is sent to the model verbatim.
user = fmt.Sprintf(`Maschinenklasse: %s
Gefaehrdung A (%s):
Name: %s
Kategorie: %s
Zone: %s
Szenario: %s
Ausloeser: %s
Schaden: %s
Massnahmen: %s
Gefaehrdung B (%s):
Name: %s
Kategorie: %s
Zone: %s
Szenario: %s
Ausloeser: %s
Schaden: %s
Massnahmen: %s
Sind A und B dieselbe Gefaehrdung fuer diese Maschine?`,
machineClass,
a.PatternID, a.PatternName, primaryCat(a), a.ZoneDE, a.ScenarioDE, a.TriggerDE, a.HarmDE, strings.Join(a.SuggestedMeasureIDs, ", "),
b.PatternID, b.PatternName, primaryCat(b), b.ZoneDE, b.ScenarioDE, b.TriggerDE, b.HarmDE, strings.Join(b.SuggestedMeasureIDs, ", "))
return system, user
}
// parseJudgeJSON extracts the judge's answer from raw model output.
//
// The JSON object is taken as the span from the first "{" to the last "}", so
// surrounding chatter is tolerated. Verdict and confidence are normalised
// (trimmed, lowercased) before validation, because models frequently answer
// "Duplicate" or "HIGH" although the prompt asks for lowercase values:
//
//   - verdict must be one of the Verdict* constants, otherwise VerdictUncertain
//   - confidence must be "high", "medium" or "low", otherwise "low"
//
// The rationale is returned as sent. Output without a JSON object, or with
// JSON that does not decode, yields (VerdictUncertain, "low", reason) — parsing
// never fails the run.
func parseJudgeJSON(raw string) (verdict, confidence, rationale string) {
	start, end := strings.Index(raw, "{"), strings.LastIndex(raw, "}")
	if start < 0 || end <= start {
		return VerdictUncertain, "low", "unparseable LLM output"
	}

	var answer struct {
		Verdict    string `json:"verdict"`
		Confidence string `json:"confidence"`
		Rationale  string `json:"rationale"`
	}
	if err := json.Unmarshal([]byte(raw[start:end+1]), &answer); err != nil {
		return VerdictUncertain, "low", "unparseable LLM JSON: " + err.Error()
	}

	verdict = strings.ToLower(strings.TrimSpace(answer.Verdict))
	switch verdict {
	case VerdictDuplicate, VerdictDistinct, VerdictUncertain:
		// accepted as is
	default:
		verdict = VerdictUncertain
	}

	confidence = strings.ToLower(strings.TrimSpace(answer.Confidence))
	switch confidence {
	case "high", "medium", "low":
		// accepted as is
	default:
		// Missing or free-form confidence must not look stronger than it is.
		confidence = "low"
	}

	return verdict, confidence, answer.Rationale
}
// LLMCompleter is the minimal text-in/text-out the LLM judge needs. Tests pass a
// stub; the dev-time tool passes a registry-backed adapter (NewRegistryCompleter).
type LLMCompleter interface {
// Complete sends a system and a user prompt to the model and returns the
// raw text of its reply, or an error when the call fails.
Complete(ctx context.Context, system, user string) (string, error)
}
// registryCompleter implements LLMCompleter on top of the shared
// llm.ProviderRegistry, always addressing one fixed model.
type registryCompleter struct {
	reg   *llm.ProviderRegistry
	model string
}

// NewRegistryCompleter adapts the shared llm.ProviderRegistry to LLMCompleter so
// the proposer can reuse the platform's offline model wiring (e.g. self-hosted qwen).
func NewRegistryCompleter(reg *llm.ProviderRegistry, model string) LLMCompleter {
	adapter := registryCompleter{reg: reg, model: model}
	return &adapter
}

// Complete sends the system and user prompt as a two-message chat at
// temperature 0 and returns the content of the reply message. A registry error
// is passed through unchanged together with an empty string.
func (rc *registryCompleter) Complete(ctx context.Context, system, user string) (string, error) {
	request := &llm.ChatRequest{
		Model: rc.model,
		Messages: []llm.Message{
			{Role: "system", Content: system},
			{Role: "user", Content: user},
		},
		Temperature: 0,
	}

	reply, err := rc.reg.Chat(ctx, request)
	if err != nil {
		return "", err
	}
	return reply.Message.Content, nil
}
@@ -0,0 +1,104 @@
package iace
import (
"context"
"errors"
"strings"
"testing"
)
// TestHeuristicJudge_Verdicts drives HeuristicJudge through one case per rule
// (two duplicate rules, the distinct rule, the uncertain fallback) and checks
// that the confidence is always "low".
func TestHeuristicJudge_Verdicts(t *testing.T) {
	type overlapCase struct {
		name        string
		zone, meas  float64
		scenario    float64
		wantVerdict string
	}
	cases := []overlapCase{
		{name: "high scenario overlap -> duplicate", zone: 0, meas: 0.3, scenario: 0.6, wantVerdict: VerdictDuplicate},
		{name: "high zone+measure -> duplicate", zone: 0.6, meas: 0.6, scenario: 0.1, wantVerdict: VerdictDuplicate},
		{name: "identical measures, no text -> distinct", zone: 0, meas: 1.0, scenario: 0.0, wantVerdict: VerdictDistinct},
		{name: "shared measures, low text -> uncertain", zone: 0, meas: 0.67, scenario: 0.19, wantVerdict: VerdictUncertain},
	}

	judge := HeuristicJudge{}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			candidate := DedupCandidate{
				ZoneJaccard:     tc.zone,
				MeasureJaccard:  tc.meas,
				ScenarioJaccard: tc.scenario,
			}
			verdict, confidence, _ := judge.Judge(context.Background(), candidate, PatternMatch{}, PatternMatch{})
			if verdict != tc.wantVerdict {
				t.Errorf("verdict: want %s, got %s", tc.wantVerdict, verdict)
			}
			if confidence != "low" {
				t.Errorf("heuristic confidence must be low, got %s", confidence)
			}
		})
	}
}
// TestBuildJudgePrompt_ContainsKeyFacts checks that the system prompt carries
// the ISO/JSON framing and that the user prompt names the machine class and
// the identifying facts of both patterns.
func TestBuildJudgePrompt_ContainsKeyFacts(t *testing.T) {
	boiler := PatternMatch{
		PatternID:           "HPa",
		PatternName:         "Heisse Flaeche",
		HazardCats:          []string{"thermal_hazard"},
		ZoneDE:              "Boiler",
		ScenarioDE:          "Beruehrung heisser Boiler",
		SuggestedMeasureIDs: []string{"M071"},
	}
	ware := PatternMatch{
		PatternID:           "HPb",
		PatternName:         "Heisses Spuelgut",
		HazardCats:          []string{"thermal_hazard"},
		ZoneDE:              "Spuelgut",
		ScenarioDE:          "Beruehrung heisses Geschirr",
		SuggestedMeasureIDs: []string{"M071"},
	}

	system, user := BuildJudgePrompt("Geschirrspuelmaschine", boiler, ware)

	systemFacts := []string{"EN ISO 12100", "JSON", "verdict"}
	for _, fact := range systemFacts {
		if strings.Contains(system, fact) {
			continue
		}
		t.Errorf("system prompt missing %q", fact)
	}

	userFacts := []string{"Geschirrspuelmaschine", "HPa", "HPb", "Boiler", "Spuelgut", "thermal_hazard"}
	for _, fact := range userFacts {
		if strings.Contains(user, fact) {
			continue
		}
		t.Errorf("user prompt missing %q", fact)
	}
}
// fakeCompleter is a canned LLMCompleter for tests: it ignores the prompts and
// always answers with the configured output and error.
type fakeCompleter struct {
	out string
	err error
}

// Complete returns the canned reply and error unchanged.
func (f fakeCompleter) Complete(_ context.Context, _, _ string) (string, error) {
	reply, failure := f.out, f.err
	return reply, failure
}
// TestLLMJudge_ParsesAndDegrades covers the LLM judge's answer handling: JSON
// wrapped in chatter parses, an unknown verdict becomes "uncertain", and both a
// transport error and non-JSON output degrade to "uncertain" without panicking.
func TestLLMJudge_ParsesAndDegrades(t *testing.T) {
	ctx := context.Background()
	cand := DedupCandidate{KeepPattern: "HPa", DropPattern: "HPb"}
	ask := func(j LLMJudge) (string, string, string) {
		return j.Judge(ctx, cand, PatternMatch{}, PatternMatch{})
	}

	// Well-formed JSON, even wrapped in chatter, parses.
	chatty := LLMJudge{
		Completer:    fakeCompleter{out: `Sicher. {"verdict":"distinct","confidence":"high","rationale":"andere Wirkorte"}`},
		MachineClass: "x",
	}
	verdict, confidence, rationale := ask(chatty)
	if verdict != VerdictDistinct || confidence != "high" || rationale != "andere Wirkorte" {
		t.Errorf("parse: got %s/%s/%q", verdict, confidence, rationale)
	}

	// Unknown verdict value normalises to uncertain.
	unknown := LLMJudge{Completer: fakeCompleter{out: `{"verdict":"maybe","confidence":"medium","rationale":"x"}`}}
	if verdict, _, _ := ask(unknown); verdict != VerdictUncertain {
		t.Errorf("unknown verdict must normalise to uncertain, got %s", verdict)
	}

	// Transport error degrades gracefully, never panics.
	broken := LLMJudge{Completer: fakeCompleter{err: errors.New("connection refused")}}
	if verdict, _, rationale := ask(broken); verdict != VerdictUncertain || !strings.Contains(rationale, "LLM error") {
		t.Errorf("error path: got %s / %q", verdict, rationale)
	}

	// Garbage (no JSON) degrades to uncertain.
	garbage := LLMJudge{Completer: fakeCompleter{out: "no json here"}}
	if verdict, _, _ := ask(garbage); verdict != VerdictUncertain {
		t.Errorf("garbage must degrade to uncertain, got %s", verdict)
	}
}
// TestRenderProposalQueue_ShowsActions renders a single "duplicate" proposal
// and checks that the queue names both patterns, the category, the
// supersession action and the propose-only disclaimer.
func TestRenderProposalQueue_ShowsActions(t *testing.T) {
	proposal := JudgedProposal{
		Candidate:  DedupCandidate{KeepPattern: "HP807", DropPattern: "HP033", Category: "update_failure", Score: 0.32},
		Screen:     ScreenResult{RecallBefore: 1, RecallAfter: 1},
		Verdict:    VerdictDuplicate,
		Confidence: "medium",
		Rationale:  "same update failure",
		Judge:      "llm",
	}

	rendered := RenderProposalQueue("Geschirrspuelmaschine", []JudgedProposal{proposal})

	expected := []string{"HP807", "HP033", "update_failure", "supersession", "Propose-only"}
	for _, fragment := range expected {
		if strings.Contains(rendered, fragment) {
			continue
		}
		t.Errorf("queue missing %q\n%s", fragment, rendered)
	}
}
@@ -0,0 +1,47 @@
package iace
import (
"fmt"
"strings"
)
// RenderProposalQueue formats judged dedup proposals as the markdown queue a
// human reviews. Output depends only on the arguments. Nothing here applies a
// change — every entry is a suggestion for a human to confirm, edit, commit,
// and pin with a GT case.
//
// Each proposal becomes a numbered section showing keep/drop pattern, judge and
// verdict, the overlap scores, GT recall before/after the drop, both hazard
// names, the judge's rationale and the suggested action.
func RenderProposalQueue(machine string, proposals []JudgedProposal) string {
	var queue strings.Builder

	fmt.Fprintf(&queue, "# Dedup proposal queue — %s\n\n", machine)
	fmt.Fprintf(&queue, "%d candidate(s) survived the deterministic GT wall. Propose-only — nothing is applied automatically.\n\n", len(proposals))

	for idx, proposal := range proposals {
		cand, screen := proposal.Candidate, proposal.Screen

		fmt.Fprintf(&queue, "## %d. keep %s ⊃ drop %s [%s → %s (%s)]\n",
			idx+1, cand.KeepPattern, cand.DropPattern, proposal.Judge, proposal.Verdict, proposal.Confidence)
		fmt.Fprintf(&queue, "- category %s · score %.2f (measures %.0f%%, zone %.0f%%, scenario %.0f%%)\n",
			cand.Category, cand.Score, cand.MeasureJaccard*100, cand.ZoneJaccard*100, cand.ScenarioJaccard*100)
		fmt.Fprintf(&queue, "- GT recall %.1f%% → %.1f%% when %s is dropped (wall: %s)\n",
			screen.RecallBefore*100, screen.RecallAfter*100, cand.DropPattern, wallNote(screen))
		fmt.Fprintf(&queue, "- keep: %s\n- drop: %s\n", cand.KeepHazardName, cand.DropName)
		fmt.Fprintf(&queue, "- judge rationale: %s\n", proposal.Rationale)
		fmt.Fprintf(&queue, "- suggested action: %s\n\n", suggestedAction(proposal))
	}
	return queue.String()
}
// wallNote summarises the GT-wall result for the queue: "recall-safe" unless
// keep and drop credit different ground-truth entries, in which case both
// entries are named.
func wallNote(s ScreenResult) string {
	if !s.DistinctGT {
		return "recall-safe"
	}
	return fmt.Sprintf("distinct GT %s vs %s", s.KeepGT, s.DropGT)
}
// suggestedAction maps a proposal's verdict to the next step shown in the
// queue: a supersession proposal for duplicates, "keep both" for distinct
// hazards, and a request for review for anything else.
func suggestedAction(p JudgedProposal) string {
	if p.Verdict == VerdictDuplicate {
		return fmt.Sprintf("add %s to a supersession set, then a human confirms + commits + pins a GT case", p.Candidate.DropPattern)
	}
	if p.Verdict == VerdictDistinct {
		return "keep both — judge considers them distinct hazards"
	}
	return "needs human (or higher-confidence LLM) review — no automatic action"
}
@@ -0,0 +1,61 @@
package iace
import "github.com/google/uuid"
// ScreenResult is the deterministic GT verdict for one proposed supersession.
// RenderProposalQueue prints RecallBefore/RecallAfter multiplied by 100 as
// percentages, so both are presumably fractions in [0,1] — confirm against
// CompareBenchmark.
type ScreenResult struct {
// RecallBefore / RecallAfter: GT recall with all hazards vs. with the drop hazard removed.
RecallBefore float64 `json:"recall_before"`
RecallAfter float64 `json:"recall_after"`
KeepGT string `json:"keep_gt,omitempty"` // GT entry the keeper credits (if any)
DropGT string `json:"drop_gt,omitempty"` // GT entry the drop credits (if any)
DistinctGT bool `json:"distinct_gt"` // keep & drop credit DIFFERENT GT entries -> distinct hazards
Safe bool `json:"safe"` // recall preserved AND not distinct
}
// ScreenSupersession is the deterministic gate between proposing a
// supersession and deciding on it. The result is marked Safe only when both
// of the following hold:
//
//  1. Recall does not fall once every hazard named dropHazardName (together
//     with the mitigations attached to those hazards) is removed. A fall means
//     the drop is load-bearing for GT coverage.
//  2. Keeper and drop are not credited with DIFFERENT ground-truth entries.
//     Unchanged recall alone is not enough: two genuinely distinct hazards
//     that share the same measures (e.g. hot boiler surface vs hot ware on
//     unloading) leave recall untouched when one is dropped, yet must not be
//     merged. A hazard that matched no GT entry does not count as distinct.
//
// A pair that passes is only RECALL-SAFE: it is a candidate for semantic
// confirmation by a human (and, in slice 2, an LLM). Hazards are identified by
// name throughout; both recall figures come from CompareBenchmark, which is
// run once on the full input and once on filtered copies.
func ScreenSupersession(gt *GroundTruth, hazards []Hazard, mits []Mitigation, keepHazardName, dropHazardName string) ScreenResult {
	baseline := CompareBenchmark(gt, hazards, mits)

	// Index, by engine hazard name, the GT entry each matched hazard credits.
	creditedGT := make(map[string]string, len(baseline.MatchedPairs))
	for _, pair := range baseline.MatchedPairs {
		creditedGT[pair.EngineHazard.Name] = pair.GTEntry.Nr
	}
	keepGT := creditedGT[keepHazardName]
	dropGT := creditedGT[dropHazardName]

	// Distinct only when both sides matched something and the entries differ.
	bothCredited := keepGT != "" && dropGT != ""
	distinct := bothCredited && keepGT != dropGT

	// Split off every hazard carrying the drop name, remembering its ID so the
	// mitigations hanging off it can be filtered out as well.
	remaining := make([]Hazard, 0, len(hazards))
	removedIDs := make(map[uuid.UUID]bool)
	for i := range hazards {
		if hazards[i].Name != dropHazardName {
			remaining = append(remaining, hazards[i])
			continue
		}
		removedIDs[hazards[i].ID] = true
	}

	remainingMits := make([]Mitigation, 0, len(mits))
	for i := range mits {
		if removedIDs[mits[i].HazardID] {
			continue
		}
		remainingMits = append(remainingMits, mits[i])
	}

	reduced := CompareBenchmark(gt, remaining, remainingMits)
	recallHeld := reduced.CoverageScore >= baseline.CoverageScore

	return ScreenResult{
		RecallBefore: baseline.CoverageScore,
		RecallAfter:  reduced.CoverageScore,
		KeepGT:       keepGT,
		DropGT:       dropGT,
		DistinctGT:   distinct,
		Safe:         recallHeld && !distinct,
	}
}
@@ -160,6 +160,7 @@ func (s *Store) ListHazards(ctx context.Context, projectID uuid.UUID) ([]Hazard,
hazards = append(hazards, h)
}
SortHazardsByISO12100(hazards)
return hazards, nil
}