Merge pull request 'feat(ai-sdk): IACE hazard-engine quality + offline proposer (Session 4)' (#2) from feat/iace-gt-warewashing into main

2026-06-26 11:48:09 +02:00
parent eca8ec43c5 4d225f73a8
commit da466b3821
26 changed files with 1591 additions and 78 deletions
@@ -34,6 +34,8 @@ func main() {
 		cmdEcho(os.Args[2:])
 	case "hierarchy":
 		cmdHierarchy(os.Args[2:])
+	case "propose":
+		cmdPropose(os.Args[2:])
 	default:
 		usage()
 		os.Exit(2)
@@ -41,7 +43,7 @@ func main() {
 }

 func usage() {
-	fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy> [args]")
+	fmt.Fprintln(os.Stderr, "Usage: iace-audit <reachability|consistency|vocabulary|echo|hierarchy|propose> [args]")
 }

 func cmdReachability(_ []string) {
@@ -0,0 +1,188 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+
+	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
+	"github.com/breakpilot/ai-compliance-sdk/internal/iace/audit"
+	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
+)
+
+// narrativeInput is the JSON payload decoded from the <narrative.json> argument
+// of the propose subcommand. MachineType labels the generated reports and is
+// handed to the proposer and the judge; Narrative is the free text and must be
+// non-empty (cmdPropose exits otherwise); MachineTypes is an optional list that
+// is forwarded to iace.BuildProposerInput.
+type narrativeInput struct {
+	MachineType  string   `json:"machine_type"`
+	Narrative    string   `json:"narrative"`
+	MachineTypes []string `json:"machine_types,omitempty"`
+}
+
+// cmdPropose — Method P: offline dedup-candidate proposer.
+//
+//	iace-audit propose <narrative.json> [<ground-truth.json>]
+//
+// Detect near-duplicate patterns, screen survivors against a ground truth (if
+// given), judge them (heuristic by default, LLM when enabled), and write the
+// human-review queue to audit-reports/proposals.{md,json}. The same run also
+// writes three further queues: framing.{md,json} (foreign-framing candidates),
+// vocab.{md,json} (vocab->tag proposals) and coverage.{md,json} (empty ISO
+// 12100 groups; LLM-proposed hazards appear in the markdown only). Propose-only —
+// it writes reports and never mutates the pattern library.
+//
+// Exits with status 2 when the narrative argument is missing or the decoded
+// narrative is empty.
+//
+// Env:
+//
+//	IACE_PROPOSE_THRESHOLD   candidate score threshold (default 0.30)
+//	IACE_FRAMING_MIN_ORPHAN  third argument of FindFramingCandidates (default 0.6)
+//	IACE_PROPOSE_LLM=1       use the offline LLM judge instead of the heuristic
+//	OLLAMA_URL               ollama base URL (default http://localhost:11434)
+//	SELF_HOSTED_LLM_MODEL    model name (default qwen2.5:32b-instruct)
+func cmdPropose(args []string) {
+	if len(args) < 1 {
+		fmt.Fprintln(os.Stderr, "propose: usage: iace-audit propose <narrative.json> [<ground-truth.json>]")
+		os.Exit(2)
+	}
+
+	// NOTE(review): must is defined elsewhere in the package; presumably it
+	// aborts the command on a non-nil error — confirm.
+	var in narrativeInput
+	must(readJSONFile(args[0], &in))
+	if in.Narrative == "" {
+		fmt.Fprintln(os.Stderr, "propose: narrative is empty")
+		os.Exit(2)
+	}
+
+	// The ground truth is optional; gt stays nil when no second argument is given.
+	var gt *iace.GroundTruth
+	if len(args) >= 2 {
+		var g iace.GroundTruth
+		must(readJSONFile(args[1], &g))
+		gt = &g
+	}
+
+	threshold := envFloat("IACE_PROPOSE_THRESHOLD", 0.30)
+	hazards, mits, fired := iace.BuildProposerInput(in.Narrative, in.MachineType, in.MachineTypes)
+	candidates := iace.FindDedupCandidates(fired, threshold)
+
+	// Index the fired patterns by ID so the judge receives the full match for
+	// both the keep and the drop side of a candidate.
+	byID := make(map[string]iace.PatternMatch, len(fired))
+	for _, pm := range fired {
+		byID[pm.PatternID] = pm
+	}
+
+	judge := selectJudge(in.MachineType)
+	ctx := context.Background()
+
+	var proposals []iace.JudgedProposal
+	blocked := 0
+	for _, c := range candidates {
+		// Without a ground truth sr stays the zero ScreenResult and nothing is blocked.
+		var sr iace.ScreenResult
+		if gt != nil {
+			sr = iace.ScreenSupersession(gt, hazards, mits, c.KeepHazardName, c.DropName)
+			// GT wall: a candidate that lowers recall or is flagged DistinctGT
+			// is counted and never reaches the judge.
+			if sr.RecallAfter < sr.RecallBefore || sr.DistinctGT {
+				blocked++
+				continue
+			}
+		}
+		v, conf, rat := judge.Judge(ctx, c, byID[c.KeepPattern], byID[c.DropPattern])
+		proposals = append(proposals, iace.JudgedProposal{
+			Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
+		})
+	}
+
+	writeText("audit-reports/proposals.md", iace.RenderProposalQueue(in.MachineType, proposals))
+	writeJSON("audit-reports/proposals.json", proposals)
+
+	// Type 2: foreign-framing candidates (zone terms with no narrative echo).
+	framing := iace.FindFramingCandidates(fired, in.Narrative, envFloat("IACE_FRAMING_MIN_ORPHAN", 0.6))
+	writeText("audit-reports/framing.md", iace.RenderFramingQueue(in.MachineType, framing))
+	writeJSON("audit-reports/framing.json", framing)
+
+	// Type 3: vocab->tag proposals (unknown narrative tokens that pattern text
+	// names as a whole word, with a dominant shared required tag).
+	vocab := audit.RunVocabulary(map[string]any{"narrative": in.Narrative})
+	var vgaps []audit.DictionarySuggestion
+	for _, s := range vocab.SuggestedDictionaryEntries {
+		// Suggestions without any suggested tag are left out of the queue.
+		if len(s.SuggestedTags) > 0 {
+			vgaps = append(vgaps, s)
+		}
+	}
+	writeText("audit-reports/vocab.md", renderVocabQueue(in.MachineType, vgaps))
+	writeJSON("audit-reports/vocab.json", vgaps)
+
+	// Type 4: coverage blind-spots (empty ISO 12100 groups A-G) + LLM expansion.
+	gaps := iace.FindCoverageGaps(hazards)
+	var missing []iace.MissingHazard
+	// The expansion runs only when the selected judge is the LLM judge, whose
+	// Completer is reused; with the heuristic judge missing stays nil.
+	if lj, ok := judge.(iace.LLMJudge); ok {
+		missing = iace.ProposeMissingHazards(ctx, lj.Completer, in.MachineType, in.Narrative, hazards, gaps)
+	}
+	writeText("audit-reports/coverage.md", iace.RenderCoverageQueue(in.MachineType, gaps, missing))
+	// Only the deterministic gaps are serialised; missing is rendered in the markdown only.
+	writeJSON("audit-reports/coverage.json", gaps)
+
+	printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{
+		"fired_patterns": len(fired),
+		"candidates":     len(candidates),
+		"in_queue":       len(proposals),
+		"gt_blocked":     blocked,
+		"framing_flags":  len(framing),
+		"vocab_gaps":     len(vgaps),
+		"coverage_gaps":  len(gaps),
+	})
+	if gt == nil {
+		fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)")
+	}
+}
+
+// selectJudge returns the candidate judge for this run. Only the exact value
+// IACE_PROPOSE_LLM=1 enables the LLM judge, which talks to ollama at OLLAMA_URL
+// using the model named by SELF_HOSTED_LLM_MODEL; any other value (or none)
+// yields the heuristic judge. machineClass is stored on the LLM judge.
+func selectJudge(machineClass string) iace.CandidateJudge {
+	if os.Getenv("IACE_PROPOSE_LLM") == "1" {
+		ollamaURL := envStr("OLLAMA_URL", "http://localhost:11434")
+		modelName := envStr("SELF_HOSTED_LLM_MODEL", "qwen2.5:32b-instruct")
+
+		registry := llm.NewProviderRegistry("ollama", "")
+		registry.Register(llm.NewOllamaAdapter(ollamaURL, modelName))
+		fmt.Printf("using LLM judge (ollama %s, model %s)\n", ollamaURL, modelName)
+
+		completer := iace.NewRegistryCompleter(registry, modelName)
+		return iace.LLMJudge{Completer: completer, MachineClass: machineClass}
+	}
+	return iace.HeuristicJudge{}
+}
+
+// readJSONFile reads the file at path and decodes its JSON content into v,
+// which must be a non-nil pointer accepted by json.Unmarshal.
+//
+// A read failure is returned as-is (the *os.PathError already names the file).
+// A decode failure is wrapped with the path, because the raw json error does
+// not say which of the command's input files is malformed; the underlying
+// error stays reachable through errors.Is / errors.As.
+func readJSONFile(path string, v any) error {
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		return err
+	}
+	if err := json.Unmarshal(raw, v); err != nil {
+		return fmt.Errorf("parse %s: %w", path, err)
+	}
+	return nil
+}
+
+// writeText stores content at path after a best-effort attempt to create the
+// audit-reports directory. A failed write is reported as a warning on stderr
+// and does not abort the run; a successful write is announced on stdout.
+func writeText(path, content string) {
+	// Best effort: if the directory cannot be created, the write below fails
+	// and reports the problem.
+	_ = os.MkdirAll("audit-reports", 0o755)
+
+	err := os.WriteFile(path, []byte(content), 0o644)
+	if err == nil {
+		fmt.Println("→ wrote", path)
+		return
+	}
+	fmt.Fprintln(os.Stderr, "warn: could not write", path, err)
+}
+
+// envStr returns the value of the environment variable key, or def when the
+// variable is unset or set to the empty string.
+func envStr(key, def string) string {
+	value := os.Getenv(key)
+	if value == "" {
+		return def
+	}
+	return value
+}
+
+// envFloat returns the environment variable key parsed as a float64, or def
+// when the variable is unset or empty.
+//
+// A value that strconv.ParseFloat rejects (for example a decimal comma such as
+// "0,3", or an out-of-range number) also yields def, but a warning is printed
+// to stderr so a mistyped threshold does not silently fall back to the default.
+func envFloat(key string, def float64) float64 {
+	v := os.Getenv(key)
+	if v == "" {
+		return def
+	}
+	f, err := strconv.ParseFloat(v, 64)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "warn: ignoring %s=%q (not a valid number), using default %g\n", key, v, def)
+		return def
+	}
+	return f
+}
+
+func renderVocabQueue(machine string, entries []audit.DictionarySuggestion) string {
+	var b strings.Builder
+	fmt.Fprintf(&b, "# Vocab→tag review queue — %s\n\n", machine)
+	fmt.Fprintf(&b, "%d unknown token(s) appear in pattern text but map to no dictionary tag. Propose-only — a human (or the LLM) confirms the tag, then adds a keyword_dictionary entry and pins a GT case.\n\n", len(entries))
+	for i, s := range entries {
+		tag := "<tag>"
+		if len(s.SuggestedTags) > 0 {
+			tag = s.SuggestedTags[0]
+		}
+		fmt.Fprintf(&b, "## %d. \"%s\"  → suggested tag(s): %s\n", i+1, s.Token, strings.Join(s.SuggestedTags, ", "))
+		fmt.Fprintf(&b, "- named by %d pattern(s): %s\n", len(s.PatternIDs), strings.Join(s.PatternIDs, ", "))
+		fmt.Fprintf(&b, "- suggested action: add keyword_dictionary entry {%q → %s} so narratives mentioning it trigger those patterns; human confirms\n\n", s.Token, tag)
+	}
+	return b.String()
+}
@@ -36,6 +36,10 @@ type DictionarySuggestion struct {
 	Token      string   `json:"token"`
 	Field      string   `json:"field"`
 	PatternIDs []string `json:"pattern_ids"`
+	// SuggestedTags are the RequiredComponentTags shared by the naming patterns,
+	// ranked by frequency — the candidate tags a keyword_dictionary entry for this
+	// token would emit so narratives mentioning it can trigger those patterns.
+	SuggestedTags []string `json:"suggested_tags,omitempty"`
 }

 type VocabularyReport struct {
@@ -66,14 +66,19 @@ func runVocabulary(form map[string]any) VocabularyReport {

 	// For each unknown token check if any pattern names it
 	patterns := iace.AllPatterns()
+	byID := make(map[string]iace.HazardPattern, len(patterns))
+	for _, p := range patterns {
+		byID[p.ID] = p
+	}
 	for _, tok := range report.UnknownTokens {
 		hits := patternsMentioning(tok, patterns)
 		if len(hits) == 0 {
 			continue
 		}
 		report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{
-			Token:      tok,
-			PatternIDs: hits,
+			Token:         tok,
+			PatternIDs:    hits,
+			SuggestedTags: suggestTagsFor(hits, byID),
 		})
 	}
 	sort.Slice(report.SuggestedDictionaryEntries, func(i, j int) bool {
@@ -129,18 +134,24 @@ func dictTokenHit(tok string, dict map[string]bool) bool {
 	return false
 }

-// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/
-// harm/zone text contains the token (case-insensitive substring).
+// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/harm/
+// zone text names the token as a WHOLE WORD. Whole-word (not substring) matching
+// is essential: a substring match flags common fragments like "stehen" inside
+// "entstehen", producing spurious hits and nonsensical tag suggestions.
 func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
 	tokLower := strings.ToLower(tok)
 	seen := map[string]bool{}
 	var out []string
 	for _, p := range patterns {
 		hay := strings.ToLower(p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE)
-		if !strings.Contains(hay, tokLower) {
-			continue
+		matched := false
+		for _, w := range tokenRE.FindAllString(hay, -1) {
+			if w == tokLower {
+				matched = true
+				break
+			}
 		}
-		if seen[p.ID] {
+		if !matched || seen[p.ID] {
 			continue
 		}
 		seen[p.ID] = true
@@ -151,3 +162,57 @@ func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
 	}
 	return out
 }
+
+// suggestTagsFor proposes the tags a dictionary entry for a token should emit.
+//
+// It counts, over the patterns in ids that exist in byID, how many patterns
+// list each RequiredComponentTag (a tag repeated inside one pattern counts
+// once). Tags are ranked by that count, ties broken alphabetically, and at most
+// three are returned. A tag is only suggested when at least 40% of the resolved
+// patterns require it, so diffuse tokens yield no suggestion. Returns nil when
+// none of the ids resolves.
+func suggestTagsFor(ids []string, byID map[string]iace.HazardPattern) []string {
+	counts := make(map[string]int)
+	resolved := 0
+	for _, id := range ids {
+		pattern, found := byID[id]
+		if !found {
+			continue
+		}
+		resolved++
+		// Guard against a tag listed twice on the same pattern.
+		counted := make(map[string]struct{}, len(pattern.RequiredComponentTags))
+		for _, tag := range pattern.RequiredComponentTags {
+			if _, dup := counted[tag]; dup {
+				continue
+			}
+			counted[tag] = struct{}{}
+			counts[tag]++
+		}
+	}
+	if resolved == 0 {
+		return nil
+	}
+
+	type tagCount struct {
+		tag string
+		n   int
+	}
+	ranked := make([]tagCount, 0, len(counts))
+	for tag, n := range counts {
+		ranked = append(ranked, tagCount{tag: tag, n: n})
+	}
+	sort.Slice(ranked, func(a, b int) bool {
+		if ranked[a].n == ranked[b].n {
+			return ranked[a].tag < ranked[b].tag
+		}
+		return ranked[a].n > ranked[b].n
+	})
+
+	// ranked is sorted by descending count, so the first tag below the 40%
+	// share ends the scan; the cap keeps the suggestion list at three.
+	var out []string
+	for _, rc := range ranked {
+		if len(out) == 3 {
+			break
+		}
+		if float64(rc.n)/float64(resolved) < 0.4 {
+			break
+		}
+		out = append(out, rc.tag)
+	}
+	return out
+}
@@ -0,0 +1,36 @@
+package audit
+
+import (
+	"testing"
+
+	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
+)
+
+// TestSuggestTagsFor_RanksSharedRequiredTags checks that the tag required by
+// the most patterns is ranked first.
+func TestSuggestTagsFor_RanksSharedRequiredTags(t *testing.T) {
+	// backflow_risk is required by two of the three patterns, every other tag by one.
+	patterns := map[string]iace.HazardPattern{
+		"P1": {ID: "P1", RequiredComponentTags: []string{"backflow_risk", "dom_warewashing"}},
+		"P2": {ID: "P2", RequiredComponentTags: []string{"backflow_risk"}},
+		"P3": {ID: "P3", RequiredComponentTags: []string{"sharp_edge"}},
+	}
+	ids := []string{"P1", "P2", "P3"}
+
+	tags := suggestTagsFor(ids, patterns)
+	if len(tags) > 0 && tags[0] == "backflow_risk" {
+		return
+	}
+	t.Fatalf("want backflow_risk ranked first (2 patterns), got %v", tags)
+}
+
+// TestSuggestTagsFor_TopThreeStableAlpha checks that equally frequent tags are
+// returned alphabetically and capped at three.
+func TestSuggestTagsFor_TopThreeStableAlpha(t *testing.T) {
+	patterns := map[string]iace.HazardPattern{
+		"P1": {ID: "P1", RequiredComponentTags: []string{"d", "b", "a", "c"}},
+	}
+
+	tags := suggestTagsFor([]string{"P1"}, patterns)
+	ok := len(tags) == 3 && tags[0] == "a" && tags[1] == "b" && tags[2] == "c"
+	if !ok {
+		t.Fatalf("want stable alpha top-3 [a b c], got %v", tags)
+	}
+}
+
+// TestSuggestTagsFor_UnknownPatternIgnored checks that IDs missing from the
+// lookup map produce no suggestion.
+func TestSuggestTagsFor_UnknownPatternIgnored(t *testing.T) {
+	empty := map[string]iace.HazardPattern{}
+	tags := suggestTagsFor([]string{"missing"}, empty)
+	if len(tags) != 0 {
+		t.Fatalf("want empty for unknown patterns, got %v", tags)
+	}
+}
@@ -7,8 +7,6 @@ import (
 	"path/filepath"
 	"sort"
 	"testing"
-
-	"github.com/google/uuid"
 )

 // TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
@@ -110,65 +108,6 @@ func TestKistenhub_GTCoverage(t *testing.T) {
 // patternsToHazardsAndMitigations converts a pattern match output into the
 // Hazard/Mitigation shapes that CompareBenchmark expects. Mirrors what
 // iace_handler_init.go does in production but without DB writes.
-func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
-	hazards := make([]Hazard, 0, len(out.MatchedPatterns))
-	patternToHazard := make(map[string]uuid.UUID, len(out.MatchedPatterns))
-
-	for _, pm := range out.MatchedPatterns {
-		cat := ""
-		if len(pm.HazardCats) > 0 {
-			cat = pm.HazardCats[0]
-		}
-		zone := pm.ZoneDE
-		lifecycle := ""
-		if len(pm.ApplicableLifecycles) > 0 {
-			lifecycle = pm.ApplicableLifecycles[0]
-		}
-		h := Hazard{
-			ID:             uuid.New(),
-			Name:           pm.ScenarioDE,
-			Category:       cat,
-			Description:    pm.ScenarioDE,
-			Scenario:       pm.ScenarioDE,
-			TriggerEvent:   pm.TriggerDE,
-			PossibleHarm:   pm.HarmDE,
-			AffectedPerson: pm.AffectedDE,
-			HazardousZone:  zone,
-			LifecyclePhase: lifecycle,
-		}
-		if h.Name == "" {
-			h.Name = pm.PatternName
-		}
-		hazards = append(hazards, h)
-		patternToHazard[pm.PatternID] = h.ID
-	}
-
-	measureNames := make(map[string]string)
-	for _, m := range GetProtectiveMeasureLibrary() {
-		measureNames[m.ID] = m.Name
-	}
-
-	var mitigations []Mitigation
-	for _, sm := range out.SuggestedMeasures {
-		name := measureNames[sm.MeasureID]
-		if name == "" {
-			name = sm.MeasureID
-		}
-		for _, srcPattern := range sm.SourcePatterns {
-			hid, ok := patternToHazard[srcPattern]
-			if !ok {
-				continue
-			}
-			mitigations = append(mitigations, Mitigation{
-				ID:       uuid.New(),
-				HazardID: hid,
-				Name:     name,
-			})
-		}
-	}
-	return hazards, mitigations
-}
-
 func abbrev(s string, max int) string {
 	if len(s) <= max {
 		return s
@@ -1,6 +1,7 @@
 package iace

 import (
+	"context"
 	"encoding/json"
 	"os"
 	"path/filepath"
@@ -45,7 +46,7 @@ var warewashingCyberCategories = map[string]bool{

 // warewashingEngineOutput runs the production chain and returns the filtered
 // hazards/mitigations the user would see for the UC-M.
-func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
+func warewashingEngineOutput() ([]Hazard, []Mitigation, []PatternMatch) {
 	res := ParseNarrative(warewashingNarrative, "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)")

 	var compIDs, compNames []string
@@ -94,7 +95,7 @@ func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
 	filtered := *out
 	filtered.MatchedPatterns = kept
 	hazards, mitigations := patternsToHazardsAndMitigations(&filtered)
-	return hazards, mitigations, len(kept)
+	return hazards, mitigations, kept
 }

 func TestWarewashing_GTCoverage(t *testing.T) {
@@ -119,8 +120,8 @@ func TestWarewashing_GTCoverage(t *testing.T) {
 		t.Logf("Parsed components: %v", cn)
 	}

-	hazards, mitigations, nPatterns := warewashingEngineOutput()
-	t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", nPatterns, len(hazards))
+	hazards, mitigations, keptPatterns := warewashingEngineOutput()
+	t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", len(keptPatterns), len(hazards))

 	result := CompareBenchmark(&gt, hazards, mitigations)
 	precision := 0.0
@@ -180,3 +181,57 @@ func TestWarewashing_GTCoverage(t *testing.T) {
 		t.Errorf("warewashing recall below 40%% floor: %.1f%%", result.CoverageScore*100)
 	}
 }
+
+// TestWarewashing_DedupProposer exercises the offline dedup-candidate proposer
+// end-to-end on the real warewashing engine output: detect candidates, screen
+// each against the GT, and log the human-review queue. It asserts the WALL is
+// self-consistent — a candidate that is neither recall-lowering nor flagged
+// DistinctGT must carry ScreenResult.Safe=true. Blocked candidates are only
+// logged and counted; they do not fail the test.
+func TestWarewashing_DedupProposer(t *testing.T) {
+	raw, err := os.ReadFile(filepath.Join("testdata", "ground_truth_warewashing.json"))
+	if err != nil {
+		t.Fatalf("read GT: %v", err)
+	}
+	var gt GroundTruth
+	if err := json.Unmarshal(raw, &gt); err != nil {
+		t.Fatalf("parse GT: %v", err)
+	}
+
+	hazards, mits, kept := warewashingEngineOutput()
+	// Index the kept patterns by ID so the judge receives the full PatternMatch
+	// for both sides of a candidate.
+	byID := map[string]PatternMatch{}
+	for _, pm := range kept {
+		byID[pm.PatternID] = pm
+	}
+	// 0.25 is a deliberately permissive candidate threshold: the proposer is meant
+	// to over-surface, because the deterministic GT wall below (and a human, and the
+	// LLM judge) is the precision filter — not the detector.
+	candidates := FindDedupCandidates(kept, 0.25)
+	t.Logf("Proposer: %d dedup candidate(s) from %d fired patterns", len(candidates), len(kept))
+
+	// Deterministic judge in the test; the dev-time CLI swaps in LLMJudge.
+	judge := HeuristicJudge{}
+	var judged []JudgedProposal
+	blocked := 0
+	for _, c := range candidates {
+		sr := ScreenSupersession(&gt, hazards, mits, c.KeepHazardName, c.DropName)
+		// Cases are evaluated top-down: a candidate that both lowers recall and
+		// is DistinctGT is logged under the recall branch.
+		switch {
+		case sr.RecallAfter < sr.RecallBefore:
+			t.Logf("[BLOCK recall-load-bearing] keep %s / drop %s", c.KeepPattern, c.DropPattern)
+			blocked++
+		case sr.DistinctGT:
+			t.Logf("[BLOCK distinct GT %s vs %s] keep %s / drop %s", sr.KeepGT, sr.DropGT, c.KeepPattern, c.DropPattern)
+			blocked++
+		default:
+			// The only hard assertion: a candidate that passed both block
+			// conditions must be marked Safe by the screen.
+			if !sr.Safe {
+				t.Errorf("RECALL-SAFE branch but ScreenResult.Safe=false for drop %s", c.DropPattern)
+			}
+			v, conf, rat := judge.Judge(context.Background(), c, byID[c.KeepPattern], byID[c.DropPattern])
+			judged = append(judged, JudgedProposal{
+				Candidate: c, Screen: sr, Verdict: v, Confidence: conf, Rationale: rat, Judge: judge.Name(),
+			})
+		}
+	}
+
+	t.Logf("\n%s", RenderProposalQueue("Gewerbliche Geschirrspuelmaschine (vernetzt)", judged))
+	t.Logf("Proposer summary: %d candidate(s) in queue (judge=%s), %d BLOCKED by the GT wall — propose-only, nothing auto-applied",
+		len(judged), judge.Name(), blocked)
+}
@@ -0,0 +1,50 @@
+package iace
+
+import "sort"
+
+// isoCategoryRank assigns each hazard category a sort rank that follows the
+// EN ISO 12100 hazard-group ordering for the hazard log. Without it the log is
+// returned in pattern-firing order, which reads as a jumble. This groups the
+// hazards top-down by type (A. Mechanisch, B. Elektrisch, C. Thermisch, …),
+// matching the frontend CATEGORY_LABELS. Ranks are spaced by ten per group;
+// categories sharing a rank sort as equals.
+var isoCategoryRank = map[string]int{
+	// A. Mechanical (Mechanisch)
+	"mechanical_hazard": 10, "mechanical": 10, "maintenance_hazard": 11,
+	// B. Electrical (Elektrisch)
+	"electrical_hazard": 20, "electrical": 20, "emc_hazard": 21,
+	// C. Thermal (Thermisch)
+	"thermal_hazard": 30, "thermal": 30, "high_temperature": 31, "fire_explosion": 32,
+	// D. Pneumatics / hydraulics (Pneumatik / Hydraulik)
+	"pneumatic_hydraulic": 40,
+	// E. Noise / vibration (Laerm / Vibration)
+	"noise_hazard": 50, "noise_vibration": 50, "vibration_hazard": 51,
+	// F. Ergonomics (Ergonomie)
+	"ergonomic_hazard": 60, "ergonomic": 60,
+	// G. Substances / environment (Stoffe / Umwelt)
+	"material_environmental": 70, "chemical_risk": 71, "radiation_hazard": 72,
+	// H. Software / control (functional safety)
+	"software_control": 80, "software_fault": 80, "safety_function_failure": 81,
+	"configuration_error": 82, "sensor_fault": 83, "hmi_error": 84, "mode_confusion": 85,
+	"communication_failure": 86, "update_failure": 87,
+	// I. Cyber / network (listed for ordering completeness; excluded from the CE log)
+	"unauthorized_access": 90, "firmware_corruption": 91, "cyber_resilience": 92,
+	"cyber_network": 93, "logging_audit_failure": 94, "sensor_spoofing": 95,
+	// J. AI-specific
+	"ai_specific": 100, "ai_misclassification": 100, "false_classification": 100,
+	"model_drift": 100, "data_poisoning": 100, "unintended_bias": 100,
+}
+
+// categoryRank looks up the ISO 12100 sort rank of cat. Categories that are
+// missing from isoCategoryRank get 999 so they sort after every known group.
+func categoryRank(cat string) int {
+	rank, known := isoCategoryRank[cat]
+	if !known {
+		return 999 // unknown categories last
+	}
+	return rank
+}
+
+// SortHazardsByISO12100 reorders hazards in place so that they are grouped by
+// ISO 12100 hazard group, using categoryRank on each hazard's Category. The
+// sort is stable: hazards with the same rank keep the relative order they had
+// on entry (creation/priority order from the engine).
+func SortHazardsByISO12100(hazards []Hazard) {
+	byGroup := func(a, b int) bool {
+		left := categoryRank(hazards[a].Category)
+		right := categoryRank(hazards[b].Category)
+		return left < right
+	}
+	sort.SliceStable(hazards, byGroup)
+}
@@ -157,7 +157,7 @@ func GetGTBremseHazardPatterns() []HazardPattern {
 		// ════════════════════════════════════════════════════════════════
 		{
 			ID: "HP1717", NameDE: "Verletzung durch unvermittelt austretende pneumatische Restenergie", NameEN: "Injury from unexpectedly released pneumatic stored energy",
-			RequiredComponentTags: []string{"stored_energy"},
+			RequiredComponentTags: []string{"pneumatic_part"},
 			RequiredEnergyTags:    []string{"pneumatic_pressure"},
 			GeneratedHazardCats:   []string{"mechanical_hazard"},
 			SuggestedMeasureIDs:   []string{"M485", "M534", "M527"},
@@ -375,7 +375,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
 		// ================================================================
 		{
 			ID: "HP753", NameDE: "Thermal Runaway bei Lithium-Batterie", NameEN: "Thermal runaway of lithium battery",
-			RequiredComponentTags: []string{"stored_energy", "high_temperature"},
+			RequiredComponentTags: []string{"battery", "high_temperature"},
 			RequiredEnergyTags:    []string{"electrical_energy", "thermal"},
 			GeneratedHazardCats:   []string{"thermal_hazard", "electrical_hazard"},
 			SuggestedMeasureIDs:   []string{"M005", "M141"},
@@ -390,7 +390,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
 		},
 		{
 			ID: "HP754", NameDE: "Ausgasung giftiger Daempfe aus Batterie", NameEN: "Toxic gas emission from battery",
-			RequiredComponentTags: []string{"stored_energy", "chemical_risk"},
+			RequiredComponentTags: []string{"battery", "chemical_risk"},
 			RequiredEnergyTags:    []string{},
 			GeneratedHazardCats:   []string{"material_environmental"},
 			SuggestedMeasureIDs:   []string{"M005", "M141"},
@@ -405,7 +405,7 @@ func GetSpecificMachinePatterns() []HazardPattern {
 		},
 		{
 			ID: "HP755", NameDE: "Elektrischer Schlag an Hochvolt-Batteriespeicher", NameEN: "Electric shock from high-voltage battery storage",
-			RequiredComponentTags: []string{"stored_energy", "electrical_part"},
+			RequiredComponentTags: []string{"battery", "electrical_part"},
 			RequiredEnergyTags:    []string{"electrical_energy"},
 			GeneratedHazardCats:   []string{"electrical_hazard"},
 			SuggestedMeasureIDs:   []string{"M082", "M141"},
@@ -137,7 +137,7 @@ func GetKeywordDictionary() []KeywordEntry {
 		{Keywords: []string{"kreiselmaeher", "scheibenmaeher", "maehwerk"}, ExtraTags: []string{"agri_mower"}},
 		{Keywords: []string{"spruehduese", "spritzduese", "spruehkopf"}, ExtraTags: []string{"spray_nozzle"}},
 		{Keywords: []string{"galvanikbad", "tauchbad", "beizbad", "chemiebad"}, ExtraTags: []string{"chemical_bath"}},
-		{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie"}, ExtraTags: []string{"battery"}},
+		{Keywords: []string{"batterie", "akku", "akkumulator", "traktionsbatterie", "lithium", "batteriespeicher", "hochvoltbatterie", "lithium-batterie"}, ExtraTags: []string{"battery"}},
 		{Keywords: []string{"heizelement", "heizpatrone", "heizband"}, ExtraTags: []string{"heating_element"}},
 		{Keywords: []string{"uv-lampe", "uv-strahler", "uv-c-strahler"}, ExtraTags: []string{"uv_source"}},
 		{Keywords: []string{"roentgen", "radioaktiv", "strahlenquelle", "gammastrahl", "isotop"}, ExtraTags: []string{"radiation_source"}},
@@ -42,3 +42,29 @@ func guardedLifecycles(p HazardPattern, tagSet map[string]bool) []string {
 	}
 	return p.ApplicableLifecycles
 }
+
+// Domain-specific supersession.
+//
+// genericSupersededByWarewashing lists the IDs of generic patterns that are
+// dropped when the dom_warewashing tag is present.
+//
+// A generic pattern that fires via a broad tag (e.g. high_temperature) can
+// duplicate a domain-specific pattern that describes the same hazard more
+// precisely. When the domain is present, the specific pattern wins and the
+// generic duplicate is dropped. Scoped to the domain tag, so machines outside
+// the domain keep the generic pattern — regression-safe by construction.
+//
+//	HP016 (generic hot surfaces)  -> HP2201 (boiler/tank/wash chamber)
+//	HP018 (actuator burn)         -> HP2201 (same contact-burn hazard)
+//	HP013 (stored electrical NRG) -> HP144  (residual voltage; HP013's zone is
+//	                                 framed for battery compartments/UPS systems
+//	                                 a dishwasher does not have, HP144 is the
+//	                                 frequency-converter/DC-link variant)
+var genericSupersededByWarewashing = map[string]bool{
+	"HP016": true,
+	"HP018": true,
+	"HP013": true,
+}
+
+// supersededByDomainSpecific reports whether p is a generic pattern that must
+// be dropped because the project's domain already provides a more precise
+// equivalent. It is true only when tagSet contains dom_warewashing and p.ID is
+// listed in genericSupersededByWarewashing.
+func supersededByDomainSpecific(p HazardPattern, tagSet map[string]bool) bool {
+	if !tagSet["dom_warewashing"] {
+		return false
+	}
+	return genericSupersededByWarewashing[p.ID]
+}
@@ -416,6 +416,11 @@ func patternMatches(p HazardPattern, tagSet map[string]bool, input MatchInput) b
 		return false
 	}

+	// Domain-specific supersession (generic duplicate replaced by a precise one).
+	if supersededByDomainSpecific(p, tagSet) {
+		return false
+	}
+
 	return true
 }

@@ -0,0 +1,143 @@
+package iace
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+)
+
+// Coverage blind-spot proposer (P2 slice 6, type 4). DEV-TIME, propose-only.
+//
+// Deterministic skeleton: which EN ISO 12100 hazard groups (A-G, the classic CE
+// groups; H-J are control/CRA and routinely routed elsewhere) did the engine
+// leave with ZERO hazards for this machine? An empty group is a structural
+// blind-spot signal — the machine may genuinely lack that hazard, or a pattern
+// may be missing. The LLM then expands each gap into specific expected-but-missing
+// hazards a safety assessor would name, for a human to confirm into a new pattern
+// or GT case. The gaps alone are useful without any model.
+
+// isoGroup describes one EN ISO 12100 hazard group: Key is the machine-readable
+// identifier copied into CoverageGap.Key, Label the display name copied into
+// CoverageGap.Group, and Cats the hazard categories that count as covering
+// the group.
+type isoGroup struct {
+	Key   string
+	Label string
+	Cats  []string
+}
+
+// iso12100Groups lists the hazard groups A-G checked by FindCoverageGaps, in
+// the order in which gaps are reported. Labels are the German display names.
+var iso12100Groups = []isoGroup{
+	{"mechanical", "A. Mechanisch", []string{"mechanical_hazard", "mechanical", "maintenance_hazard"}},
+	{"electrical", "B. Elektrisch", []string{"electrical_hazard", "electrical", "emc_hazard"}},
+	{"thermal", "C. Thermisch", []string{"thermal_hazard", "thermal", "high_temperature", "fire_explosion"}},
+	{"pneumatic_hydraulic", "D. Pneumatik/Hydraulik", []string{"pneumatic_hydraulic"}},
+	{"noise_vibration", "E. Laerm/Vibration", []string{"noise_hazard", "noise_vibration", "vibration_hazard"}},
+	{"ergonomic", "F. Ergonomie", []string{"ergonomic_hazard", "ergonomic"}},
+	{"material", "G. Stoffe/Umwelt", []string{"material_environmental", "chemical_risk", "radiation_hazard"}},
+}
+
+// CoverageGap is an ISO 12100 hazard group with no engine hazard. Group holds
+// the group's display label, Key its machine-readable identifier, and Note a
+// reviewer hint explaining what the gap may mean.
+type CoverageGap struct {
+	Group string `json:"group"`
+	Key   string `json:"key"`
+	Note  string `json:"note"`
+}
+
+// FindCoverageGaps returns one CoverageGap for every group in iso12100Groups
+// (A-G) for which none of the group's categories occurs as the Category of a
+// hazard. Gaps are reported in group order; the result is nil when every group
+// is covered.
+func FindCoverageGaps(hazards []Hazard) []CoverageGap {
+	seenCats := make(map[string]bool, len(hazards))
+	for i := range hazards {
+		seenCats[hazards[i].Category] = true
+	}
+
+	// hasAny reports whether at least one of cats was produced by the engine.
+	hasAny := func(cats []string) bool {
+		for _, cat := range cats {
+			if seenCats[cat] {
+				return true
+			}
+		}
+		return false
+	}
+
+	var gaps []CoverageGap
+	for _, group := range iso12100Groups {
+		if hasAny(group.Cats) {
+			continue
+		}
+		gaps = append(gaps, CoverageGap{
+			Group: group.Label,
+			Key:   group.Key,
+			Note:  "no engine hazard in this ISO 12100 group — verify the machine truly lacks it, or a pattern is missing",
+		})
+	}
+	return gaps
+}
+
+// MissingHazard is an LLM-proposed hazard a safety assessor would expect. The
+// fields mirror the JSON objects requested in the coverage prompt: Group names
+// the hazard group, Hazard the proposed hazard, and Why the model's reasoning.
+type MissingHazard struct {
+	Group  string `json:"group"`
+	Hazard string `json:"hazard"`
+	Why    string `json:"why"`
+}
+
+// ProposeMissingHazards asks the LLM behind completer to expand the empty
+// groups in gaps into specific hazards expected for the machine. It returns
+// nil when there are no gaps, when completer is nil, or when the completion
+// fails — the step is propose-only and must never break the run. An answer
+// that cannot be parsed also yields nil.
+func ProposeMissingHazards(ctx context.Context, completer LLMCompleter, machineClass, narrative string, produced []Hazard, gaps []CoverageGap) []MissingHazard {
+	if len(gaps) == 0 || completer == nil {
+		return nil
+	}
+
+	systemPrompt, userPrompt := BuildCoveragePrompt(machineClass, narrative, produced, gaps)
+	reply, err := completer.Complete(ctx, systemPrompt, userPrompt)
+	if err == nil {
+		return parseMissingHazards(reply)
+	}
+	// Deliberately swallowed: a failed completion only means no proposals.
+	return nil
+}
+
+// BuildCoveragePrompt frames the "what is missing?" question for the LLM.
+func BuildCoveragePrompt(machineClass, narrative string, produced []Hazard, gaps []CoverageGap) (system, user string) {
+	system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
+		"Dir werden eine Maschine, die bereits erkannten Gefaehrdungen und Gefaehrdungsgruppen OHNE Eintrag genannt. " +
+		"Nenne nur Gefaehrdungen, die ein Sachverstaendiger fuer DIESE Maschine ERWARTET, die aber FEHLEN. " +
+		"Erfinde nichts Maschinenfremdes. Antworte AUSSCHLIESSLICH als JSON-Array: " +
+		`[{"group":"...","hazard":"...","why":"..."}].`
+
+	var have []string
+	seen := map[string]bool{}
+	for _, h := range produced {
+		if h.Category != "" && !seen[h.Category] {
+			seen[h.Category] = true
+			have = append(have, h.Category)
+		}
+	}
+	var empty []string
+	for _, g := range gaps {
+		empty = append(empty, g.Group)
+	}
+	user = fmt.Sprintf("Maschinenklasse: %s\n\nBeschreibung:\n%s\n\nBereits erkannte Kategorien: %s\n\nGruppen OHNE Eintrag (Fokus): %s\n\nWelche erwarteten Gefaehrdungen fehlen?",
+		machineClass, narrative, strings.Join(have, ", "), strings.Join(empty, ", "))
+	return system, user
+}
+
+// parseMissingHazards extracts the JSON array from an LLM answer that may wrap
+// it in prose: everything from the first "[" to the last "]" is decoded.
+// Returns nil when no such span exists or when it is not valid JSON for
+// []MissingHazard.
+func parseMissingHazards(raw string) []MissingHazard {
+	first := strings.Index(raw, "[")
+	if first < 0 {
+		return nil
+	}
+	last := strings.LastIndex(raw, "]")
+	if last <= first {
+		return nil
+	}
+
+	var parsed []MissingHazard
+	if json.Unmarshal([]byte(raw[first:last+1]), &parsed) != nil {
+		return nil
+	}
+	return parsed
+}
+
+// RenderCoverageQueue renders the deterministic gaps plus any LLM-proposed missing
+// hazards as a markdown review queue.
+func RenderCoverageQueue(machine string, gaps []CoverageGap, missing []MissingHazard) string {
+	var b strings.Builder
+	fmt.Fprintf(&b, "# Coverage blind-spot queue — %s\n\n", machine)
+	fmt.Fprintf(&b, "%d ISO 12100 group(s) (A-G) have no engine hazard. Propose-only — a human confirms whether the machine truly lacks it or a pattern/GT case is missing.\n\n", len(gaps))
+	for _, g := range gaps {
+		fmt.Fprintf(&b, "- **%s** — %s\n", g.Group, g.Note)
+	}
+	if len(missing) > 0 {
+		fmt.Fprintf(&b, "\n## LLM-proposed expected-but-missing hazards (%d)\n\n", len(missing))
+		for i, m := range missing {
+			fmt.Fprintf(&b, "%d. [%s] %s\n   - why: %s\n", i+1, m.Group, m.Hazard, m.Why)
+		}
+	}
+	return b.String()
+}
@@ -0,0 +1,59 @@
+package iace
+
+import (
+	"context"
+	"strings"
+	"testing"
+)
+
+// TestFindCoverageGaps passes four hazard categories to FindCoverageGaps and
+// checks which group keys come back as gaps.
+func TestFindCoverageGaps(t *testing.T) {
+	hazards := []Hazard{
+		{Category: "mechanical_hazard"},
+		{Category: "thermal_hazard"},
+		{Category: "electrical_hazard"},
+		{Category: "material_environmental"},
+	}
+	// Index the reported gap keys for the membership checks below.
+	gapKeys := map[string]bool{}
+	for _, g := range FindCoverageGaps(hazards) {
+		gapKeys[g.Key] = true
+	}
+	// Groups with no hazard in the input must be reported as gaps.
+	for _, want := range []string{"pneumatic_hydraulic", "noise_vibration", "ergonomic"} {
+		if !gapKeys[want] {
+			t.Errorf("expected gap %s", want)
+		}
+	}
+	// Groups expected to be covered by the input categories must not be reported.
+	for _, notWant := range []string{"mechanical", "thermal", "electrical", "material"} {
+		if gapKeys[notWant] {
+			t.Errorf("did not expect gap %s (covered)", notWant)
+		}
+	}
+}
+
+// TestBuildCoveragePrompt_ContainsContext checks that the system prompt carries
+// the standard/JSON framing and that the user prompt includes the machine class,
+// the produced category, the gap group and the narrative text.
+func TestBuildCoveragePrompt_ContainsContext(t *testing.T) {
+	produced := []Hazard{{Category: "thermal_hazard"}}
+	gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}
+	system, user := BuildCoveragePrompt("Geschirrspuelmaschine", "Eine Spuelmaschine mit Tank.", produced, gaps)
+	if !strings.Contains(system, "EN ISO 12100") || !strings.Contains(system, "JSON") {
+		t.Errorf("system prompt missing framing")
+	}
+	// Every piece of caller-supplied context must appear verbatim in the user prompt.
+	for _, want := range []string{"Geschirrspuelmaschine", "thermal_hazard", "F. Ergonomie", "Spuelmaschine mit Tank"} {
+		if !strings.Contains(user, want) {
+			t.Errorf("user prompt missing %q", want)
+		}
+	}
+}
+
+// TestProposeMissingHazards_ParsesAndDegrades covers the happy path (a JSON array
+// wrapped in chatter is parsed) and the two degradation paths (nil completer and
+// completer error both yield nil).
+func TestProposeMissingHazards_ParsesAndDegrades(t *testing.T) {
+	gaps := []CoverageGap{{Group: "F. Ergonomie", Key: "ergonomic"}}
+	// The reply has text before and after the array; only the array should be decoded.
+	c := fakeCompleter{out: `Hier: [{"group":"F. Ergonomie","hazard":"Heben schwerer Koerbe","why":"manuelles Beladen"}] fertig`}
+	got := ProposeMissingHazards(context.Background(), c, "x", "n", nil, gaps)
+	if len(got) != 1 || got[0].Hazard != "Heben schwerer Koerbe" {
+		t.Fatalf("parse: got %+v", got)
+	}
+	// A missing completer must not panic — it yields no proposals.
+	if ProposeMissingHazards(context.Background(), nil, "x", "n", nil, gaps) != nil {
+		t.Errorf("nil completer must return nil")
+	}
+	// A completer error yields no proposals as well.
+	if ProposeMissingHazards(context.Background(), fakeCompleter{err: context.DeadlineExceeded}, "x", "n", nil, gaps) != nil {
+		t.Errorf("error must return nil")
+	}
+}
@@ -0,0 +1,152 @@
+package iace
+
+import (
+	"fmt"
+	"math"
+	"regexp"
+	"sort"
+	"strings"
+)
+
+// Offline dedup-candidate proposer (P2, type 1). DEV-TIME ONLY.
+//
+// It inspects the patterns that fired for one machine and proposes which look
+// like duplicates, so a human (later an LLM) can decide a supersession/merge. It
+// NEVER mutates the pattern library or the runtime — it only surfaces candidates.
+// The deterministic GT screen (ScreenSupersession, proposer_screen.go) is the
+// wall that proves a proposal is safe before a human ever sees it.
+//
+// Detection here is purely structural (category + zone + measure + scenario
+// overlap) and therefore reproducible. Two safety rules bake in what P1 taught
+// us about the dishwasher review:
+//   - only patterns with the SAME primary category are ever compared;
+//   - a pair with DIFFERENT operational states is NEVER proposed, because
+//     normal-operation and maintenance are legitimately distinct contexts with
+//     different protective measures (e.g. HP011 vs HP077). Merging them would
+//     erase the maintenance view.
+
+// DedupCandidate is a proposed near-duplicate pattern pair for one machine class.
+//
+// As populated by FindDedupCandidates: Keep* fields describe the pattern with the
+// higher Priority (the first of the pair on a tie), Drop* fields describe the
+// other one. The three *Jaccard fields and Score are rounded to two decimals;
+// Score is 0.4*measure + 0.3*zone + 0.3*scenario overlap. Rationale is a
+// human-readable one-line summary of the same numbers.
+type DedupCandidate struct {
+	KeepPattern     string  `json:"keep_pattern"` // higher-priority survivor
+	DropPattern     string  `json:"drop_pattern"` // supersession target
+	KeepName        string  `json:"keep_name"`
+	KeepHazardName  string  `json:"keep_hazard_name"` // keep pattern ScenarioDE (for the GT-distinctness screen)
+	DropName        string  `json:"drop_name"`        // == generated hazard Name (ScenarioDE) of the drop pattern
+	Category        string  `json:"category"`
+	ZoneJaccard     float64 `json:"zone_jaccard"`
+	MeasureJaccard  float64 `json:"measure_jaccard"`
+	ScenarioJaccard float64 `json:"scenario_jaccard"`
+	Score           float64 `json:"score"`
+	Rationale       string  `json:"rationale"`
+}
+
+// FindDedupCandidates walks every unordered pair of fired patterns and proposes
+// the pairs that look like duplicates.
+//
+// A pair is considered only when both patterns have the same non-empty primary
+// category and the same set of operational states. Its score is
+// 0.4*measure + 0.3*zone + 0.3*scenario Jaccard overlap; pairs scoring below
+// threshold are dropped. The pattern with the higher Priority is the keep side
+// (the earlier one wins a tie). The result is sorted by score descending, then
+// by drop-pattern id ascending, using a stable sort.
+func FindDedupCandidates(fired []PatternMatch, threshold float64) []DedupCandidate {
+	var candidates []DedupCandidate
+	for i, first := range fired {
+		category := primaryCat(first)
+		for _, second := range fired[i+1:] {
+			if category == "" || category != primaryCat(second) {
+				continue
+			}
+			// Different operational states are legitimate lifecycle variants —
+			// never propose a merge for those.
+			if !sameOpStateSet(first.OperationalStates, second.OperationalStates) {
+				continue
+			}
+
+			zoneOverlap := tokenJaccard(zoneTokenSet(first.ZoneDE), zoneTokenSet(second.ZoneDE))
+			measureOverlap := tokenJaccard(toSet(first.SuggestedMeasureIDs), toSet(second.SuggestedMeasureIDs))
+			scenarioOverlap := tokenJaccard(wordTokenSet(first.ScenarioDE), wordTokenSet(second.ScenarioDE))
+			score := 0.4*measureOverlap + 0.3*zoneOverlap + 0.3*scenarioOverlap
+			if score < threshold {
+				continue
+			}
+
+			survivor, target := first, second
+			if second.Priority > first.Priority {
+				survivor, target = second, first
+			}
+
+			rationale := fmt.Sprintf(
+				"same category %q · measure overlap %.0f%% · zone overlap %.0f%% · scenario overlap %.0f%% → keep %s (P%d), supersede %s (P%d)",
+				category, measureOverlap*100, zoneOverlap*100, scenarioOverlap*100,
+				survivor.PatternID, survivor.Priority, target.PatternID, target.Priority)
+
+			candidates = append(candidates, DedupCandidate{
+				KeepPattern:     survivor.PatternID,
+				DropPattern:     target.PatternID,
+				KeepName:        survivor.PatternName,
+				KeepHazardName:  survivor.ScenarioDE,
+				DropName:        target.ScenarioDE,
+				Category:        category,
+				ZoneJaccard:     round2(zoneOverlap),
+				MeasureJaccard:  round2(measureOverlap),
+				ScenarioJaccard: round2(scenarioOverlap),
+				Score:           round2(score),
+				Rationale:       rationale,
+			})
+		}
+	}
+
+	sort.SliceStable(candidates, func(x, y int) bool {
+		left, right := candidates[x], candidates[y]
+		if left.Score == right.Score {
+			return left.DropPattern < right.DropPattern
+		}
+		return left.Score > right.Score
+	})
+	return candidates
+}
+
+// primaryCat returns the first entry of the pattern's HazardCats, or "" when the
+// pattern lists no categories.
+func primaryCat(pm PatternMatch) string {
+	if cats := pm.HazardCats; len(cats) > 0 {
+		return cats[0]
+	}
+	return ""
+}
+
+// sameOpStateSet reports whether a and b hold the same set of operational
+// states, ignoring order. Both slices are first converted with toSet.
+func sameOpStateSet(a, b []string) bool {
+	left, right := toSet(a), toSet(b)
+	if len(left) != len(right) {
+		return false
+	}
+	// Equal sizes plus every left member present on the right means equal sets.
+	for state := range left {
+		if present := right[state]; !present {
+			return false
+		}
+	}
+	return true
+}
+
+// proposerWordSplit matches runs of non-letter characters; splitting on it yields
+// the letter-only words of a text (digits and punctuation act as separators).
+var proposerWordSplit = regexp.MustCompile(`[^\p{L}]+`)
+
+// zoneTokenSet lowercases a comma-separated zone string and returns the set of
+// its trimmed terms, keeping only terms of at least three runes.
+func zoneTokenSet(zone string) map[string]bool {
+	terms := make(map[string]bool)
+	lowered := strings.ToLower(zone)
+	for _, piece := range strings.Split(lowered, ",") {
+		term := strings.TrimSpace(piece)
+		if len([]rune(term)) < 3 {
+			continue
+		}
+		terms[term] = true
+	}
+	return terms
+}
+
+// wordTokenSet lowercases free text, splits it with proposerWordSplit, and
+// returns the set of resulting words that are at least four runes long (shorter
+// words — mostly connectives — are dropped).
+func wordTokenSet(s string) map[string]bool {
+	words := make(map[string]bool)
+	lowered := strings.ToLower(s)
+	for _, word := range proposerWordSplit.Split(lowered, -1) {
+		if len([]rune(word)) < 4 {
+			continue
+		}
+		words[word] = true
+	}
+	return words
+}
+
+// tokenJaccard returns |a ∩ b| / |a ∪ b| for two token sets. The intersection
+// counts keys of a whose entry in b is true. Two empty sets score 0 rather than
+// 1, so a pair with no tokens on either side contributes no overlap.
+func tokenJaccard(a, b map[string]bool) float64 {
+	if len(a)+len(b) == 0 {
+		return 0
+	}
+	shared := 0
+	for token := range a {
+		if b[token] {
+			shared++
+		}
+	}
+	total := len(a) + len(b) - shared
+	if total == 0 {
+		return 0
+	}
+	return float64(shared) / float64(total)
+}
+
+// round2 rounds x to two decimal places using math.Round.
+func round2(x float64) float64 {
+	scaled := math.Round(x * 100)
+	return scaled / 100
+}
@@ -0,0 +1,67 @@
+package iace
+
+import "testing"
+
+// mkPM builds a PatternMatch fixture for the proposer tests. The id is used as
+// both PatternID and PatternName, and cat becomes the single entry of HazardCats.
+func mkPM(id, cat, zone, scenario string, prio int, measures, opstates []string) PatternMatch {
+	return PatternMatch{
+		PatternID: id, PatternName: id, Priority: prio,
+		HazardCats: []string{cat}, ZoneDE: zone, ScenarioDE: scenario,
+		SuggestedMeasureIDs: measures, OperationalStates: opstates,
+	}
+}
+
+// TestFindDedupCandidates_FindsOverlappingPair checks that two same-category
+// patterns with overlapping zone, measures and scenario produce exactly one
+// candidate, and that a third pattern from another category is left out.
+func TestFindDedupCandidates_FindsOverlappingPair(t *testing.T) {
+	fired := []PatternMatch{
+		mkPM("HPa", "update_failure", "Steuerung, SPS", "Software-Update der Steuerung scheitert nach Abbruch", 80,
+			[]string{"M138", "M146"}, nil),
+		mkPM("HPb", "update_failure", "Steuerung, Antriebsregler", "Software-Update der Steuerung schlaegt fehl", 75,
+			[]string{"M138", "M146", "M141"}, nil),
+		mkPM("HPc", "mechanical_hazard", "Tuer", "Quetschen der Finger an der Tuer", 70,
+			[]string{"M003"}, nil),
+	}
+	got := FindDedupCandidates(fired, 0.4)
+	if len(got) != 1 {
+		t.Fatalf("want 1 candidate, got %d: %+v", len(got), got)
+	}
+	// Higher-priority pattern survives, lower one is the drop target.
+	if got[0].KeepPattern != "HPa" || got[0].DropPattern != "HPb" {
+		t.Errorf("want keep HPa / drop HPb, got keep %s / drop %s", got[0].KeepPattern, got[0].DropPattern)
+	}
+	// DropName carries the drop pattern's scenario text.
+	if got[0].DropName != "Software-Update der Steuerung schlaegt fehl" {
+		t.Errorf("DropName must equal drop pattern ScenarioDE, got %q", got[0].DropName)
+	}
+}
+
+// TestFindDedupCandidates_LifecycleGuard checks that a pair differing only in
+// operational states (nil vs. "maintenance") is never proposed.
+func TestFindDedupCandidates_LifecycleGuard(t *testing.T) {
+	// Same category, zone and measures — but normal-operation vs maintenance.
+	// These are legitimate variants (HP011 vs HP077) and must NOT be proposed.
+	fired := []PatternMatch{
+		mkPM("HP011", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 95,
+			[]string{"M481", "M482"}, nil),
+		mkPM("HP077", "electrical_hazard", "Schaltschrank, Klemmenkasten", "Person beruehrt spannungsfuehrende Teile", 80,
+			[]string{"M481", "M482"}, []string{"maintenance"}),
+	}
+	if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
+		t.Fatalf("lifecycle guard failed: want 0 candidates, got %d: %+v", len(got), got)
+	}
+}
+
+// TestFindDedupCandidates_DifferentCategoryIgnored checks that two patterns with
+// identical zone, scenario and measures but different primary categories are
+// not compared at all.
+func TestFindDedupCandidates_DifferentCategoryIgnored(t *testing.T) {
+	fired := []PatternMatch{
+		mkPM("HPa", "thermal_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil),
+		mkPM("HPb", "mechanical_hazard", "Boiler", "Heisse Oberflaeche am Boiler", 80, []string{"M071"}, nil),
+	}
+	if got := FindDedupCandidates(fired, 0.3); len(got) != 0 {
+		t.Fatalf("cross-category pair must not be proposed, got %d", len(got))
+	}
+}
+
+// TestFindDedupCandidates_BelowThresholdDropped checks that a same-category pair
+// with disjoint zones and measures scores below the threshold and is dropped.
+func TestFindDedupCandidates_BelowThresholdDropped(t *testing.T) {
+	fired := []PatternMatch{
+		mkPM("HPa", "mechanical_hazard", "Tuer", "Quetschen an der Tuer", 80, []string{"M003"}, nil),
+		mkPM("HPb", "mechanical_hazard", "Foerderband", "Einzug am Foerderband", 80, []string{"M540"}, nil),
+	}
+	if got := FindDedupCandidates(fired, 0.4); len(got) != 0 {
+		t.Fatalf("disjoint pair must be below threshold, got %d: %+v", len(got), got)
+	}
+}
@@ -0,0 +1,154 @@
+package iace
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+)
+
+// Foreign-framing proposer (P2 slice 4, type 2). DEV-TIME, propose-only.
+//
+// A pattern can fire for a machine yet describe its hazard with a zone text
+// framed for a DIFFERENT machine (e.g. a dishwasher hazard whose zone names
+// "Walzen, Transportbaender" or "Bearbeitungszone"). Such foreign framing leaks
+// through terms that are NOT yet in domainGateTerms — once a term is a gate term,
+// the ghost-pattern invariant already fences the pattern out. So we surface the
+// candidates structurally: zone terms a fired pattern names that the machine's
+// narrative never mentions (minus generic hazard-location vocabulary). A human
+// (or the LLM) then decides: add a dom_* gate term, or re-frame the zone text.
+//
+// This OVER-surfaces by design — the human/LLM is the precision filter, not the
+// detector (same contract as the dedup proposer).
+
+// genericHazardStop are hazard-LOCATION words that legitimately appear in zones
+// without being echoed in a narrative — they are not evidence of foreign framing.
+var genericHazardStop = map[string]bool{
+	"quetschstelle": true, "einzugstelle": true, "einzugsstelle": true, "scherstelle": true,
+	"schneidstelle": true, "stossstelle": true, "fangstelle": true, "klemmstelle": true,
+	"gefahrbereich": true, "gefahrenbereich": true, "gefahrstelle": true, "gefahrenstelle": true,
+	"arbeitsbereich": true, "wirkbereich": true, "schutzbereich": true, "umgebung": true,
+	"bereich": true, "zugang": true, "oberflaeche": true, "oberflaechen": true,
+	"gehaeuse": true, "bauteil": true, "bauteile": true, "komponente": true, "maschine": true,
+}
+
+// FramingCandidate is a fired pattern whose zone text looks foreign for the machine.
+//
+// As populated by FindFramingCandidates: OrphanTerms are the zone terms with no
+// echo in the narrative, OrphanFraction is their share of all zone terms
+// (rounded to two decimals), Verdict is the heuristic lean derived from that
+// share, and Evidence is a one-line human-readable summary.
+type FramingCandidate struct {
+	Pattern        string   `json:"pattern"`
+	Name           string   `json:"name"`
+	Category       string   `json:"category"`
+	Zone           string   `json:"zone"`
+	OrphanTerms    []string `json:"orphan_terms"`
+	OrphanFraction float64  `json:"orphan_fraction"`
+	Verdict        string   `json:"verdict"` // heuristic lean: foreign | plausible
+	Evidence       string   `json:"evidence"`
+}
+
+// FindFramingCandidates flags fired patterns whose zone terms are mostly absent
+// from the narrative.
+//
+// The narrative is lowercased and its words of five or more runes become the
+// stems used for compound matching. For each pattern the zone is split with
+// zoneParts; terms that partEchoed rejects are orphans. A pattern is reported
+// when it has at least one orphan and the orphan share is >= minFraction.
+// Results are sorted by orphan fraction descending, then pattern id ascending.
+func FindFramingCandidates(fired []PatternMatch, narrative string, minFraction float64) []FramingCandidate {
+	lowered := strings.ToLower(narrative)
+	var stems []string
+	for _, word := range proposerWordSplit.Split(lowered, -1) {
+		if len([]rune(word)) < 5 {
+			continue
+		}
+		stems = append(stems, word)
+	}
+
+	var candidates []FramingCandidate
+	for _, pm := range fired {
+		terms := zoneParts(pm.ZoneDE)
+		if len(terms) == 0 {
+			continue
+		}
+		var unechoed []string
+		for _, term := range terms {
+			if partEchoed(term, lowered, stems) {
+				continue
+			}
+			unechoed = append(unechoed, term)
+		}
+		if len(unechoed) == 0 {
+			continue
+		}
+		fraction := float64(len(unechoed)) / float64(len(terms))
+		if fraction < minFraction {
+			continue
+		}
+		evidence := fmt.Sprintf("%d/%d zone terms have no narrative echo: %s",
+			len(unechoed), len(terms), strings.Join(unechoed, ", "))
+		candidates = append(candidates, FramingCandidate{
+			Pattern:        pm.PatternID,
+			Name:           pm.PatternName,
+			Category:       primaryCat(pm),
+			Zone:           pm.ZoneDE,
+			OrphanTerms:    unechoed,
+			OrphanFraction: round2(fraction),
+			Verdict:        framingHeuristicVerdict(fraction),
+			Evidence:       evidence,
+		})
+	}
+
+	sort.SliceStable(candidates, func(x, y int) bool {
+		left, right := candidates[x], candidates[y]
+		if left.OrphanFraction == right.OrphanFraction {
+			return left.Pattern < right.Pattern
+		}
+		return left.OrphanFraction > right.OrphanFraction
+	})
+	return candidates
+}
+
+// framingHeuristicVerdict maps an orphan fraction to the heuristic lean:
+// "foreign" when (practically) no zone term is echoed by the narrative
+// (frac >= 0.99), otherwise "plausible" — a partial echo a human should confirm.
+func framingHeuristicVerdict(frac float64) string {
+	verdict := "plausible"
+	if frac >= 0.99 {
+		verdict = "foreign"
+	}
+	return verdict
+}
+
+// zoneParts lowercases a zone string, cuts it at commas, slashes, semicolons and
+// parentheses, trims each piece, and returns the pieces of at least four runes
+// in their original order.
+func zoneParts(zone string) []string {
+	isSeparator := func(r rune) bool {
+		switch r {
+		case ',', '/', ';', '(', ')':
+			return true
+		}
+		return false
+	}
+	var terms []string
+	for _, field := range strings.FieldsFunc(strings.ToLower(zone), isSeparator) {
+		term := strings.TrimSpace(field)
+		if len([]rune(term)) < 4 {
+			continue
+		}
+		terms = append(terms, term)
+	}
+	return terms
+}
+
+// partEchoed reports whether a zone part is reflected in the narrative. The part
+// is split on whitespace and each word is tried in turn; the part counts as
+// echoed as soon as one word
+//   - is a generic hazard-location term (genericHazardStop), or
+//   - has at least four runes and occurs as a substring of the narrative, or
+//   - has at least four runes and contains one of the narrative stems.
+//
+// The last rule makes matching bidirectional so it survives German compounding
+// (narrative "Steuerung" echoes zone "Steuerungssystem").
+func partEchoed(part, narrative string, narStems []string) bool {
+	for _, word := range strings.Fields(part) {
+		switch {
+		case genericHazardStop[word]:
+			return true
+		case len([]rune(word)) < 4:
+			continue
+		case strings.Contains(narrative, word):
+			return true
+		}
+		for _, stem := range narStems {
+			if strings.Contains(word, stem) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// RenderFramingQueue builds the markdown review queue for foreign-framing
+// candidates: a heading for the machine, a summary line, then one numbered
+// section per candidate with its category, zone, orphan terms and the
+// suggested action for its verdict.
+func RenderFramingQueue(machine string, candidates []FramingCandidate) string {
+	var sb strings.Builder
+	sb.WriteString(fmt.Sprintf("# Foreign-framing review queue — %s\n\n", machine))
+	sb.WriteString(fmt.Sprintf("%d fired pattern(s) name zone terms the narrative never mentions. Propose-only — a human (or the LLM) decides: add a dom_* gate term, or re-frame the zone.\n\n", len(candidates)))
+	for idx := range candidates {
+		cand := candidates[idx]
+		orphanList := strings.Join(cand.OrphanTerms, ", ")
+		sb.WriteString(fmt.Sprintf("## %d. %s — %s  [%s, orphan %.0f%%]\n", idx+1, cand.Pattern, cand.Name, cand.Verdict, cand.OrphanFraction*100))
+		sb.WriteString(fmt.Sprintf("- category: %s\n- zone: %s\n", cand.Category, cand.Zone))
+		sb.WriteString(fmt.Sprintf("- orphan terms (no narrative echo): %s\n", orphanList))
+		sb.WriteString(fmt.Sprintf("- suggested action: %s\n\n", framingAction(cand.Verdict)))
+	}
+	return sb.String()
+}
+
+// framingAction returns the suggested-action text for a framing verdict: a
+// gate-term/re-framing proposal for "foreign", and a confirm-only note for
+// every other value.
+func framingAction(verdict string) string {
+	switch verdict {
+	case "foreign":
+		return "likely foreign-framed — propose a dom_* gate term for the orphan term(s), or re-frame the zone; human confirms + commits + pins a GT case"
+	default:
+		return "partial echo — likely generic vocabulary; human to confirm whether any orphan term is a foreign-machine component"
+	}
+}
@@ -0,0 +1,33 @@
+package iace
+
+import "testing"
+
+// TestFindFramingCandidates_FlagsForeignZone checks that only the pattern whose
+// zone terms are all absent from the narrative is flagged, and that it gets the
+// "foreign" verdict. The locally-framed and generic-vocabulary patterns must
+// not be reported.
+func TestFindFramingCandidates_FlagsForeignZone(t *testing.T) {
+	narrative := "Gewerbliche Geschirrspuelmaschine mit Boiler und Tank. Die Tuer ist verriegelt."
+	fired := []PatternMatch{
+		mkPM("HPforeign", "mechanical_hazard", "Walzen, Transportbaender, Bearbeitungszone", "Einzug", 80, nil, nil),
+		mkPM("HPlocal", "thermal_hazard", "Boiler, Tank, Tuer", "Verbrennung", 80, nil, nil),
+		mkPM("HPgeneric", "mechanical_hazard", "Quetschstelle, Gefahrbereich", "Quetschen", 80, nil, nil),
+	}
+	got := FindFramingCandidates(fired, narrative, 0.6)
+	if len(got) != 1 || got[0].Pattern != "HPforeign" {
+		t.Fatalf("want only HPforeign flagged, got %+v", got)
+	}
+	if got[0].Verdict != "foreign" {
+		t.Errorf("fully-orphan zone should be 'foreign', got %s", got[0].Verdict)
+	}
+}
+
+// TestFindFramingCandidates_PartialEchoIsPlausible checks that a zone with one
+// orphan term out of three is reported at minFraction 0.3 with the "plausible"
+// verdict and exactly that orphan term (lowercased).
+func TestFindFramingCandidates_PartialEchoIsPlausible(t *testing.T) {
+	narrative := "Maschine mit Boiler und Tank."
+	fired := []PatternMatch{
+		mkPM("HPx", "thermal_hazard", "Boiler, Tank, Auspuffleitung", "x", 80, nil, nil),
+	}
+	got := FindFramingCandidates(fired, narrative, 0.3)
+	if len(got) != 1 {
+		t.Fatalf("want 1 candidate (1/3 orphan >= 0.3), got %d", len(got))
+	}
+	if got[0].Verdict != "plausible" || len(got[0].OrphanTerms) != 1 || got[0].OrphanTerms[0] != "auspuffleitung" {
+		t.Errorf("want plausible + orphan [auspuffleitung], got %s %v", got[0].Verdict, got[0].OrphanTerms)
+	}
+}
@@ -0,0 +1,123 @@
+package iace
+
+import "github.com/google/uuid"
+
+// Non-test plumbing for the offline proposer (P2 slice 3): run the engine for a
+// narrative and produce the fired patterns + the engine-built hazards/mitigations
+// the dedup proposer and GT screen consume. This is the same pipeline the GT
+// benchmark tests use, lifted out of test scope so the dev-time CLI can call it.
+
+// universalLifecyclePhases are appended so patterns gated to a specific lifecycle
+// (maintenance/cleaning/setup/fault clearing) still fire — the proposer wants the
+// full hazard picture, not only normal-operation hazards.
+var universalLifecyclePhases = []string{"normal_operation", "maintenance", "cleaning", "setup", "fault_clearing"}
+
+// BuildProposerInput parses a narrative, runs the pattern engine, keeps the
+// narrative-relevant patterns, and returns the hazards, mitigations and fired
+// patterns. NOTE: it does not apply the CE cyber-category skip, so the proposer
+// view may include cyber/AI hazards that the CE log excludes — harmless for the
+// GT recall screen (they match no CE ground-truth entry).
+//
+// machineType is appended after extraMachineTypes when non-empty. The returned
+// hazards and mitigations are built from the relevance-filtered patterns only.
+func BuildProposerInput(narrative, machineType string, extraMachineTypes []string) ([]Hazard, []Mitigation, []PatternMatch) {
+	res := ParseNarrative(narrative, machineType)
+
+	// Collect component ids/names (skipping negated mentions) and energy source ids.
+	var compIDs, compNames, energyIDs []string
+	for _, c := range res.Components {
+		if c.Negated {
+			continue
+		}
+		compIDs = append(compIDs, c.LibraryID)
+		compNames = append(compNames, c.NameDE)
+	}
+	for _, e := range res.EnergySources {
+		energyIDs = append(energyIDs, e.SourceID)
+	}
+
+	// Copy into fresh slices so the caller's and the parser's slices are not mutated.
+	machineTypes := append([]string{}, extraMachineTypes...)
+	if machineType != "" {
+		machineTypes = append(machineTypes, machineType)
+	}
+	lifecycles := append(append([]string{}, res.LifecyclePhases...), universalLifecyclePhases...)
+
+	out := NewPatternEngine().Match(MatchInput{
+		ComponentLibraryIDs: compIDs,
+		EnergySourceIDs:     energyIDs,
+		LifecyclePhases:     lifecycles,
+		CustomTags:          res.CustomTags,
+		OperationalStates:   res.OperationalStates,
+		StateTransitions:    res.StateTransitions,
+		HumanRoles:          res.Roles,
+		MachineTypes:        machineTypes,
+	})
+
+	// Keep only the matches IsPatternRelevant accepts for this narrative.
+	kept := make([]PatternMatch, 0, len(out.MatchedPatterns))
+	for _, pm := range out.MatchedPatterns {
+		if IsPatternRelevant(pm, narrative, compNames) {
+			kept = append(kept, pm)
+		}
+	}
+	// Work on a shallow copy so the engine's output keeps its original match list.
+	filtered := *out
+	filtered.MatchedPatterns = kept
+	hazards, mits := patternsToHazardsAndMitigations(&filtered)
+	return hazards, mits, kept
+}
+
+// patternsToHazardsAndMitigations turns engine output into the hazard and
+// mitigation entities the benchmark and proposer compare on. It is a simplified
+// form of InitializeProject: no risk estimation and no norm references.
+//
+// Each matched pattern yields one Hazard with a fresh UUID; its name is the
+// pattern's ScenarioDE, falling back to PatternName when that is empty. Each
+// suggested measure yields one Mitigation per source pattern that produced a
+// hazard, named after the protective-measure library entry or, when the library
+// has no name for it, after the measure id itself.
+func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
+	matched := out.MatchedPatterns
+	hazards := make([]Hazard, 0, len(matched))
+	hazardByPattern := make(map[string]uuid.UUID, len(matched))
+
+	for _, pm := range matched {
+		var category, phase string
+		if cats := pm.HazardCats; len(cats) > 0 {
+			category = cats[0]
+		}
+		if phases := pm.ApplicableLifecycles; len(phases) > 0 {
+			phase = phases[0]
+		}
+		title := pm.ScenarioDE
+		if title == "" {
+			title = pm.PatternName
+		}
+		hazardID := uuid.New()
+		hazards = append(hazards, Hazard{
+			ID:             hazardID,
+			Name:           title,
+			Category:       category,
+			Description:    pm.ScenarioDE,
+			Scenario:       pm.ScenarioDE,
+			TriggerEvent:   pm.TriggerDE,
+			PossibleHarm:   pm.HarmDE,
+			AffectedPerson: pm.AffectedDE,
+			HazardousZone:  pm.ZoneDE,
+			LifecyclePhase: phase,
+		})
+		hazardByPattern[pm.PatternID] = hazardID
+	}
+
+	// Index the measure library by id for name lookups.
+	libraryNames := make(map[string]string)
+	for _, measure := range GetProtectiveMeasureLibrary() {
+		libraryNames[measure.ID] = measure.Name
+	}
+
+	var mitigations []Mitigation
+	for _, suggestion := range out.SuggestedMeasures {
+		label := libraryNames[suggestion.MeasureID]
+		if label == "" {
+			label = suggestion.MeasureID
+		}
+		for _, patternID := range suggestion.SourcePatterns {
+			hazardID, found := hazardByPattern[patternID]
+			if !found {
+				continue
+			}
+			mitigations = append(mitigations, Mitigation{
+				ID:       uuid.New(),
+				HazardID: hazardID,
+				Name:     label,
+			})
+		}
+	}
+	return hazards, mitigations
+}
@@ -0,0 +1,25 @@
+package iace
+
+import "testing"
+
+// TestBuildProposerInput_WarewashingFires runs the proposer pipeline on the
+// warewashing narrative and checks that it yields fired patterns and hazards,
+// including the warewashing-specific pattern HP2201.
+func TestBuildProposerInput_WarewashingFires(t *testing.T) {
+	hazards, _, fired := BuildProposerInput(
+		warewashingNarrative,
+		"Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)",
+		[]string{"food_processing"},
+	)
+	if len(fired) == 0 || len(hazards) == 0 {
+		t.Fatalf("want fired patterns + hazards, got %d patterns / %d hazards", len(fired), len(hazards))
+	}
+	// has reports whether a pattern with the given id is among the fired ones.
+	has := func(id string) bool {
+		for _, pm := range fired {
+			if pm.PatternID == id {
+				return true
+			}
+		}
+		return false
+	}
+	if !has("HP2201") {
+		t.Errorf("warewashing-specific HP2201 must fire via BuildProposerInput")
+	}
+}
@@ -0,0 +1,174 @@
+package iace
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
+)
+
+// Semantic judgement over RECALL-SAFE dedup candidates (P2 slice 2). DEV-TIME,
+// propose-only. The deterministic GT wall (proposer_screen.go) has already
+// removed candidates that would drop recall or that credit different GT entries;
+// the judge only adds an opinion on whether the survivors are truly the same
+// hazard, plus a rationale, for the human review queue. It NEVER mutates anything.
+//
+// The judge is pluggable behind CandidateJudge so the runtime/tests stay
+// deterministic (HeuristicJudge) while the dev-time CLI can plug in the
+// non-deterministic LLM (LLMJudge over the shared llm.ProviderRegistry).
+
+const (
+	VerdictDuplicate = "duplicate"
+	VerdictDistinct  = "distinct"
+	VerdictUncertain = "uncertain"
+)
+
+// JudgedProposal is one candidate with its GT-wall result and the judge's opinion.
+// Verdict, Confidence and Rationale are the three values a CandidateJudge
+// returns; Judge records which judge produced them (its Name()).
+type JudgedProposal struct {
+	Candidate  DedupCandidate `json:"candidate"`
+	Screen     ScreenResult   `json:"screen"`
+	Verdict    string         `json:"verdict"`
+	Confidence string         `json:"confidence"`
+	Rationale  string         `json:"rationale"`
+	Judge      string         `json:"judge"`
+}
+
+// CandidateJudge decides whether two near-duplicate patterns are the same hazard.
+type CandidateJudge interface {
+	Name() string
+	Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (verdict, confidence, rationale string)
+}
+
+// HeuristicJudge is the deterministic default and fallback judge. Every verdict
+// it gives carries "low" confidence: it stands in for the LLM and deliberately
+// answers "uncertain" on the hard cases (low text overlap, shared measures) so
+// the queue shows where the LLM is actually needed.
+type HeuristicJudge struct{}
+
+// Name identifies this judge in the review queue.
+func (HeuristicJudge) Name() string {
+	return "heuristic"
+}
+
+// Judge derives a verdict from the candidate's overlap figures alone; the two
+// pattern arguments are ignored. The rules are checked in order: duplicate on
+// high scenario overlap or combined zone+measure overlap, distinct on identical
+// measures with no zone overlap and low scenario overlap, uncertain otherwise.
+func (HeuristicJudge) Judge(_ context.Context, c DedupCandidate, _, _ PatternMatch) (string, string, string) {
+	const confidence = "low"
+
+	scenarioHigh := c.ScenarioJaccard >= 0.5
+	zoneAndMeasureHigh := c.ZoneJaccard >= 0.5 && c.MeasureJaccard >= 0.5
+	if scenarioHigh || zoneAndMeasureHigh {
+		return VerdictDuplicate, confidence, "structural: high scenario, or combined zone+measure, overlap"
+	}
+	if c.MeasureJaccard >= 0.99 && c.ZoneJaccard == 0 && c.ScenarioJaccard < 0.3 {
+		return VerdictDistinct, confidence, "structural: identical measures but no zone/scenario overlap — likely distinct hazards sharing generic measures"
+	}
+	return VerdictUncertain, confidence, "structural signal inconclusive — needs the LLM judge"
+}
+
+// LLMJudge asks an offline model to make the semantic call. Non-deterministic, so
+// it lives only in the dev-time tool, never in tests or the runtime. It degrades
+// to "uncertain" on a missing completer and on any transport or parse error — it
+// must never break the run.
+type LLMJudge struct {
+	Completer    LLMCompleter // model backend; a nil value degrades to "uncertain"
+	MachineClass string       // machine class passed into the prompt
+}
+
+// Name identifies this judge in the review queue.
+func (LLMJudge) Name() string { return "llm" }
+
+// Judge builds the prompt for patterns a and b, sends it to the completer and
+// parses the reply into (verdict, confidence, rationale). The candidate c is
+// not consulted. Every failure path returns VerdictUncertain with "low"
+// confidence and a rationale naming the cause.
+func (j LLMJudge) Judge(ctx context.Context, c DedupCandidate, a, b PatternMatch) (string, string, string) {
+	// A zero-value LLMJudge has no completer; calling through the nil interface
+	// would panic, so degrade the same way a transport error does.
+	if j.Completer == nil {
+		return VerdictUncertain, "low", "LLM error: no completer configured"
+	}
+	system, user := BuildJudgePrompt(j.MachineClass, a, b)
+	raw, err := j.Completer.Complete(ctx, system, user)
+	if err != nil {
+		return VerdictUncertain, "low", "LLM error: " + err.Error()
+	}
+	return parseJudgeJSON(raw)
+}
+
+// BuildJudgePrompt is the real LLM artifact — built and unit-tested deterministically
+// even though the call itself is not. It frames the ISO 12100 same-vs-distinct
+// question and forces a JSON answer.
+//
+// The system prompt states the expert role, the duplicate-vs-distinct criteria
+// and the required JSON shape. The user prompt lists, for each of the two
+// patterns, its id, name, primary category, zone, scenario, trigger, harm and
+// comma-joined measure ids, preceded by the machine class.
+func BuildJudgePrompt(machineClass string, a, b PatternMatch) (system, user string) {
+	system = "Du bist Sachverstaendiger fuer Maschinensicherheit nach EN ISO 12100. " +
+		"Entscheide, ob zwei generierte Gefaehrdungen fuer DIESE Maschine DIESELBE Gefaehrdung " +
+		"beschreiben (Dublette) oder fachlich VERSCHIEDENE Gefaehrdungen sind, die nur zufaellig " +
+		"dieselben Schutzmassnahmen teilen. Verschieden, wenn Wirkort, Ausloeser oder " +
+		"Schadensmechanismus abweichen — auch bei gleicher Kategorie und gleichen Massnahmen. " +
+		"Antworte AUSSCHLIESSLICH als JSON: " +
+		`{"verdict":"duplicate|distinct|uncertain","confidence":"high|medium|low","rationale":"..."}.`
+	user = fmt.Sprintf(`Maschinenklasse: %s
+
+Gefaehrdung A (%s):
+  Name: %s
+  Kategorie: %s
+  Zone: %s
+  Szenario: %s
+  Ausloeser: %s
+  Schaden: %s
+  Massnahmen: %s
+
+Gefaehrdung B (%s):
+  Name: %s
+  Kategorie: %s
+  Zone: %s
+  Szenario: %s
+  Ausloeser: %s
+  Schaden: %s
+  Massnahmen: %s
+
+Sind A und B dieselbe Gefaehrdung fuer diese Maschine?`,
+		machineClass,
+		a.PatternID, a.PatternName, primaryCat(a), a.ZoneDE, a.ScenarioDE, a.TriggerDE, a.HarmDE, strings.Join(a.SuggestedMeasureIDs, ", "),
+		b.PatternID, b.PatternName, primaryCat(b), b.ZoneDE, b.ScenarioDE, b.TriggerDE, b.HarmDE, strings.Join(b.SuggestedMeasureIDs, ", "))
+	return system, user
+}
+
+// parseJudgeJSON extracts the judge's answer from a raw model reply.
+//
+// The JSON object is taken to run from the first "{" to the last "}", so text
+// around it is tolerated. Verdict and confidence are matched after trimming
+// whitespace and lowercasing, because models often answer "Duplicate" or
+// " high ". A verdict outside duplicate|distinct|uncertain becomes
+// VerdictUncertain, and a confidence outside high|medium|low (including empty)
+// becomes "low", so only the values the prompt asks for reach the review queue.
+// A missing or undecodable object yields (VerdictUncertain, "low", reason).
+func parseJudgeJSON(raw string) (verdict, confidence, rationale string) {
+	start, end := strings.Index(raw, "{"), strings.LastIndex(raw, "}")
+	if start < 0 || end <= start {
+		return VerdictUncertain, "low", "unparseable LLM output"
+	}
+	var v struct {
+		Verdict    string `json:"verdict"`
+		Confidence string `json:"confidence"`
+		Rationale  string `json:"rationale"`
+	}
+	if err := json.Unmarshal([]byte(raw[start:end+1]), &v); err != nil {
+		return VerdictUncertain, "low", "unparseable LLM JSON: " + err.Error()
+	}
+
+	verdict = strings.ToLower(strings.TrimSpace(v.Verdict))
+	switch verdict {
+	case VerdictDuplicate, VerdictDistinct, VerdictUncertain:
+	default:
+		verdict = VerdictUncertain
+	}
+
+	confidence = strings.ToLower(strings.TrimSpace(v.Confidence))
+	switch confidence {
+	case "high", "medium", "low":
+	default:
+		confidence = "low"
+	}
+	return verdict, confidence, v.Rationale
+}
+
+// LLMCompleter is the minimal text-in/text-out the LLM judge needs. Tests pass a
+// stub; the dev-time tool passes a registry-backed adapter (NewRegistryCompleter).
+type LLMCompleter interface {
+	Complete(ctx context.Context, system, user string) (string, error)
+}
+
+// registryCompleter is the LLMCompleter backed by the shared provider registry.
+type registryCompleter struct {
+	reg   *llm.ProviderRegistry
+	model string
+}
+
+// NewRegistryCompleter wraps the shared llm.ProviderRegistry as an LLMCompleter
+// so the proposer can reuse the platform's offline model wiring (e.g.
+// self-hosted qwen).
+func NewRegistryCompleter(reg *llm.ProviderRegistry, model string) LLMCompleter {
+	rc := registryCompleter{reg: reg, model: model}
+	return &rc
+}
+
+// Complete sends the system and user texts as a two-message chat at temperature
+// 0 and returns the content of the reply message, or the registry's error.
+func (rc *registryCompleter) Complete(ctx context.Context, system, user string) (string, error) {
+	request := &llm.ChatRequest{
+		Model: rc.model,
+		Messages: []llm.Message{
+			{Role: "system", Content: system},
+			{Role: "user", Content: user},
+		},
+		Temperature: 0,
+	}
+	reply, err := rc.reg.Chat(ctx, request)
+	if err != nil {
+		return "", err
+	}
+	return reply.Message.Content, nil
+}
@@ -0,0 +1,104 @@
+package iace
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"testing"
+)
+
+// TestHeuristicJudge_Verdicts is a table test over the three overlap figures:
+// each row fixes zone/measure/scenario overlap and the verdict HeuristicJudge
+// must return. Confidence must be "low" in every case.
+func TestHeuristicJudge_Verdicts(t *testing.T) {
+	tests := []struct {
+		name        string
+		zone, meas  float64
+		scenario    float64
+		wantVerdict string
+	}{
+		{"high scenario overlap -> duplicate", 0, 0.3, 0.6, VerdictDuplicate},
+		{"high zone+measure -> duplicate", 0.6, 0.6, 0.1, VerdictDuplicate},
+		{"identical measures, no text -> distinct", 0, 1.0, 0.0, VerdictDistinct},
+		{"shared measures, low text -> uncertain", 0, 0.67, 0.19, VerdictUncertain},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Only the overlap fields matter; the pattern arguments are ignored by the judge.
+			c := DedupCandidate{ZoneJaccard: tt.zone, MeasureJaccard: tt.meas, ScenarioJaccard: tt.scenario}
+			v, conf, _ := HeuristicJudge{}.Judge(context.Background(), c, PatternMatch{}, PatternMatch{})
+			if v != tt.wantVerdict {
+				t.Errorf("verdict: want %s, got %s", tt.wantVerdict, v)
+			}
+			if conf != "low" {
+				t.Errorf("heuristic confidence must be low, got %s", conf)
+			}
+		})
+	}
+}
+
+// TestBuildJudgePrompt_ContainsKeyFacts checks that the system prompt carries the
+// standard, the JSON instruction and the verdict key, and that the user prompt
+// includes the machine class plus both patterns' ids, zones and category.
+func TestBuildJudgePrompt_ContainsKeyFacts(t *testing.T) {
+	a := PatternMatch{PatternID: "HPa", PatternName: "Heisse Flaeche", HazardCats: []string{"thermal_hazard"},
+		ZoneDE: "Boiler", ScenarioDE: "Beruehrung heisser Boiler", SuggestedMeasureIDs: []string{"M071"}}
+	b := PatternMatch{PatternID: "HPb", PatternName: "Heisses Spuelgut", HazardCats: []string{"thermal_hazard"},
+		ZoneDE: "Spuelgut", ScenarioDE: "Beruehrung heisses Geschirr", SuggestedMeasureIDs: []string{"M071"}}
+	system, user := BuildJudgePrompt("Geschirrspuelmaschine", a, b)
+
+	for _, want := range []string{"EN ISO 12100", "JSON", "verdict"} {
+		if !strings.Contains(system, want) {
+			t.Errorf("system prompt missing %q", want)
+		}
+	}
+	for _, want := range []string{"Geschirrspuelmaschine", "HPa", "HPb", "Boiler", "Spuelgut", "thermal_hazard"} {
+		if !strings.Contains(user, want) {
+			t.Errorf("user prompt missing %q", want)
+		}
+	}
+}
+
+// fakeCompleter is a test stub for LLMCompleter: Complete ignores its inputs and
+// returns the configured out and err.
+type fakeCompleter struct {
+	out string
+	err error
+}
+
+// Complete returns the canned reply and error regardless of the prompts.
+func (f fakeCompleter) Complete(_ context.Context, _, _ string) (string, error) { return f.out, f.err }
+
+// TestLLMJudge_ParsesAndDegrades exercises LLMJudge against canned replies: a
+// well-formed answer, an unknown verdict, a transport error and a reply with no
+// JSON at all. Every failure case must come back as VerdictUncertain.
+func TestLLMJudge_ParsesAndDegrades(t *testing.T) {
+	cand := DedupCandidate{KeepPattern: "HPa", DropPattern: "HPb"}
+
+	// Well-formed JSON, even wrapped in chatter, parses.
+	j := LLMJudge{Completer: fakeCompleter{out: "Sicher. {\"verdict\":\"distinct\",\"confidence\":\"high\",\"rationale\":\"andere Wirkorte\"}"}, MachineClass: "x"}
+	if v, conf, r := j.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictDistinct || conf != "high" || r != "andere Wirkorte" {
+		t.Errorf("parse: got %s/%s/%q", v, conf, r)
+	}
+
+	// Unknown verdict value normalises to uncertain.
+	j2 := LLMJudge{Completer: fakeCompleter{out: `{"verdict":"maybe","confidence":"medium","rationale":"x"}`}}
+	if v, _, _ := j2.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictUncertain {
+		t.Errorf("unknown verdict must normalise to uncertain, got %s", v)
+	}
+
+	// Transport error degrades gracefully, never panics.
+	j3 := LLMJudge{Completer: fakeCompleter{err: errors.New("connection refused")}}
+	if v, _, r := j3.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictUncertain || !strings.Contains(r, "LLM error") {
+		t.Errorf("error path: got %s / %q", v, r)
+	}
+
+	// Garbage (no JSON) degrades to uncertain.
+	j4 := LLMJudge{Completer: fakeCompleter{out: "no json here"}}
+	if v, _, _ := j4.Judge(context.Background(), cand, PatternMatch{}, PatternMatch{}); v != VerdictUncertain {
+		t.Errorf("garbage must degrade to uncertain, got %s", v)
+	}
+}
+
+// TestRenderProposalQueue_ShowsActions renders a single duplicate-verdict
+// proposal and checks that the markdown names both patterns, the category, the
+// supersession action and the propose-only disclaimer.
+func TestRenderProposalQueue_ShowsActions(t *testing.T) {
+	proposals := []JudgedProposal{
+		{
+			Candidate: DedupCandidate{KeepPattern: "HP807", DropPattern: "HP033", Category: "update_failure", Score: 0.32},
+			Screen:    ScreenResult{RecallBefore: 1, RecallAfter: 1},
+			Verdict:   VerdictDuplicate, Confidence: "medium", Rationale: "same update failure", Judge: "llm",
+		},
+	}
+	out := RenderProposalQueue("Geschirrspuelmaschine", proposals)
+	for _, want := range []string{"HP807", "HP033", "update_failure", "supersession", "Propose-only"} {
+		if !strings.Contains(out, want) {
+			t.Errorf("queue missing %q\n%s", want, out)
+		}
+	}
+}
@@ -0,0 +1,47 @@
+package iace
+
+import (
+	"fmt"
+	"strings"
+)
+
+// RenderProposalQueue renders judged dedup proposals as the markdown human-review
+// queue: a title for machine, a count line, then one numbered section per proposal in
+// slice order. It only builds a string — a human confirms, edits, commits, and pins a GT case.
+func RenderProposalQueue(machine string, proposals []JudgedProposal) string {
+	var b strings.Builder
+	fmt.Fprintf(&b, "# Dedup proposal queue — %s\n\n", machine)
+	fmt.Fprintf(&b, "%d candidate(s) survived the deterministic GT wall. Propose-only — nothing is applied automatically.\n\n", len(proposals))
+
+	for i, p := range proposals {
+		c := p.Candidate
+		fmt.Fprintf(&b, "## %d. keep %s  ⊃  drop %s   [%s → %s (%s)]\n", // section heading: 1-based index, keep/drop pattern IDs, judge → verdict (confidence)
+			i+1, c.KeepPattern, c.DropPattern, p.Judge, p.Verdict, p.Confidence)
+		fmt.Fprintf(&b, "- category %s · score %.2f (measures %.0f%%, zone %.0f%%, scenario %.0f%%)\n",
+			c.Category, c.Score, c.MeasureJaccard*100, c.ZoneJaccard*100, c.ScenarioJaccard*100) // Jaccard fields are scaled by 100 and printed with a % sign
+		fmt.Fprintf(&b, "- GT recall %.1f%% → %.1f%% when %s is dropped (wall: %s)\n",
+			p.Screen.RecallBefore*100, p.Screen.RecallAfter*100, c.DropPattern, wallNote(p.Screen))
+		fmt.Fprintf(&b, "- keep: %s\n- drop: %s\n", c.KeepHazardName, c.DropName) // NOTE(review): keep prints KeepHazardName but drop prints DropName — confirm the asymmetry is intended
+		fmt.Fprintf(&b, "- judge rationale: %s\n", p.Rationale)
+		fmt.Fprintf(&b, "- suggested action: %s\n\n", suggestedAction(p))
+	}
+	return b.String()
+}
+
+func wallNote(s ScreenResult) string { // wallNote gives the short GT-wall summary shown in a proposal's recall line
+	if s.DistinctGT {
+		return fmt.Sprintf("distinct GT %s vs %s", s.KeepGT, s.DropGT) // name the two different GT entries credited by keep and drop
+	}
+	return "recall-safe" // NOTE(review): returned whenever DistinctGT is false, without checking s.Safe or the recall values — confirm callers only pass recall-preserving results
+}
+
+func suggestedAction(p JudgedProposal) string { // suggestedAction maps the judge's verdict to the next-step text shown to the reviewer
+	switch p.Verdict {
+	case VerdictDuplicate:
+		return fmt.Sprintf("add %s to a supersession set, then a human confirms + commits + pins a GT case", p.Candidate.DropPattern) // names the drop pattern; still only a suggestion
+	case VerdictDistinct:
+		return "keep both — judge considers them distinct hazards"
+	default: // any other verdict value, including uncertain
+		return "needs human (or higher-confidence LLM) review — no automatic action"
+	}
+}
@@ -0,0 +1,61 @@
+package iace
+
+import "github.com/google/uuid"
+
+// ScreenResult is the deterministic GT verdict for one proposed supersession, as filled in by ScreenSupersession.
+type ScreenResult struct {
+	RecallBefore float64 `json:"recall_before"`     // benchmark coverage score with the full hazard/mitigation set
+	RecallAfter  float64 `json:"recall_after"`      // benchmark coverage score once the drop hazard and its mitigations are removed
+	KeepGT       string  `json:"keep_gt,omitempty"` // Nr of the GT entry the keeper credits (empty if unmatched)
+	DropGT       string  `json:"drop_gt,omitempty"` // Nr of the GT entry the drop credits (empty if unmatched)
+	DistinctGT   bool    `json:"distinct_gt"`       // keep & drop both matched and credit DIFFERENT GT entries -> distinct hazards
+	Safe         bool    `json:"safe"`              // RecallAfter >= RecallBefore AND not DistinctGT
+}
+
+// ScreenSupersession is the WALL between "propose" and "decide". A proposal to
+// drop dropHazardName in favour of keepHazardName is safe only if BOTH checks pass:
+//
+//  1. RECALL is not reduced when the drop-hazard (and its mitigations) is removed
+//     — otherwise the drop is load-bearing for GT coverage.
+//  2. The two hazards do NOT credit DIFFERENT ground-truth entries. Recall alone
+//     is necessary but not sufficient: two genuinely distinct hazards that share
+//     the same measures (e.g. hot boiler surface vs hot ware on unloading) keep
+//     recall at 100% when one is dropped, yet must NOT be merged. If keep and
+//     drop each match a different GT entry, they are distinct.
+//
+// Whatever survives both is still only RECALL-SAFE — a candidate for a human (and
+// in slice 2, an LLM) to confirm semantically. Deterministic; reuses
+// CompareBenchmark; touches neither the library nor the runtime.
+func ScreenSupersession(gt *GroundTruth, hazards []Hazard, mits []Mitigation, keepHazardName, dropHazardName string) ScreenResult {
+	before := CompareBenchmark(gt, hazards, mits) // baseline benchmark over the full hazard/mitigation set
+
+	gtOf := map[string]string{} // engine hazard name -> Nr of the GT entry it was matched to in the baseline
+	for _, p := range before.MatchedPairs {
+		gtOf[p.EngineHazard.Name] = p.GTEntry.Nr // a later pair with the same hazard name overwrites an earlier one
+	}
+	keepGT, dropGT := gtOf[keepHazardName], gtOf[dropHazardName] // empty string when the hazard has no matched pair
+	distinct := keepGT != "" && dropGT != "" && keepGT != dropGT // an unmatched side never counts as distinct
+
+	kept := make([]Hazard, 0, len(hazards))
+	dropped := map[uuid.UUID]bool{} // IDs of removed hazards, used below to filter out their mitigations
+	for _, h := range hazards {
+		if h.Name == dropHazardName { // every hazard carrying this name is removed, not just the first
+			dropped[h.ID] = true
+			continue
+		}
+		kept = append(kept, h)
+	}
+	keptMits := make([]Mitigation, 0, len(mits))
+	for _, m := range mits {
+		if !dropped[m.HazardID] {
+			keptMits = append(keptMits, m)
+		}
+	}
+	after := CompareBenchmark(gt, kept, keptMits) // re-run the benchmark on the filtered copies
+
+	return ScreenResult{
+		RecallBefore: before.CoverageScore, RecallAfter: after.CoverageScore,
+		KeepGT: keepGT, DropGT: dropGT, DistinctGT: distinct,
+		Safe: after.CoverageScore >= before.CoverageScore && !distinct, // check 1 (recall not reduced) AND check 2 (not distinct)
+	}
+}
@@ -160,6 +160,7 @@ func (s *Store) ListHazards(ctx context.Context, projectID uuid.UUID) ([]Hazard,
 		hazards = append(hazards, h)
 	}

+	SortHazardsByISO12100(hazards)
 	return hazards, nil
 }