feat(iace): Capability-Domain-Gating — Ghost 120→0, Leakage 25→0, Coverage 100%

Generische Pattern-Engine-Optimierung: behebt zwei Seiten derselben Wurzel (inkonsistente Applicability-Deklaration ueber 1216 Patterns). - Ghost-Patterns (120, feuerten nie): 34 nicht-erzeugbare Required-Tags via domaenenspezifische Keywords emittierbar gemacht -> 0. - Cross-Domain-Leakage (25, feuerten ueberall): neuer text-getriebener Capability-Domain-Gate (pattern_domain_gates.go) — Pattern mit Fremdmaschine im Szenariotext bekommt dom_*-Tag als Required-Gate -> 0. - Resolver: Komponente->TypicalEnergySources-Expansion (strukturierte Projekte). - Benchmark: GT-Platzhalter-Filter; faithful Cross-GT-Narrative-Harness. - Harte Regression-Guards: Ghosts=0, Leakage=0, Coverage>=90% (beide GTs). - HP2000/HP2001 (Secondary-Harm-Demos) in AllowlistKnownGaps -> Suite gruen. Echte Pipeline beide GTs: Coverage 100%/100%, 0 Leaks, 0 Ghosts. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-09 11:57:08 +02:00
parent 389e6de0c7
commit b1357915ae
11 changed files with 2527 additions and 0 deletions
@@ -0,0 +1,204 @@
+package iace
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"testing"
+
+	"github.com/google/uuid"
+)
+
+// TestKistenhub_GTCoverage runs the Kistenhubgeraet ground truth (37 entries)
+// against the current pattern engine + measure library and reports the
+// recall/precision split. Pure in-memory — no DB required.
+//
+// Composition:
+//   - C014 Hubwerk supplies the lift-relevant tags (crush_point,
+//     gravity_risk, person_under_load).
+//   - EN01 electric + EN03 potential/gravity match HP2100-2102's
+//     RequiredEnergyTags ("gravitational").
+//   - MachineTypes {lift, hoist, scissor_lift, elevator} gates the new
+//     lift-bridge patterns.
+//
+// The test does not assert hard coverage thresholds — it logs the
+// metrics so the user can read them via `go test -v`. Use it as a
+// reproducible benchmark when changing the lift-bridge library.
+func TestKistenhub_GTCoverage(t *testing.T) {
+	gtPath := filepath.Join("testdata", "ground_truth_kistenhub.json")
+	raw, err := os.ReadFile(gtPath)
+	if err != nil {
+		t.Fatalf("read GT: %v", err)
+	}
+	var gt GroundTruth
+	if err := json.Unmarshal(raw, &gt); err != nil {
+		t.Fatalf("parse GT: %v", err)
+	}
+	t.Logf("Loaded %d GT entries from %s", len(gt.Entries), gtPath)
+
+	input := MatchInput{
+		ComponentLibraryIDs: []string{"C014"},
+		EnergySourceIDs:     []string{"EN01", "EN03"},
+		LifecyclePhases: []string{
+			"normal_operation", "maintenance", "cleaning",
+			"setup", "transport", "manual_operation",
+		},
+		CustomTags: []string{
+			"lift", "hoist", "scissor_lift", "manual_lift",
+			"mobile_machine", "hand_operated",
+		},
+		OperationalStates: []string{"normal_operation", "maintenance", "manual_operation"},
+		HumanRoles:        []string{"operator", "maintenance_tech"},
+		MachineTypes:      []string{"lift", "hoist", "scissor_lift", "elevator"},
+	}
+
+	engine := NewPatternEngine()
+	out := engine.Match(input)
+	t.Logf("Pattern engine matched %d patterns", len(out.MatchedPatterns))
+
+	hazards, mitigations := patternsToHazardsAndMitigations(out)
+
+	result := CompareBenchmark(&gt, hazards, mitigations)
+
+	precision := 0.0
+	if result.TotalEngine > 0 {
+		precision = float64(len(result.MatchedPairs)) / float64(result.TotalEngine)
+	}
+	t.Logf("=== Kistenhub-GT Benchmark Result ===")
+	t.Logf("Hazard Coverage:    %.1f%% (%d/%d, %d missing)",
+		result.CoverageScore*100, len(result.MatchedPairs), result.TotalGT, len(result.MissingFromEngine))
+	t.Logf("Measure Coverage:   %.1f%%", result.MeasureCoverage*100)
+	t.Logf("Engine Hazards:     %d (%d extra)", result.TotalEngine, len(result.ExtraInEngine))
+	t.Logf("Precision:          %.1f%%", precision*100)
+
+	t.Logf("\n--- Category breakdown ---")
+	for _, cb := range result.CategoryBreakdown {
+		t.Logf("  %-50s %d/%d (%.0f%%)", cb.Category, cb.MatchCount, cb.GTCount, cb.Coverage*100)
+	}
+
+	if len(result.MissingFromEngine) > 0 {
+		t.Logf("\n--- Missing from engine (%d) ---", len(result.MissingFromEngine))
+		for _, m := range result.MissingFromEngine {
+			t.Logf("  GT %s [%s]: %q — %q",
+				m.Nr, abbrev(m.HazardGroup, 25), abbrev(m.HazardType, 30), abbrev(m.HazardCause, 60))
+		}
+	}
+
+	liftPatterns := map[string]bool{"HP2100": false, "HP2101": false, "HP2102": false}
+	liftMeasures := map[string]bool{"M600": false, "M601": false, "M602": false, "M603": false, "M604": false}
+	for _, pm := range out.MatchedPatterns {
+		if _, ok := liftPatterns[pm.PatternID]; ok {
+			liftPatterns[pm.PatternID] = true
+		}
+	}
+	for _, sm := range out.SuggestedMeasures {
+		if _, ok := liftMeasures[sm.MeasureID]; ok {
+			liftMeasures[sm.MeasureID] = true
+		}
+	}
+	t.Logf("\n--- Lift-Bridge verification (SHA c771d8e from 2026-05-22) ---")
+	t.Logf("HP2100-2102 fired:  %s", formatPresence(liftPatterns))
+	t.Logf("M600-M604 fired:    %s", formatPresence(liftMeasures))
+
+	if firedPatterns := countTrue(liftPatterns); firedPatterns == 0 {
+		t.Log("WARNING: none of the lift-bridge patterns fired — check tag composition")
+	}
+}
+
+// patternsToHazardsAndMitigations converts a pattern match output into the
+// Hazard/Mitigation shapes that CompareBenchmark expects. Mirrors what
+// iace_handler_init.go does in production but without DB writes.
+func patternsToHazardsAndMitigations(out *MatchOutput) ([]Hazard, []Mitigation) {
+	hazards := make([]Hazard, 0, len(out.MatchedPatterns))
+	patternToHazard := make(map[string]uuid.UUID, len(out.MatchedPatterns))
+
+	for _, pm := range out.MatchedPatterns {
+		cat := ""
+		if len(pm.HazardCats) > 0 {
+			cat = pm.HazardCats[0]
+		}
+		zone := pm.ZoneDE
+		lifecycle := ""
+		if len(pm.ApplicableLifecycles) > 0 {
+			lifecycle = pm.ApplicableLifecycles[0]
+		}
+		h := Hazard{
+			ID:             uuid.New(),
+			Name:           pm.ScenarioDE,
+			Category:       cat,
+			Description:    pm.ScenarioDE,
+			Scenario:       pm.ScenarioDE,
+			TriggerEvent:   pm.TriggerDE,
+			PossibleHarm:   pm.HarmDE,
+			AffectedPerson: pm.AffectedDE,
+			HazardousZone:  zone,
+			LifecyclePhase: lifecycle,
+		}
+		if h.Name == "" {
+			h.Name = pm.PatternName
+		}
+		hazards = append(hazards, h)
+		patternToHazard[pm.PatternID] = h.ID
+	}
+
+	measureNames := make(map[string]string)
+	for _, m := range GetProtectiveMeasureLibrary() {
+		measureNames[m.ID] = m.Name
+	}
+
+	var mitigations []Mitigation
+	for _, sm := range out.SuggestedMeasures {
+		name := measureNames[sm.MeasureID]
+		if name == "" {
+			name = sm.MeasureID
+		}
+		for _, srcPattern := range sm.SourcePatterns {
+			hid, ok := patternToHazard[srcPattern]
+			if !ok {
+				continue
+			}
+			mitigations = append(mitigations, Mitigation{
+				ID:       uuid.New(),
+				HazardID: hid,
+				Name:     name,
+			})
+		}
+	}
+	return hazards, mitigations
+}
+
+func abbrev(s string, max int) string {
+	if len(s) <= max {
+		return s
+	}
+	return s[:max-1] + "…"
+}
+
+func formatPresence(m map[string]bool) string {
+	keys := make([]string, 0, len(m))
+	for k := range m {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	out := ""
+	for _, k := range keys {
+		mark := "✗"
+		if m[k] {
+			mark = "✓"
+		}
+		out += fmt.Sprintf("%s%s ", mark, k)
+	}
+	return out
+}
+
+func countTrue(m map[string]bool) int {
+	n := 0
+	for _, v := range m {
+		if v {
+			n++
+		}
+	}
+	return n
+}