package iace

import (
	"encoding/json"
	"os"
	"path/filepath"
	"sort"
	"testing"
)

// GT #3 — commercial UNDERCOUNTER dishwasher (Winterhalter UC-M). Self-assessed
// ground truth: we can judge what a dishwasher is. The test runs the narrative
// through the SAME chain as production (ParseNarrative -> engine -> relevance
// filter + cyber-skip), so keyword/gating fixes are measured on the hazard set
// the user actually sees — not the raw pattern flood.

// warewashingNarrative is the condensed UC-M limits_form narrative fed to the
// parser. It deliberately includes "Cool-Ausfuehrung" and "Filter" so the known
// false components (Kuehlaggregat, Absauganlage) are reproduced and visible in
// the baseline.
const warewashingNarrative = `Gewerbliche Untertisch-Geschirrspuelmaschine fuer Gastronomie-Kueche, ` +
	`vernetzt ueber LAN und WLAN (Connected Wash Internetportal). Heisswasser-Boiler mit ` +
	`Nachspueltemperatur ca. 85 Grad C, Tank mit Hygiene-Tankheizkoerper. Spuelpumpe 150-200 l/min ` +
	`mit rotierenden Spuelfeldern und Spuelarmen, Ablaufpumpe. Eingebautes Dosiergeraet fuer Reiniger ` +
	`und Klarspueler (aetzende Konzentrate). 4-fach-Laugenfiltration mit Filter. Doppelwandige Tuer ` +
	`mit Sicherheitsschalter und Rastposition (Thermostopp). Elektromotor (Drehstrom) 400 V. ` +
	`Touch-Steuerung (SPS) mit Bedienfeld und HMI, USB-Schnittstelle fuer Softwareupdates, ` +
	`PIN-geschuetzter Servicetechniker-Fernzugriff. Cool-Ausfuehrung mit kalter Nachspuelung. ` +
	`Untertischmontage. Eingreifen in die Spuelkammer moeglich. Aerosole und Daempfe der ` +
	`Reinigungschemie gelangen in die Atemzone. Manuelles Be- und Entladen der Spuelkoerbe von Hand. ` +
	`Reinigung und Wartung durch Servicetechniker. Branche Lebensmittel und Getraenke.`

// warewashingCyberCategories mirrors handlers.nativeCyberSecurityCategories —
// native cyber/AI hazards are routed to the CRA module, not the CE hazard log.
var warewashingCyberCategories = map[string]bool{ "unauthorized_access": true, "firmware_corruption": true, "cyber_resilience": true, "logging_audit_failure": true, "cyber_network": true, "sensor_spoofing": true, "ai_specific": true, "ai_misclassification": true, "false_classification": true, "model_drift": true, "data_poisoning": true, "unintended_bias": true, } // warewashingEngineOutput runs the production chain and returns the filtered // hazards/mitigations the user would see for the UC-M. func warewashingEngineOutput() ([]Hazard, []Mitigation, int) { res := ParseNarrative(warewashingNarrative, "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)") var compIDs, compNames []string for _, c := range res.Components { if c.Negated { continue } compIDs = append(compIDs, c.LibraryID) compNames = append(compNames, c.NameDE) } var energyIDs []string for _, e := range res.EnergySources { energyIDs = append(energyIDs, e.SourceID) } lifecycles := append([]string{}, res.LifecyclePhases...) lifecycles = append(lifecycles, "normal_operation", "maintenance", "cleaning", "setup", "fault_clearing") input := MatchInput{ ComponentLibraryIDs: compIDs, EnergySourceIDs: energyIDs, LifecyclePhases: lifecycles, CustomTags: res.CustomTags, OperationalStates: append(res.OperationalStates, "normal_operation", "cleaning", "maintenance"), HumanRoles: res.Roles, MachineTypes: []string{"food_processing", "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)"}, } out := NewPatternEngine().Match(input) var kept []PatternMatch for _, pm := range out.MatchedPatterns { if !IsPatternRelevant(pm, warewashingNarrative, compNames) { continue } allCyber := len(pm.HazardCats) > 0 for _, c := range pm.HazardCats { if !warewashingCyberCategories[c] { allCyber = false } } if allCyber { continue } kept = append(kept, pm) } filtered := *out filtered.MatchedPatterns = kept hazards, mitigations := patternsToHazardsAndMitigations(&filtered) return hazards, mitigations, len(kept) } func 
TestWarewashing_GTCoverage(t *testing.T) { gtPath := filepath.Join("testdata", "ground_truth_warewashing.json") raw, err := os.ReadFile(gtPath) if err != nil { t.Fatalf("read GT: %v", err) } var gt GroundTruth if err := json.Unmarshal(raw, >); err != nil { t.Fatalf("parse GT: %v", err) } hazards, mitigations, nPatterns := warewashingEngineOutput() t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", nPatterns, len(hazards)) result := CompareBenchmark(>, hazards, mitigations) precision := 0.0 if result.TotalEngine > 0 { precision = float64(len(result.MatchedPairs)) / float64(result.TotalEngine) } t.Logf("=== Warewashing-GT (GT #3) Baseline ===") t.Logf("Recall (Coverage): %.1f%% (%d/%d matched, %d missing)", result.CoverageScore*100, len(result.MatchedPairs), result.TotalGT, len(result.MissingFromEngine)) t.Logf("Precision: %.1f%% (%d engine hazards, %d extra)", precision*100, result.TotalEngine, len(result.ExtraInEngine)) if len(result.MissingFromEngine) > 0 { t.Logf("--- MISSING (recall gaps) ---") for _, m := range result.MissingFromEngine { t.Logf(" MISS %s: %s", m.Nr, abbrev(m.HazardType, 60)) } } if len(result.ExtraInEngine) > 0 { t.Logf("--- EXTRA (false positives / precision loss) ---") names := make([]string, 0, len(result.ExtraInEngine)) for _, e := range result.ExtraInEngine { n := e.Name if n == "" { n = e.Scenario } names = append(names, "["+e.Category+"] "+n) } sort.Strings(names) for _, n := range names { t.Logf(" EXTRA %s", abbrev(n, 85)) } } // Loose smoke floor for the baseline — fixes should push recall up, not down. if result.CoverageScore < 0.4 { t.Errorf("warewashing recall below 40%% floor: %.1f%%", result.CoverageScore*100) } }