fe5dc59152
CI / detect-changes (push) Successful in 7s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 6s
CI / validate-canonical-controls (push) Successful in 4s
CI / loc-budget (push) Successful in 17s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Successful in 58s
CI / iace-gt-coverage (push) Successful in 15s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
Phase 1 of the commercial white-goods expansion (EN ISO 10472 family). Extend GT #3 with 8 completeness hazards a Fachmann expects but that were neither in the GT nor previously questioned: dry-run boiler overheating, residual/stored electrical energy, sharp-edge cut, tipping, interlock-failure, unexpected restart, backflow (EN 1717), microbial/legionella. Enrich the UC-M narrative with the real features so existing library patterns can fire. Result: 4/8 auto-covered by existing patterns (dry-run, residual voltage, tipping, interlock-failure) — recall 84% (21/25). Remaining gaps documented: spray-arm contact (4.3), sharp-edge cut (4.6), backflow (2.3), restart (6.4). Gate the re-surfaced CNC leak ("spanende Bearbeitung", high_temperature-only) via dom_cnc. Kistenhub 97.1% and Bremse pinned mappings unchanged. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
183 lines
6.9 KiB
Go
183 lines
6.9 KiB
Go
package iace
|
|
|
|
import (
|
|
"encoding/json"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"testing"
|
|
)
|
|
|
|
// GT #3 — commercial UNDERCOUNTER dishwasher (Winterhalter UC-M). Self-assessed
// ground truth: we can judge for ourselves what a dishwasher is. The test runs
// the narrative through the SAME chain as production (ParseNarrative -> engine
// -> relevance filter + cyber-skip), so keyword/gating fixes are measured on
// the hazard set the user actually sees — not on the raw pattern flood.
|
|
|
|
// warewashingNarrative is the condensed UC-M limits_form text that the GT #3
// test feeds into ParseNarrative. It mentions "Cool-Ausfuehrung" and "Filter"
// on purpose, so the known false components (Kuehlaggregat, Absauganlage) are
// reproduced and stay visible in the baseline. The closing sentences name the
// completeness hazards: sharp sheet-metal edges, a dry-running boiler, residual
// voltage, unexpected restart, backflow into the drinking-water network,
// standing water (legionella) and stability.
//
// The text is one single line assembled from raw string literals; every piece
// except the last ends in a space so the sentences join without a line break.
const warewashingNarrative = `Gewerbliche Untertisch-Geschirrspuelmaschine fuer Gastronomie-Kueche, ` +
	`vernetzt ueber LAN und WLAN (Connected Wash Internetportal). Heisswasser-Boiler mit ` +
	`Nachspueltemperatur ca. 85 Grad C, Tank mit Hygiene-Tankheizkoerper. Spuelpumpe 150-200 l/min ` +
	`mit rotierenden Spuelfeldern und Spuelarmen, Ablaufpumpe. Eingebautes Dosiergeraet fuer Reiniger ` +
	`und Klarspueler (aetzende Konzentrate). 4-fach-Laugenfiltration mit Filter. Doppelwandige Tuer ` +
	`mit Sicherheitsschalter und Rastposition (Thermostopp). Elektromotor (Drehstrom) 400 V. ` +
	`Touch-Steuerung (SPS) mit Bedienfeld und HMI, USB-Schnittstelle fuer Softwareupdates, ` +
	`PIN-geschuetzter Servicetechniker-Fernzugriff. Cool-Ausfuehrung mit kalter Nachspuelung. ` +
	`Untertischmontage. Eingreifen in die Spuelkammer moeglich. Aerosole und Daempfe der ` +
	`Reinigungschemie gelangen in die Atemzone. Manuelles Be- und Entladen der Spuelkoerbe von Hand. ` +
	`Reinigung und Wartung durch Servicetechniker. Branche Lebensmittel und Getraenke. ` +
	`Siebe und scharfe Blechkanten in der Spuelkammer. Boiler kann bei Wassermangel trockenlaufen. ` +
	`Frequenzumrichter und Elektronik mit Restspannung nach dem Abschalten. Wartung nur im ` +
	`freigeschalteten Zustand; Gefahr des unerwarteten Wiederanlaufs. Frischwasseranschluss mit ` +
	`Rueckflussverhinderer gegen Ruecksaugen in das Trinkwassernetz. Stehwasser im Boiler ` +
	`(Hygiene/Legionellen). Standsicherheit bei Untertischmontage.`
|
|
|
|
// warewashingCyberCategories is the set of hazard categories that count as
// native cyber/AI hazards. It is kept in step with
// handlers.nativeCyberSecurityCategories: such hazards are routed to the CRA
// module rather than the CE hazard log, so a pattern whose categories all
// appear here is dropped from the benchmark output.
var warewashingCyberCategories = map[string]bool{
	"unauthorized_access":   true,
	"firmware_corruption":   true,
	"cyber_resilience":      true,
	"logging_audit_failure": true,
	"cyber_network":         true,
	"sensor_spoofing":       true,
	"ai_specific":           true,
	"ai_misclassification":  true,
	"false_classification":  true,
	"model_drift":           true,
	"data_poisoning":        true,
	"unintended_bias":       true,
}
|
|
|
|
// warewashingEngineOutput runs the production chain and returns the filtered
|
|
// hazards/mitigations the user would see for the UC-M.
|
|
func warewashingEngineOutput() ([]Hazard, []Mitigation, int) {
|
|
res := ParseNarrative(warewashingNarrative, "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)")
|
|
|
|
var compIDs, compNames []string
|
|
for _, c := range res.Components {
|
|
if c.Negated {
|
|
continue
|
|
}
|
|
compIDs = append(compIDs, c.LibraryID)
|
|
compNames = append(compNames, c.NameDE)
|
|
}
|
|
var energyIDs []string
|
|
for _, e := range res.EnergySources {
|
|
energyIDs = append(energyIDs, e.SourceID)
|
|
}
|
|
lifecycles := append([]string{}, res.LifecyclePhases...)
|
|
lifecycles = append(lifecycles, "normal_operation", "maintenance", "cleaning", "setup", "fault_clearing")
|
|
|
|
input := MatchInput{
|
|
ComponentLibraryIDs: compIDs,
|
|
EnergySourceIDs: energyIDs,
|
|
LifecyclePhases: lifecycles,
|
|
CustomTags: res.CustomTags,
|
|
OperationalStates: append(res.OperationalStates, "normal_operation", "cleaning", "maintenance"),
|
|
HumanRoles: res.Roles,
|
|
MachineTypes: []string{"food_processing", "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)"},
|
|
}
|
|
|
|
out := NewPatternEngine().Match(input)
|
|
|
|
var kept []PatternMatch
|
|
for _, pm := range out.MatchedPatterns {
|
|
if !IsPatternRelevant(pm, warewashingNarrative, compNames) {
|
|
continue
|
|
}
|
|
allCyber := len(pm.HazardCats) > 0
|
|
for _, c := range pm.HazardCats {
|
|
if !warewashingCyberCategories[c] {
|
|
allCyber = false
|
|
}
|
|
}
|
|
if allCyber {
|
|
continue
|
|
}
|
|
kept = append(kept, pm)
|
|
}
|
|
filtered := *out
|
|
filtered.MatchedPatterns = kept
|
|
hazards, mitigations := patternsToHazardsAndMitigations(&filtered)
|
|
return hazards, mitigations, len(kept)
|
|
}
|
|
|
|
func TestWarewashing_GTCoverage(t *testing.T) {
|
|
gtPath := filepath.Join("testdata", "ground_truth_warewashing.json")
|
|
raw, err := os.ReadFile(gtPath)
|
|
if err != nil {
|
|
t.Fatalf("read GT: %v", err)
|
|
}
|
|
var gt GroundTruth
|
|
if err := json.Unmarshal(raw, >); err != nil {
|
|
t.Fatalf("parse GT: %v", err)
|
|
}
|
|
|
|
{
|
|
res := ParseNarrative(warewashingNarrative, "Gewerbliche Untertisch-Geschirrspuelmaschine (vernetzt)")
|
|
var cn []string
|
|
for _, c := range res.Components {
|
|
if !c.Negated {
|
|
cn = append(cn, c.NameDE)
|
|
}
|
|
}
|
|
t.Logf("Parsed components: %v", cn)
|
|
}
|
|
|
|
hazards, mitigations, nPatterns := warewashingEngineOutput()
|
|
t.Logf("Engine: %d patterns kept (relevance+cyber filter) -> %d hazards", nPatterns, len(hazards))
|
|
|
|
result := CompareBenchmark(>, hazards, mitigations)
|
|
precision := 0.0
|
|
if result.TotalEngine > 0 {
|
|
precision = float64(len(result.MatchedPairs)) / float64(result.TotalEngine)
|
|
}
|
|
t.Logf("=== Warewashing-GT (GT #3) Baseline ===")
|
|
t.Logf("Recall (Coverage): %.1f%% (%d/%d matched, %d missing)",
|
|
result.CoverageScore*100, len(result.MatchedPairs), result.TotalGT, len(result.MissingFromEngine))
|
|
t.Logf("Precision: %.1f%% (%d engine hazards, %d extra)",
|
|
precision*100, result.TotalEngine, len(result.ExtraInEngine))
|
|
|
|
if len(result.MissingFromEngine) > 0 {
|
|
t.Logf("--- MISSING (recall gaps) ---")
|
|
for _, m := range result.MissingFromEngine {
|
|
t.Logf(" MISS %s: %s", m.Nr, abbrev(m.HazardType, 60))
|
|
}
|
|
}
|
|
|
|
// Measure completeness: which generated hazards have NO protective measure?
|
|
t.Logf("--- Measure completeness ---")
|
|
t.Logf("Measure coverage (GT-matched): %.0f%%", result.MeasureCoverage*100)
|
|
withMeas := make(map[string]bool)
|
|
for _, m := range mitigations {
|
|
withMeas[m.HazardID.String()] = true
|
|
}
|
|
noMeasure := 0
|
|
for _, h := range hazards {
|
|
if !withMeas[h.ID.String()] {
|
|
noMeasure++
|
|
n := h.Name
|
|
if n == "" {
|
|
n = h.Scenario
|
|
}
|
|
t.Logf(" NO-MEASURE: [%s] %s", h.Category, abbrev(n, 60))
|
|
}
|
|
}
|
|
t.Logf("Hazards without any measure: %d/%d", noMeasure, len(hazards))
|
|
if len(result.ExtraInEngine) > 0 {
|
|
t.Logf("--- EXTRA (false positives / precision loss) ---")
|
|
names := make([]string, 0, len(result.ExtraInEngine))
|
|
for _, e := range result.ExtraInEngine {
|
|
n := e.Name
|
|
if n == "" {
|
|
n = e.Scenario
|
|
}
|
|
names = append(names, "["+e.Category+"] "+n)
|
|
}
|
|
sort.Strings(names)
|
|
for _, n := range names {
|
|
t.Logf(" EXTRA %s", abbrev(n, 85))
|
|
}
|
|
}
|
|
|
|
// Loose smoke floor for the baseline — fixes should push recall up, not down.
|
|
if result.CoverageScore < 0.4 {
|
|
t.Errorf("warewashing recall below 40%% floor: %.1f%%", result.CoverageScore*100)
|
|
}
|
|
}
|