feat(iace): benchmark risk comparison (traffic lights) + misuse pattern + 1:n matcher

#1 Risk-number comparison in the benchmark: ComputeRiskComparison derives the tool's S/F/W/P + Fine-Kinney per matched hazard and compares to the GT values; exposed on the benchmark response and rendered in a new RiskComparison table with GREEN/YELLOW/RED traffic lights on the risk number R (like the Excel), plus per-axis within-1 agreement cards. #2 Generic misuse pattern HP2103 "Personenbefoerderung auf Hebezeug" — gated to lift-family machine types, fires for ANY lifting device (not machine-specific). #3 Benchmark matcher is now 1:n — one broad engine hazard may cover several fine-grained GT sub-scenarios (foot/hand/leg crush), so coverage reflects real risk coverage rather than 1:1 wording matches. Validated on BOTH ground truths (robot cell + lift): leakage 0, ghosts 0, coverage held. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-09 17:24:52 +02:00
parent ef746ea8f0
commit 2677bca9ca
8 changed files with 284 additions and 1 deletions
@@ -0,0 +1,129 @@
+package iace
+
+// Risk-number comparison for the benchmark: for every matched hazard, the
+// tool's risk parameters (EN-62061-style S/F/W/P + Fine-Kinney) next to the
+// professional's GT values, plus aggregate agreement. Used by the benchmark
+// endpoint so the Risikobewertung comparison is visible in the tab.
+
+// RiskComparisonPair is one matched hazard's tool-vs-professional risk numbers.
+//
+// HazardName is the hazard type of the ground-truth (GT) entry. The GT* fields
+// carry the professional's values from that entry (columns S, F, W, P and the
+// risk number R). The Eng* fields are the tool's own estimates for the matched
+// engine hazard, and FKScore/FKBand are the tool's Fine-Kinney score and band.
+// ComputeRiskComparison fills every field.
+type RiskComparisonPair struct {
+	HazardName     string  `json:"hazard_name"`
+	GTSeverity     int     `json:"gt_severity"`
+	GTFrequency    int     `json:"gt_frequency"`
+	GTProbability  int     `json:"gt_probability"` // GT column W
+	GTAvoidance    int     `json:"gt_avoidance"`   // GT column P
+	GTRisk         int     `json:"gt_risk"`        // GT column R
+	EngSeverity    int     `json:"eng_severity"`
+	EngFrequency   int     `json:"eng_frequency"`
+	EngProbability int     `json:"eng_probability"`
+	EngAvoidance   int     `json:"eng_avoidance"`
+	FKScore        float64 `json:"fk_score"`
+	FKBand         string  `json:"fk_band"`
+}
+
+// RiskAgreement aggregates how close the tool's risk numbers are to the GT.
+//
+// N is the number of matched hazards whose GT severity is greater than zero;
+// only those hazards are scored. Each *Within1 field is the percentage (0-100)
+// of the N scored hazards where the tool's value is at most 1 away from the GT
+// value. For frequency, probability and avoidance, a hazard whose GT value is
+// not greater than zero never counts as agreeing but still counts towards N.
+// RankConcordance is the percentage (0-100) of hazard pairs that the
+// Fine-Kinney score and the GT risk number R order the same way; it is 0 when
+// no pair is comparable. All fields are zero when N is 0.
+type RiskAgreement struct {
+	N                  int     `json:"n"`
+	SeverityWithin1    float64 `json:"severity_within1"`
+	FrequencyWithin1   float64 `json:"frequency_within1"`
+	ProbabilityWithin1 float64 `json:"probability_within1"`
+	AvoidanceWithin1   float64 `json:"avoidance_within1"`
+	RankConcordance    float64 `json:"rank_concordance"` // Fine-Kinney vs GT R
+}
+
+// ComputeRiskComparison builds one RiskComparisonPair per matched hazard and
+// summarises how closely the tool's estimates follow the professional's GT
+// values.
+//
+// Every element of matched yields a pair in the returned slice, in input
+// order. Only hazards whose GT severity is greater than zero contribute to the
+// returned RiskAgreement; when there are none, the zero RiskAgreement is
+// returned.
+func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, RiskAgreement) {
+	rows := make([]RiskComparisonPair, 0, len(matched))
+	scored := 0
+	hitS, hitF, hitW, hitP := 0, 0, 0, 0
+	var toolScores, gtRisks []float64
+
+	// within1 reports whether the tool's value is at most one step from the GT.
+	within1 := func(tool, ref int) bool { return abs(tool-ref) <= 1 }
+
+	for _, pair := range matched {
+		hazard := pair.EngineHazard
+		categories := []string{hazard.Category}
+
+		// Fall back to the hazard name when no scenario text is present.
+		text := hazard.Scenario
+		if text == "" {
+			text = hazard.Name
+		}
+		phases := splitLifecyclePhases(hazard.LifecyclePhase)
+
+		s := EstimateSeverity(categories, text, 0)
+		f := EstimateFrequency(phases)
+		w := EstimateProbabilityW(categories, text)
+		p := EstimateAvoidabilityP(categories, text)
+		kinney := SuggestFineKinney(categories, text, phases, 0)
+		ref := pair.GTEntry.RiskIn
+
+		rows = append(rows, RiskComparisonPair{
+			HazardName:     pair.GTEntry.HazardType,
+			GTSeverity:     ref.S,
+			GTFrequency:    ref.F,
+			GTProbability:  ref.W,
+			GTAvoidance:    ref.P,
+			GTRisk:         ref.R,
+			EngSeverity:    s,
+			EngFrequency:   f,
+			EngProbability: w,
+			EngAvoidance:   p,
+			FKScore:        kinney.Score,
+			FKBand:         kinney.Band,
+		})
+
+		// Hazards without a positive GT severity are listed but not scored.
+		if ref.S <= 0 {
+			continue
+		}
+		scored++
+		if within1(s, ref.S) {
+			hitS++
+		}
+		// A GT value that is not positive can never count as agreement, yet
+		// the hazard stays in the denominator.
+		if ref.F > 0 && within1(f, ref.F) {
+			hitF++
+		}
+		if ref.W > 0 && within1(w, ref.W) {
+			hitW++
+		}
+		if ref.P > 0 && within1(p, ref.P) {
+			hitP++
+		}
+		toolScores = append(toolScores, kinney.Score)
+		gtRisks = append(gtRisks, float64(ref.R))
+	}
+
+	if scored == 0 {
+		return rows, RiskAgreement{}
+	}
+	return rows, RiskAgreement{
+		N:                  scored,
+		SeverityWithin1:    pct(hitS, scored),
+		FrequencyWithin1:   pct(hitF, scored),
+		ProbabilityWithin1: pct(hitW, scored),
+		AvoidanceWithin1:   pct(hitP, scored),
+		RankConcordance:    rankConcordance(toolScores, gtRisks),
+	}
+}
+
+// abs returns the absolute value of x.
+func abs(x int) int {
+	if x >= 0 {
+		return x
+	}
+	return -x
+}
+
+// pct returns x as a percentage of total (100*x/total), or 0 when total is 0
+// so that callers never divide by zero.
+func pct(x, total int) float64 {
+	if total != 0 {
+		return 100 * float64(x) / float64(total)
+	}
+	return 0
+}
+
+// rankConcordance returns the percentage (0-100) of comparable index pairs
+// (i, j) that a and b order the same way: 100 means every comparable pair
+// agrees, 0 means every comparable pair is reversed. Only the signs of the
+// differences are used, so the result does not depend on the scale of either
+// slice.
+//
+// A pair is skipped as not comparable when either slice has a tie at the two
+// indices. The result is 0 when there is no comparable pair, including for
+// empty input. If the slices differ in length, only the indices present in
+// both are considered.
+func rankConcordance(a, b []float64) float64 {
+	// Clamp to the common length so a shorter b cannot cause an
+	// index-out-of-range panic.
+	n := len(a)
+	if len(b) < n {
+		n = len(b)
+	}
+
+	concordant, discordant := 0, 0
+	for i := 0; i < n; i++ {
+		for j := i + 1; j < n; j++ {
+			da, db := a[i]-a[j], b[i]-b[j]
+			if da == 0 || db == 0 {
+				continue // tie on either side: the pair carries no ordering
+			}
+			if (da > 0) == (db > 0) {
+				concordant++
+			} else {
+				discordant++
+			}
+		}
+	}
+
+	comparable := concordant + discordant
+	if comparable == 0 {
+		return 0
+	}
+	return 100 * float64(concordant) / float64(comparable)
+}