feat(iace): benchmark risk comparison (traffic lights) + misuse pattern + 1:n matcher

#1 Risk-number comparison in the benchmark: ComputeRiskComparison derives the tool's S/F/W/P + Fine-Kinney per matched hazard and compares them to the ground-truth (GT) values; exposed on the benchmark response and rendered in a new RiskComparison table with GREEN/YELLOW/RED traffic lights on the risk number R (as in the Excel sheet), plus per-axis within-1 agreement cards. #2 Generic misuse pattern HP2103 "Personenbefoerderung auf Hebezeug" — gated to lift-family machine types, fires for ANY lifting device (not machine-specific). #3 Benchmark matcher is now 1:n — one broad engine hazard may cover several fine-grained GT sub-scenarios (foot/hand/leg crush), so coverage reflects real risk coverage rather than 1:1 wording matches. Validated on BOTH ground truths (robot cell + lift): leakage 0, ghosts 0, coverage held. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-09 17:24:52 +02:00
parent ef746ea8f0
commit 2677bca9ca
8 changed files with 284 additions and 1 deletion
@@ -105,6 +105,7 @@ func (h *IACEHandler) RunBenchmark(c *gin.Context) {
 	}

 	result := iace.CompareBenchmark(gt, hazards, mitigations)
+	result.RiskComparison, result.RiskAgreement = iace.ComputeRiskComparison(result.MatchedPairs)
 	c.JSON(http.StatusOK, result)
 }

@@ -74,8 +74,12 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
 	usedEng := make(map[int]bool)
 	var matched []HazardMatchPair

+	// 1:n matching: a single broad engine hazard may legitimately cover several
+	// fine-grained GT sub-scenarios (e.g. one "crush under descending load"
+	// pattern covers the GT's separate foot / hand / leg crush rows). We only
+	// block a GT entry from matching twice; an engine hazard may match several.
 	for _, p := range pairs {
-		if usedGT[p.gtIdx] || usedEng[p.engIdx] {
+		if usedGT[p.gtIdx] {
 			continue
 		}
 		usedGT[p.gtIdx] = true
@@ -80,6 +80,9 @@ type BenchmarkResult struct {
 	ExtraInEngine     []HazardSummary    `json:"extra_in_engine"`
 	CategoryBreakdown []CategoryScore    `json:"category_breakdown"`
 	RiskRankPairs     []RiskRankPair     `json:"risk_rank_pairs"`
+	// Risk-number comparison (tool vs professional) per matched hazard + aggregate.
+	RiskComparison    []RiskComparisonPair `json:"risk_comparison,omitempty"`
+	RiskAgreement     RiskAgreement        `json:"risk_agreement"`
 }

 // HazardMatchPair links a GT entry to an engine hazard.
@@ -40,6 +40,32 @@ func GetLiftEndstopPatterns() []HazardPattern {
 				"Verhindert ein Trittblech / Unterfahrschutz das Hineinfahren von Fuessen?",
 			},
 		},
+		{
+			ID:                    "HP2103",
+			NameDE:                "Bestimmungswidrige Personenbefoerderung auf Hebezeug",
+			NameEN:                "Misuse: transporting persons on a lifting device",
+			RequiredComponentTags: []string{"gravity_risk"},
+			RequiredEnergyTags:    []string{"gravitational"},
+			MachineTypes:          liftTypes,
+			GeneratedHazardCats:   []string{"mechanical_hazard"},
+			SuggestedMeasureIDs:   []string{"M601", "M141"},
+			Priority:              90,
+			ScenarioDE: "Die Hebevorrichtung wird bestimmungswidrig zum Heben oder Befoerdern von " +
+				"Personen verwendet (z.B. Mitfahren auf der Plattform). Absturz aus der Hoehe oder " +
+				"Quetschen bei unkontrollierter Bewegung.",
+			TriggerDE:           "Fehlendes Verbotsschild, keine konstruktive Verhinderung (z.B. zu kleine Standflaeche/Haltepunkte), unzureichende Unterweisung",
+			HarmDE:              "Absturz aus der Hoehe, schwere Verletzungen, Tod",
+			AffectedDE:          "Bediener, Dritte",
+			ZoneDE:              "Hubplattform / Lastaufnahme",
+			DefaultSeverity:     4,
+			DefaultExposure:     1,
+			DefaultAvoidability: 2,
+			ISO12100Section:     "6.4.5 Vernuenftigerweise vorhersehbare Fehlanwendung",
+			ClarificationQuestionsDE: []string{
+				"Ist ein Verbotsschild 'Personenbefoerderung verboten' (EN ISO 7010 P-Zeichen) angebracht?",
+				"Verhindert die Konstruktion das Mitfahren (z.B. zu kleine Standflaeche, keine Haltepunkte)?",
+			},
+		},
 		{
 			ID:                    "HP2101",
 			NameDE:                "Hand- oder Koerper-Quetschung gegen feste Struktur beim Hochfahren der Hubeinheit",
@@ -0,0 +1,129 @@
+package iace
+
+// Risk-number comparison for the benchmark: for every matched hazard, the
+// tool's risk parameters (EN-62061-style S/F/W/P + Fine-Kinney) next to the
+// professional's GT values, plus aggregate agreement. Used by the benchmark
+// endpoint so the Risikobewertung comparison is visible in the tab.
+
+// RiskComparisonPair holds, for one matched hazard, the ground-truth (GT) risk values next to the engine-estimated ones; ComputeRiskComparison fills one per match.
+type RiskComparisonPair struct {
+	HazardName     string  `json:"hazard_name"`     // set from the GT entry's HazardType, not from the engine hazard
+	GTSeverity     int     `json:"gt_severity"`     // GT column S
+	GTFrequency    int     `json:"gt_frequency"`    // GT column F
+	GTProbability  int     `json:"gt_probability"`  // GT column W
+	GTAvoidance    int     `json:"gt_avoidance"`    // GT column P
+	GTRisk         int     `json:"gt_risk"`         // GT column R
+	EngSeverity    int     `json:"eng_severity"`    // result of EstimateSeverity
+	EngFrequency   int     `json:"eng_frequency"`   // result of EstimateFrequency
+	EngProbability int     `json:"eng_probability"` // result of EstimateProbabilityW
+	EngAvoidance   int     `json:"eng_avoidance"`   // result of EstimateAvoidabilityP
+	FKScore        float64 `json:"fk_score"`        // Score field of the SuggestFineKinney result
+	FKBand         string  `json:"fk_band"`         // Band field of the SuggestFineKinney result
+}
+
+// RiskAgreement aggregates how close the tool's risk numbers are to the GT; every float field is a percentage in the range 0-100.
+type RiskAgreement struct {
+	N                  int     `json:"n"`                   // matched hazards whose GT severity S is > 0; denominator of all *Within1 fields
+	SeverityWithin1    float64 `json:"severity_within1"`    // percent of N where |engine S - GT S| <= 1
+	FrequencyWithin1   float64 `json:"frequency_within1"`   // percent of N where GT F > 0 and |engine F - GT F| <= 1
+	ProbabilityWithin1 float64 `json:"probability_within1"` // percent of N where GT W > 0 and |engine W - GT W| <= 1
+	AvoidanceWithin1   float64 `json:"avoidance_within1"`   // percent of N where GT P > 0 and |engine P - GT P| <= 1
+	RankConcordance    float64 `json:"rank_concordance"`    // percent of comparable pairs ordered alike by Fine-Kinney score and GT R
+}
+
+// ComputeRiskComparison builds one RiskComparisonPair per matched hazard (engine estimates next to the GT entry's RiskIn values)
+// and aggregates within-1 agreement per axis plus rank concordance over the matches whose GT severity S is > 0.
+func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, RiskAgreement) {
+	pairs := make([]RiskComparisonPair, 0, len(matched)) // one row per match, whether or not it enters the aggregate
+	var sevOK, freqOK, probOK, avoidOK, n int // n = rows with GT S > 0; *OK = rows within 1 of the GT on that axis
+	var engFK, gtR []float64 // parallel slices (Fine-Kinney score, GT R) handed to rankConcordance
+
+	for _, m := range matched {
+		eh := m.EngineHazard
+		cats := []string{eh.Category} // the estimators take a category list; only the hazard's own category is passed
+		scenario := eh.Scenario
+		if scenario == "" { // no scenario text: fall back to the hazard name
+			scenario = eh.Name
+		}
+		lifecycle := splitLifecyclePhases(eh.LifecyclePhase)
+
+		engS := EstimateSeverity(cats, scenario, 0) // NOTE(review): meaning of the literal 0 argument is not visible here — confirm
+		engF := EstimateFrequency(lifecycle)
+		engW := EstimateProbabilityW(cats, scenario)
+		engP := EstimateAvoidabilityP(cats, scenario)
+		fk := SuggestFineKinney(cats, scenario, lifecycle, 0)
+		gt := m.GTEntry.RiskIn // the GT side of the comparison (fields S, F, W, P, R)
+
+		pairs = append(pairs, RiskComparisonPair{
+			HazardName:     m.GTEntry.HazardType,
+			GTSeverity:     gt.S, GTFrequency: gt.F, GTProbability: gt.W, GTAvoidance: gt.P, GTRisk: gt.R,
+			EngSeverity:    engS, EngFrequency: engF, EngProbability: engW, EngAvoidance: engP,
+			FKScore:        fk.Score, FKBand: fk.Band,
+		})
+
+		if gt.S > 0 { // only rows that carry a GT severity enter the aggregate
+			n++
+			if abs(engS-gt.S) <= 1 {
+				sevOK++
+			}
+			if gt.F > 0 && abs(engF-gt.F) <= 1 { // NOTE(review): a row with gt.F == 0 can never agree yet still counts in the denominator n — confirm intended (same for W and P below)
+				freqOK++
+			}
+			if gt.W > 0 && abs(engW-gt.W) <= 1 {
+				probOK++
+			}
+			if gt.P > 0 && abs(engP-gt.P) <= 1 {
+				avoidOK++
+			}
+			engFK = append(engFK, fk.Score)
+			gtR = append(gtR, float64(gt.R)) // NOTE(review): gt.R is used even when it is 0 — confirm rows with S > 0 always carry an R value
+		}
+	}
+
+	agg := RiskAgreement{N: n}
+	if n > 0 { // with nothing scored, every percentage stays at its zero value
+		agg.SeverityWithin1 = pct(sevOK, n)
+		agg.FrequencyWithin1 = pct(freqOK, n)
+		agg.ProbabilityWithin1 = pct(probOK, n)
+		agg.AvoidanceWithin1 = pct(avoidOK, n)
+		agg.RankConcordance = rankConcordance(engFK, gtR)
+	}
+	return pairs, agg
+}
+
+func abs(x int) int { // abs returns the absolute value of x.
+	if x < 0 {
+		return -x // negate only negative inputs
+	}
+	return x
+}
+
+func pct(x, total int) float64 { // pct returns x as a percentage (0-100 scale) of total.
+	if total == 0 { // avoid dividing by zero: an empty population reports 0
+		return 0
+	}
+	return 100 * float64(x) / float64(total)
+}
+
+// rankConcordance returns the percentage (0-100, not a 0-1 fraction) of comparable index pairs (i, j) that a and b order the same way.
+// Only the sign of each difference is used, so it is scale-invariant; 50 is the level expected from a random ordering, and 0 is also returned when no pair is comparable.
+func rankConcordance(a, b []float64) float64 {
+	concordant, discordant := 0, 0
+	for i := 0; i < len(a); i++ { // b is indexed with a's bounds: assumes len(b) >= len(a)
+		for j := i + 1; j < len(a); j++ {
+			da, db := a[i]-a[j], b[i]-b[j]
+			if da == 0 || db == 0 { // a tie on either side makes the pair not comparable; skip it
+				continue
+			}
+			if (da > 0) == (db > 0) { // same sign: both sequences rank i and j in the same order
+				concordant++
+			} else {
+				discordant++
+			}
+		}
+	}
+	if concordant+discordant == 0 { // no comparable pairs: report 0 rather than divide by zero
+		return 0
+	}
+	return 100 * float64(concordant) / float64(concordant+discordant)
+}