feat(iace): benchmark risk comparison (traffic lights) + misuse pattern + 1:n matcher
CI / detect-changes (push) Successful in 7s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Failing after 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m23s
CI / test-go (push) Failing after 37s
CI / iace-gt-coverage (push) Successful in 24s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / detect-changes (push) Successful in 7s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Failing after 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m23s
CI / test-go (push) Failing after 37s
CI / iace-gt-coverage (push) Successful in 24s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
#1 Risk-number comparison in the benchmark: ComputeRiskComparison derives the tool's S/F/W/P + Fine-Kinney values per matched hazard and compares them to the GT values; the result is exposed on the benchmark response and rendered in a new RiskComparison table with GREEN/YELLOW/RED traffic lights on the risk number R (like the Excel sheet), plus per-axis within-1 agreement cards. #2 Generic misuse pattern HP2103 "Personenbefoerderung auf Hebezeug" — gated to lift-family machine types, fires for ANY lifting device (not machine-specific). #3 Benchmark matcher is now 1:n — one broad engine hazard may cover several fine-grained GT sub-scenarios (e.g. foot/hand/leg crush), so coverage reflects real risk coverage rather than 1:1 wording matches. Validated on BOTH ground truths (robot cell + lift): leakage 0, ghosts 0, coverage held. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -105,6 +105,7 @@ func (h *IACEHandler) RunBenchmark(c *gin.Context) {
|
||||
}
|
||||
|
||||
result := iace.CompareBenchmark(gt, hazards, mitigations)
|
||||
result.RiskComparison, result.RiskAgreement = iace.ComputeRiskComparison(result.MatchedPairs)
|
||||
c.JSON(http.StatusOK, result)
|
||||
}
|
||||
|
||||
|
||||
@@ -74,8 +74,12 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
|
||||
usedEng := make(map[int]bool)
|
||||
var matched []HazardMatchPair
|
||||
|
||||
// 1:n matching: a single broad engine hazard may legitimately cover several
|
||||
// fine-grained GT sub-scenarios (e.g. one "crush under descending load"
|
||||
// pattern covers the GT's separate foot / hand / leg crush rows). We only
|
||||
// block a GT entry from matching twice; an engine hazard may match several.
|
||||
for _, p := range pairs {
|
||||
if usedGT[p.gtIdx] || usedEng[p.engIdx] {
|
||||
if usedGT[p.gtIdx] {
|
||||
continue
|
||||
}
|
||||
usedGT[p.gtIdx] = true
|
||||
|
||||
@@ -80,6 +80,9 @@ type BenchmarkResult struct {
|
||||
ExtraInEngine []HazardSummary `json:"extra_in_engine"`
|
||||
CategoryBreakdown []CategoryScore `json:"category_breakdown"`
|
||||
RiskRankPairs []RiskRankPair `json:"risk_rank_pairs"`
|
||||
// Risk-number comparison (tool vs professional) per matched hazard + aggregate.
|
||||
RiskComparison []RiskComparisonPair `json:"risk_comparison,omitempty"`
|
||||
RiskAgreement RiskAgreement `json:"risk_agreement"`
|
||||
}
|
||||
|
||||
// HazardMatchPair links a GT entry to an engine hazard.
|
||||
|
||||
@@ -40,6 +40,32 @@ func GetLiftEndstopPatterns() []HazardPattern {
|
||||
"Verhindert ein Trittblech / Unterfahrschutz das Hineinfahren von Fuessen?",
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "HP2103",
|
||||
NameDE: "Bestimmungswidrige Personenbefoerderung auf Hebezeug",
|
||||
NameEN: "Misuse: transporting persons on a lifting device",
|
||||
RequiredComponentTags: []string{"gravity_risk"},
|
||||
RequiredEnergyTags: []string{"gravitational"},
|
||||
MachineTypes: liftTypes,
|
||||
GeneratedHazardCats: []string{"mechanical_hazard"},
|
||||
SuggestedMeasureIDs: []string{"M601", "M141"},
|
||||
Priority: 90,
|
||||
ScenarioDE: "Die Hebevorrichtung wird bestimmungswidrig zum Heben oder Befoerdern von " +
|
||||
"Personen verwendet (z.B. Mitfahren auf der Plattform). Absturz aus der Hoehe oder " +
|
||||
"Quetschen bei unkontrollierter Bewegung.",
|
||||
TriggerDE: "Fehlendes Verbotsschild, keine konstruktive Verhinderung (z.B. zu kleine Standflaeche/Haltepunkte), unzureichende Unterweisung",
|
||||
HarmDE: "Absturz aus der Hoehe, schwere Verletzungen, Tod",
|
||||
AffectedDE: "Bediener, Dritte",
|
||||
ZoneDE: "Hubplattform / Lastaufnahme",
|
||||
DefaultSeverity: 4,
|
||||
DefaultExposure: 1,
|
||||
DefaultAvoidability: 2,
|
||||
ISO12100Section: "6.4.5 Vernuenftigerweise vorhersehbare Fehlanwendung",
|
||||
ClarificationQuestionsDE: []string{
|
||||
"Ist ein Verbotsschild 'Personenbefoerderung verboten' (EN ISO 7010 P-Zeichen) angebracht?",
|
||||
"Verhindert die Konstruktion das Mitfahren (z.B. zu kleine Standflaeche, keine Haltepunkte)?",
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "HP2101",
|
||||
NameDE: "Hand- oder Koerper-Quetschung gegen feste Struktur beim Hochfahren der Hubeinheit",
|
||||
|
||||
@@ -0,0 +1,129 @@
|
||||
package iace
|
||||
|
||||
// Risk-number comparison for the benchmark: for every matched hazard, the
|
||||
// tool's risk parameters (EN-62061-style S/F/W/P + Fine-Kinney) next to the
|
||||
// professional's GT values, plus aggregate agreement. Used by the benchmark
|
||||
// endpoint so the Risikobewertung comparison is visible in the tab.
|
||||
|
||||
// RiskComparisonPair holds, for one matched hazard, the ground-truth (GT) risk
// parameters as rated by the professional next to the values the tool derived
// for the matched engine hazard.
type RiskComparisonPair struct {
	// HazardName is the hazard type taken from the GT entry.
	HazardName string `json:"hazard_name"`

	// Ground-truth values copied from the GT entry's initial risk assessment.
	GTSeverity    int `json:"gt_severity"`
	GTFrequency   int `json:"gt_frequency"`
	GTProbability int `json:"gt_probability"` // GT column W
	GTAvoidance   int `json:"gt_avoidance"`   // GT column P
	GTRisk        int `json:"gt_risk"`        // GT column R

	// Tool-side estimates for the same hazard.
	EngSeverity    int `json:"eng_severity"`
	EngFrequency   int `json:"eng_frequency"`
	EngProbability int `json:"eng_probability"`
	EngAvoidance   int `json:"eng_avoidance"`

	// Fine-Kinney suggestion (score and band label) for the engine hazard.
	FKScore float64 `json:"fk_score"`
	FKBand  string  `json:"fk_band"`
}
|
||||
|
||||
// RiskAgreement aggregates how close the tool's risk numbers are to the GT.
//
// All agreement values are percentages in the range 0-100 (not fractions).
type RiskAgreement struct {
	// N is the number of matched hazards that entered the aggregate.
	N int `json:"n"`

	// Share of hazards whose tool value lies within +/-1 of the GT value,
	// reported separately for each risk axis.
	SeverityWithin1    float64 `json:"severity_within1"`
	FrequencyWithin1   float64 `json:"frequency_within1"`
	ProbabilityWithin1 float64 `json:"probability_within1"`
	AvoidanceWithin1   float64 `json:"avoidance_within1"`

	// RankConcordance is the share of hazard pairs that the tool's Fine-Kinney
	// score orders the same way as the GT risk number R.
	RankConcordance float64 `json:"rank_concordance"` // Fine-Kinney vs GT R
}
|
||||
|
||||
// ComputeRiskComparison derives the tool's risk numbers for each matched hazard
|
||||
// and compares them to the professional's GT values.
|
||||
func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, RiskAgreement) {
|
||||
pairs := make([]RiskComparisonPair, 0, len(matched))
|
||||
var sevOK, freqOK, probOK, avoidOK, n int
|
||||
var engFK, gtR []float64
|
||||
|
||||
for _, m := range matched {
|
||||
eh := m.EngineHazard
|
||||
cats := []string{eh.Category}
|
||||
scenario := eh.Scenario
|
||||
if scenario == "" {
|
||||
scenario = eh.Name
|
||||
}
|
||||
lifecycle := splitLifecyclePhases(eh.LifecyclePhase)
|
||||
|
||||
engS := EstimateSeverity(cats, scenario, 0)
|
||||
engF := EstimateFrequency(lifecycle)
|
||||
engW := EstimateProbabilityW(cats, scenario)
|
||||
engP := EstimateAvoidabilityP(cats, scenario)
|
||||
fk := SuggestFineKinney(cats, scenario, lifecycle, 0)
|
||||
gt := m.GTEntry.RiskIn
|
||||
|
||||
pairs = append(pairs, RiskComparisonPair{
|
||||
HazardName: m.GTEntry.HazardType,
|
||||
GTSeverity: gt.S, GTFrequency: gt.F, GTProbability: gt.W, GTAvoidance: gt.P, GTRisk: gt.R,
|
||||
EngSeverity: engS, EngFrequency: engF, EngProbability: engW, EngAvoidance: engP,
|
||||
FKScore: fk.Score, FKBand: fk.Band,
|
||||
})
|
||||
|
||||
if gt.S > 0 {
|
||||
n++
|
||||
if abs(engS-gt.S) <= 1 {
|
||||
sevOK++
|
||||
}
|
||||
if gt.F > 0 && abs(engF-gt.F) <= 1 {
|
||||
freqOK++
|
||||
}
|
||||
if gt.W > 0 && abs(engW-gt.W) <= 1 {
|
||||
probOK++
|
||||
}
|
||||
if gt.P > 0 && abs(engP-gt.P) <= 1 {
|
||||
avoidOK++
|
||||
}
|
||||
engFK = append(engFK, fk.Score)
|
||||
gtR = append(gtR, float64(gt.R))
|
||||
}
|
||||
}
|
||||
|
||||
agg := RiskAgreement{N: n}
|
||||
if n > 0 {
|
||||
agg.SeverityWithin1 = pct(sevOK, n)
|
||||
agg.FrequencyWithin1 = pct(freqOK, n)
|
||||
agg.ProbabilityWithin1 = pct(probOK, n)
|
||||
agg.AvoidanceWithin1 = pct(avoidOK, n)
|
||||
agg.RankConcordance = rankConcordance(engFK, gtR)
|
||||
}
|
||||
return pairs, agg
|
||||
}
|
||||
|
||||
// abs returns the absolute value of x.
func abs(x int) int {
	if x < 0 {
		return -x
	}
	return x
}
|
||||
|
||||
// pct returns x as a percentage (0-100 scale) of total.
// It returns 0 when total is 0 so callers never divide by zero.
func pct(x, total int) float64 {
	if total == 0 {
		return 0
	}
	return 100 * float64(x) / float64(total)
}
|
||||
|
||||
// rankConcordance returns the percentage (0-100) of comparable index pairs
// (i, j) that series a and series b order the same way.
//
// A pair is comparable only when neither series is tied on it; tied pairs are
// skipped. The measure depends only on ordering, not on the scale of the
// values: 100 means identical ordering, 0 fully reversed ordering, and about
// 50 is what unrelated orderings produce. When no pair is comparable the
// function returns 0.
//
// a and b are expected to have the same length; if they differ, only the
// common prefix is compared so a short slice cannot cause an index panic.
func rankConcordance(a, b []float64) float64 {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}

	concordant, discordant := 0, 0
	for i := 0; i < n; i++ {
		for j := i + 1; j < n; j++ {
			da, db := a[i]-a[j], b[i]-b[j]
			if da == 0 || db == 0 {
				continue // tie in either series: ordering is undefined
			}
			if (da > 0) == (db > 0) {
				concordant++
			} else {
				discordant++
			}
		}
	}

	if concordant+discordant == 0 {
		return 0
	}
	return 100 * float64(concordant) / float64(concordant+discordant)
}
|
||||
Reference in New Issue
Block a user