a7dc12f30f
Report the tool's risk number as a plausible range with a confidence label instead of a false-precision point value (confidence-aware tonality — the assessment is confirmed by the DSB / safety expert). - risk_estimation.go: EstimateConfidence (hoch/mittel/niedrig from how the contact mode resolved), EstimateRiskRange (S±1 and aggregate L=F+W+P ±1, the empirically validated per-parameter accuracy), RiskLevelRange; share the riskBandLabel thresholds with EstimateRiskLevel. - risk_benchmark.go: RiskComparisonPair gains eng_risk_point/low/high + level + level_range + confidence; RiskAgreement gains high_confidence_pct. - RiskComparison.tsx: per-hazard range "low–high (level range)" + point, confidence chip, and an aggregate confidence line; types in useBenchmark.ts. - Unit tests for the range/confidence helpers. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
152 lines
5.0 KiB
Go
152 lines
5.0 KiB
Go
package iace
|
||
|
||
// Risk-number comparison for the benchmark: for every matched hazard, the
|
||
// tool's risk parameters (EN-62061-style S/F/W/P + Fine-Kinney) next to the
|
||
// professional's GT values, plus aggregate agreement. Used by the benchmark
|
||
// endpoint so the Risikobewertung comparison is visible in the tab.
|
||
|
||
// RiskComparisonPair holds, for a single matched hazard, the professional's
// ground-truth (GT) risk parameters side by side with the values the tool
// (engine) derived for the same hazard.
type RiskComparisonPair struct {
	HazardName string `json:"hazard_name"`

	// Professional's ground-truth values.
	GTSeverity    int `json:"gt_severity"`
	GTFrequency   int `json:"gt_frequency"`
	GTProbability int `json:"gt_probability"` // GT column W
	GTAvoidance   int `json:"gt_avoidance"`   // GT column P
	GTRisk        int `json:"gt_risk"`        // GT column R

	// Tool estimates for the same four parameters.
	EngSeverity    int `json:"eng_severity"`
	EngFrequency   int `json:"eng_frequency"`
	EngProbability int `json:"eng_probability"`
	EngAvoidance   int `json:"eng_avoidance"`

	// Fine-Kinney suggestion produced by the tool.
	FKScore float64 `json:"fk_score"`
	FKBand  string  `json:"fk_band"`

	// Confidence-aware risk: instead of a single false-precision number the
	// tool reports a point estimate together with a plausible low/high band
	// and a confidence label; the final assessment is confirmed by the DSB /
	// safety expert.
	EngRiskPoint      int    `json:"eng_risk_point"`
	EngRiskLow        int    `json:"eng_risk_low"`
	EngRiskHigh       int    `json:"eng_risk_high"`
	EngRiskLevel      string `json:"eng_risk_level"`       // band of the point value
	EngRiskLevelRange string `json:"eng_risk_level_range"` // e.g. "mittel–hoch"
	Confidence        string `json:"confidence"`           // hoch / mittel / niedrig
}
|
||
|
||
// RiskAgreement summarises, across all scored hazards, how closely the tool's
// risk numbers track the professional's ground-truth (GT) values. All rates
// are percentages in the range 0–100.
type RiskAgreement struct {
	// N is the number of matched hazards that carry a GT severity (> 0) and
	// therefore entered the agreement statistics.
	N int `json:"n"`

	// Share of the N hazards whose tool estimate is at most one step away
	// from the GT value, per parameter.
	SeverityWithin1    float64 `json:"severity_within1"`
	FrequencyWithin1   float64 `json:"frequency_within1"`
	ProbabilityWithin1 float64 `json:"probability_within1"`
	AvoidanceWithin1   float64 `json:"avoidance_within1"`

	RankConcordance   float64 `json:"rank_concordance"`    // Fine-Kinney vs GT R
	HighConfidencePct float64 `json:"high_confidence_pct"` // share of matched hazards with "hoch" confidence
}
|
||
|
||
// ComputeRiskComparison derives the tool's risk numbers for each matched hazard
|
||
// and compares them to the professional's GT values.
|
||
func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, RiskAgreement) {
|
||
pairs := make([]RiskComparisonPair, 0, len(matched))
|
||
var sevOK, freqOK, probOK, avoidOK, n, hiConf int
|
||
var engFK, gtR []float64
|
||
|
||
for _, m := range matched {
|
||
eh := m.EngineHazard
|
||
cats := []string{eh.Category}
|
||
scenario := eh.Scenario
|
||
if scenario == "" {
|
||
scenario = eh.Name
|
||
}
|
||
lifecycle := splitLifecyclePhases(eh.LifecyclePhase)
|
||
|
||
engS := EstimateSeverity(cats, scenario, 0)
|
||
engF := EstimateFrequency(lifecycle)
|
||
engW := EstimateProbabilityW(cats, scenario)
|
||
engP := EstimateAvoidabilityP(cats, scenario)
|
||
fk := SuggestFineKinney(cats, scenario, lifecycle, 0)
|
||
gt := m.GTEntry.RiskIn
|
||
|
||
rLow, rPoint, rHigh := EstimateRiskRange(engS, engF, engW, engP)
|
||
rLevel, rLevelRange := RiskLevelRange(rLow, rPoint, rHigh)
|
||
conf := EstimateConfidence(cats, scenario)
|
||
if conf == "hoch" {
|
||
hiConf++
|
||
}
|
||
|
||
pairs = append(pairs, RiskComparisonPair{
|
||
HazardName: m.GTEntry.HazardType,
|
||
GTSeverity: gt.S, GTFrequency: gt.F, GTProbability: gt.W, GTAvoidance: gt.P, GTRisk: gt.R,
|
||
EngSeverity: engS, EngFrequency: engF, EngProbability: engW, EngAvoidance: engP,
|
||
FKScore: fk.Score, FKBand: fk.Band,
|
||
EngRiskPoint: rPoint, EngRiskLow: rLow, EngRiskHigh: rHigh,
|
||
EngRiskLevel: rLevel, EngRiskLevelRange: rLevelRange, Confidence: conf,
|
||
})
|
||
|
||
if gt.S > 0 {
|
||
n++
|
||
if abs(engS-gt.S) <= 1 {
|
||
sevOK++
|
||
}
|
||
if gt.F > 0 && abs(engF-gt.F) <= 1 {
|
||
freqOK++
|
||
}
|
||
if gt.W > 0 && abs(engW-gt.W) <= 1 {
|
||
probOK++
|
||
}
|
||
if gt.P > 0 && abs(engP-gt.P) <= 1 {
|
||
avoidOK++
|
||
}
|
||
engFK = append(engFK, fk.Score)
|
||
gtR = append(gtR, float64(gt.R))
|
||
}
|
||
}
|
||
|
||
agg := RiskAgreement{N: n}
|
||
if n > 0 {
|
||
agg.SeverityWithin1 = pct(sevOK, n)
|
||
agg.FrequencyWithin1 = pct(freqOK, n)
|
||
agg.ProbabilityWithin1 = pct(probOK, n)
|
||
agg.AvoidanceWithin1 = pct(avoidOK, n)
|
||
agg.RankConcordance = rankConcordance(engFK, gtR)
|
||
}
|
||
if len(pairs) > 0 {
|
||
agg.HighConfidencePct = pct(hiConf, len(pairs))
|
||
}
|
||
return pairs, agg
|
||
}
|
||
|
||
// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
|
||
|
||
// pct expresses x as a percentage of total. A zero total yields 0 rather than
// dividing by zero.
func pct(x, total int) float64 {
	if total != 0 {
		return 100 * float64(x) / float64(total)
	}
	return 0
}
|
||
|
||
// rankConcordance returns the percentage (0–100) of comparable index pairs
// (i, j) that a and b order the same way, e.g. how often the tool ranks two
// hazards in the same order as the professional does.
//
// Only the sign of each difference is used, so the result is scale-invariant;
// a value around 50 corresponds to random ordering. Pairs that are tied in
// either slice are not comparable and are skipped. When no comparable pair
// exists (including empty input) the result is 0.
//
// a and b are expected to be the same length; if they differ, only the indices
// present in both are compared.
func rankConcordance(a, b []float64) float64 {
	// Bound both loops by the shorter slice so mismatched input cannot cause
	// an index-out-of-range panic.
	n := len(a)
	if len(b) < n {
		n = len(b)
	}

	concordant, discordant := 0, 0
	for i := 0; i < n; i++ {
		for j := i + 1; j < n; j++ {
			da, db := a[i]-a[j], b[i]-b[j]
			if da == 0 || db == 0 {
				continue // tie in at least one ranking: pair carries no ordering information
			}
			if (da > 0) == (db > 0) {
				concordant++
			} else {
				discordant++
			}
		}
	}

	comparable := concordant + discordant
	if comparable == 0 {
		return 0
	}
	return 100 * float64(concordant) / float64(comparable)
}
|