feat(iace): risk as confidence range + label in benchmark tab
Report the tool's risk number as a plausible range with a confidence label instead of a false-precision point value (confidence-aware tonality — the assessment is confirmed by the DSB / safety expert).

- risk_estimation.go: EstimateConfidence (hoch/mittel/niedrig from how the contact mode resolved), EstimateRiskRange (S±1 and aggregate L=F+W+P ±1, the empirically validated per-parameter accuracy), RiskLevelRange; share the riskBandLabel thresholds with EstimateRiskLevel.
- risk_benchmark.go: RiskComparisonPair gains eng_risk_point/low/high + level + level_range + confidence; RiskAgreement gains high_confidence_pct.
- RiskComparison.tsx: per-hazard range "low–high (level range)" + point, confidence chip, and an aggregate confidence line; types in useBenchmark.ts.
- Unit tests for the range/confidence helpers.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,15 @@ type RiskComparisonPair struct {
|
||||
EngAvoidance int `json:"eng_avoidance"`
|
||||
FKScore float64 `json:"fk_score"`
|
||||
FKBand string `json:"fk_band"`
|
||||
// Confidence-aware risk: a point estimate plus a plausible low/high band and
|
||||
// a confidence label, so the tool reports a RANGE (not a false-precision
|
||||
// point) — the assessment is confirmed by the DSB / safety expert.
|
||||
EngRiskPoint int `json:"eng_risk_point"`
|
||||
EngRiskLow int `json:"eng_risk_low"`
|
||||
EngRiskHigh int `json:"eng_risk_high"`
|
||||
EngRiskLevel string `json:"eng_risk_level"` // band of the point value
|
||||
EngRiskLevelRange string `json:"eng_risk_level_range"` // e.g. "mittel–hoch"
|
||||
Confidence string `json:"confidence"` // hoch / mittel / niedrig
|
||||
}
|
||||
|
||||
// RiskAgreement aggregates how close the tool's risk numbers are to the GT.
|
||||
@@ -28,14 +37,15 @@ type RiskAgreement struct {
|
||||
FrequencyWithin1 float64 `json:"frequency_within1"`
|
||||
ProbabilityWithin1 float64 `json:"probability_within1"`
|
||||
AvoidanceWithin1 float64 `json:"avoidance_within1"`
|
||||
RankConcordance float64 `json:"rank_concordance"` // Fine-Kinney vs GT R
|
||||
RankConcordance float64 `json:"rank_concordance"` // Fine-Kinney vs GT R
|
||||
HighConfidencePct float64 `json:"high_confidence_pct"` // share of matched hazards with "hoch" confidence
|
||||
}
|
||||
|
||||
// ComputeRiskComparison derives the tool's risk numbers for each matched hazard
|
||||
// and compares them to the professional's GT values.
|
||||
func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, RiskAgreement) {
|
||||
pairs := make([]RiskComparisonPair, 0, len(matched))
|
||||
var sevOK, freqOK, probOK, avoidOK, n int
|
||||
var sevOK, freqOK, probOK, avoidOK, n, hiConf int
|
||||
var engFK, gtR []float64
|
||||
|
||||
for _, m := range matched {
|
||||
@@ -54,11 +64,20 @@ func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, Ris
|
||||
fk := SuggestFineKinney(cats, scenario, lifecycle, 0)
|
||||
gt := m.GTEntry.RiskIn
|
||||
|
||||
rLow, rPoint, rHigh := EstimateRiskRange(engS, engF, engW, engP)
|
||||
rLevel, rLevelRange := RiskLevelRange(rLow, rPoint, rHigh)
|
||||
conf := EstimateConfidence(cats, scenario)
|
||||
if conf == "hoch" {
|
||||
hiConf++
|
||||
}
|
||||
|
||||
pairs = append(pairs, RiskComparisonPair{
|
||||
HazardName: m.GTEntry.HazardType,
|
||||
GTSeverity: gt.S, GTFrequency: gt.F, GTProbability: gt.W, GTAvoidance: gt.P, GTRisk: gt.R,
|
||||
EngSeverity: engS, EngFrequency: engF, EngProbability: engW, EngAvoidance: engP,
|
||||
FKScore: fk.Score, FKBand: fk.Band,
|
||||
EngRiskPoint: rPoint, EngRiskLow: rLow, EngRiskHigh: rHigh,
|
||||
EngRiskLevel: rLevel, EngRiskLevelRange: rLevelRange, Confidence: conf,
|
||||
})
|
||||
|
||||
if gt.S > 0 {
|
||||
@@ -88,6 +107,9 @@ func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, Ris
|
||||
agg.AvoidanceWithin1 = pct(avoidOK, n)
|
||||
agg.RankConcordance = rankConcordance(engFK, gtR)
|
||||
}
|
||||
if len(pairs) > 0 {
|
||||
agg.HighConfidencePct = pct(hiConf, len(pairs))
|
||||
}
|
||||
return pairs, agg
|
||||
}
|
||||
|
||||
|
||||
@@ -203,16 +203,92 @@ func EstimateRiskLevel(s, f, w, p int) (int, string) {
|
||||
s = 1
|
||||
}
|
||||
idx := s * (f + w + p)
|
||||
return idx, riskBandLabel(idx)
|
||||
}
|
||||
|
||||
// riskBandLabel maps a risk index to BreakPilot's German level band. It is the
// single source of truth for the band thresholds, shared by EstimateRiskLevel
// and the confidence-range derivation.
//
// Lower bounds are inclusive: >=45 "kritisch", >=30 "hoch", >=18 "mittel",
// >=9 "gering"; everything below 9 is "vernachlaessigbar". The index is
// nominally S*(F+W+P), i.e. 3..75, but any int is accepted and values below
// the nominal range fall into the lowest band.
func riskBandLabel(idx int) string {
	switch {
	case idx >= 45:
		return "kritisch"
	case idx >= 30:
		return "hoch"
	case idx >= 18:
		return "mittel"
	case idx >= 9:
		return "gering"
	default:
		return "vernachlaessigbar"
	}
}
|
||||
|
||||
// clampRisk1to5 limits x to the closed interval [1, 5], the domain of a single
// risk parameter. Values below 1 become 1, values above 5 become 5, and
// in-range values are returned unchanged.
func clampRisk1to5(x int) int {
	switch {
	case x < 1:
		return 1
	case x > 5:
		return 5
	default:
		return x
	}
}
|
||||
|
||||
// EstimateConfidence reports how well-anchored the tool's risk parameters are,
|
||||
// from HOW the injury mechanism (contact mode) was resolved: an explicit
|
||||
// scenario keyword → "hoch" (strong kinematic signal), a category fallback →
|
||||
// "mittel", nothing → "niedrig" (parameters fell back to neutral). This is an
|
||||
// honest signal that the point estimate is a heuristic, not a guarantee — the
|
||||
// final assessment stays with the DSB / safety expert.
|
||||
func EstimateConfidence(cats []string, scenario string) string {
|
||||
text := normalizeDE(scenario)
|
||||
for _, e := range contactModeKeywords {
|
||||
for _, kw := range e.keywords {
|
||||
if strings.Contains(text, kw) {
|
||||
return "hoch"
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, c := range cats {
|
||||
if _, ok := categoryDefaultMode[c]; ok {
|
||||
return "mittel"
|
||||
}
|
||||
}
|
||||
return "niedrig"
|
||||
}
|
||||
|
||||
// EstimateRiskRange returns the point risk index plus a plausible low/high band.
|
||||
// The band shifts severity S by ±1 and the aggregate likelihood L = F+W+P by ±1
|
||||
// (each within its domain). We move L as a whole rather than each of F/W/P
|
||||
// independently because the validation shows the per-parameter errors largely
|
||||
// cancel in the sum (W is within ±1 of the GT ~100% of the time). The result
|
||||
// communicates that the risk number is an ESTIMATE with uncertainty rather than
|
||||
// a false-precision point value — aligned with the confidence-aware tonality.
|
||||
func EstimateRiskRange(s, f, w, p int) (low, point, high int) {
|
||||
s = clampRisk1to5(s)
|
||||
l := clampRisk1to5(f) + clampRisk1to5(w) + clampRisk1to5(p) // 3..15
|
||||
clampL := func(x int) int {
|
||||
if x < 3 {
|
||||
return 3
|
||||
}
|
||||
if x > 15 {
|
||||
return 15
|
||||
}
|
||||
return x
|
||||
}
|
||||
point = s * l
|
||||
low = clampRisk1to5(s-1) * clampL(l-1)
|
||||
high = clampRisk1to5(s+1) * clampL(l+1)
|
||||
return low, point, high
|
||||
}
|
||||
|
||||
// RiskLevelRange returns the German level band for the point plus a combined
|
||||
// "low–high" range label (single label when low and high fall in the same band).
|
||||
func RiskLevelRange(low, point, high int) (level, levelRange string) {
|
||||
level = riskBandLabel(point)
|
||||
ll, lh := riskBandLabel(low), riskBandLabel(high)
|
||||
if ll == lh {
|
||||
return level, ll
|
||||
}
|
||||
return level, ll + "–" + lh // en dash
|
||||
}
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
package iace
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestEstimateRiskRange(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
s, f, w, p int
|
||||
wantLow, wantP, wantH int
|
||||
}{
|
||||
// S=4, L=F+W+P=8 → point 32; low 3*clampL(7)=21; high 5*clampL(9)=45.
|
||||
{"typical electrical", 4, 3, 2, 3, 21, 32, 45},
|
||||
// Min likelihood: L=3; low clamps L to 3 (clampL(2)=3) and S to 1.
|
||||
{"low end clamps", 2, 1, 1, 1, 3, 6, 12},
|
||||
// Max: S=5, L=15 → point 75; high clamps S to 5 and L to 15.
|
||||
{"high end clamps", 5, 5, 5, 5, 56, 75, 75},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
low, point, high := EstimateRiskRange(tc.s, tc.f, tc.w, tc.p)
|
||||
if low != tc.wantLow || point != tc.wantP || high != tc.wantH {
|
||||
t.Errorf("EstimateRiskRange(%d,%d,%d,%d) = (%d,%d,%d), want (%d,%d,%d)",
|
||||
tc.s, tc.f, tc.w, tc.p, low, point, high, tc.wantLow, tc.wantP, tc.wantH)
|
||||
}
|
||||
if low > point || point > high {
|
||||
t.Errorf("range not ordered: low=%d point=%d high=%d", low, point, high)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEstimateConfidence(t *testing.T) {
|
||||
cases := []struct {
|
||||
cats []string
|
||||
scenario string
|
||||
want string
|
||||
}{
|
||||
{[]string{"mechanical_hazard"}, "Quetschen der Hand im Werkzeugraum", "hoch"}, // keyword "quetsch"
|
||||
{[]string{"electrical_hazard"}, "Elektrischer Schlag am Gehaeuse", "hoch"}, // keyword "elektrisch"
|
||||
{[]string{"mechanical_hazard"}, "Allgemeine Restgefahr an der Anlage", "mittel"}, // category fallback
|
||||
{[]string{"made_up_category"}, "Unspezifische Situation", "niedrig"}, // nothing
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if got := EstimateConfidence(tc.cats, tc.scenario); got != tc.want {
|
||||
t.Errorf("EstimateConfidence(%v, %q) = %q, want %q", tc.cats, tc.scenario, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRiskLevelRange(t *testing.T) {
|
||||
// Same band low+high → single label; spanning bands → "low–high".
|
||||
if lvl, rng := RiskLevelRange(9, 12, 16); lvl != "gering" || rng != "gering" {
|
||||
t.Errorf("single-band: got (%q,%q), want (gering,gering)", lvl, rng)
|
||||
}
|
||||
if lvl, rng := RiskLevelRange(21, 32, 45); lvl != "hoch" || rng != "mittel–kritisch" {
|
||||
t.Errorf("multi-band: got (%q,%q), want (hoch, mittel–kritisch)", lvl, rng)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user