Files
breakpilot-compliance/ai-compliance-sdk/internal/iace/risk_benchmark.go
T
Benjamin Admin a7dc12f30f feat(iace): risk as confidence range + label in benchmark tab
Report the tool's risk number as a plausible range with a confidence
label instead of a false-precision point value (confidence-aware
tonality — the assessment is confirmed by the DSB / safety expert).

- risk_estimation.go: EstimateConfidence (hoch/mittel/niedrig from how the
  contact mode resolved), EstimateRiskRange (S±1 and aggregate L=F+W+P ±1,
  the empirically validated per-parameter accuracy), RiskLevelRange; share
  the riskBandLabel thresholds with EstimateRiskLevel.
- risk_benchmark.go: RiskComparisonPair gains eng_risk_point/low/high +
  level + level_range + confidence; RiskAgreement gains high_confidence_pct.
- RiskComparison.tsx: per-hazard range "low–high (level range)" + point,
  confidence chip, and an aggregate confidence line; types in useBenchmark.ts.
- Unit tests for the range/confidence helpers.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 23:04:56 +02:00

152 lines
5.0 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package iace
// Risk-number comparison for the benchmark: for every matched hazard, the
// tool's risk parameters (EN-62061-style S/F/W/P + Fine-Kinney) next to the
// professional's GT values, plus aggregate agreement. Used by the benchmark
// endpoint so the Risikobewertung comparison is visible in the tab.
// RiskComparisonPair holds, for a single matched hazard, the professional's GT
// risk values side by side with the values the tool produced.
type RiskComparisonPair struct {
	HazardName string `json:"hazard_name"`

	// The professional's GT values.
	GTSeverity    int `json:"gt_severity"`
	GTFrequency   int `json:"gt_frequency"`
	GTProbability int `json:"gt_probability"` // GT column W
	GTAvoidance   int `json:"gt_avoidance"`   // GT column P
	GTRisk        int `json:"gt_risk"`        // GT column R

	// The tool's parameter estimates for the same hazard.
	EngSeverity    int `json:"eng_severity"`
	EngFrequency   int `json:"eng_frequency"`
	EngProbability int `json:"eng_probability"`
	EngAvoidance   int `json:"eng_avoidance"`

	// Fine-Kinney score and band suggested by the tool.
	FKScore float64 `json:"fk_score"`
	FKBand  string  `json:"fk_band"`

	// Tool risk reported as a range rather than a single falsely precise
	// number: a point estimate, a plausible lower/upper bound, band labels and
	// a confidence label. The assessment is confirmed by the DSB / safety
	// expert.
	EngRiskPoint      int    `json:"eng_risk_point"`
	EngRiskLow        int    `json:"eng_risk_low"`
	EngRiskHigh       int    `json:"eng_risk_high"`
	EngRiskLevel      string `json:"eng_risk_level"`       // band of the point value
	EngRiskLevelRange string `json:"eng_risk_level_range"` // band label for the low..high range
	Confidence        string `json:"confidence"`           // "hoch", "mittel" or "niedrig"
}
// RiskAgreement summarizes, across matched hazards, how closely the tool's risk
// numbers track the GT values. As filled in by ComputeRiskComparison, the
// *Within1 fields and HighConfidencePct are percentages on a 0-100 scale.
type RiskAgreement struct {
	// N is the number of hazards the within-1 percentages are based on.
	N int `json:"n"`

	// Share of hazards whose tool estimate is at most 1 away from the GT value.
	SeverityWithin1    float64 `json:"severity_within1"`
	FrequencyWithin1   float64 `json:"frequency_within1"`
	ProbabilityWithin1 float64 `json:"probability_within1"`
	AvoidanceWithin1   float64 `json:"avoidance_within1"`

	RankConcordance   float64 `json:"rank_concordance"`    // Fine-Kinney vs GT R
	HighConfidencePct float64 `json:"high_confidence_pct"` // share of matched hazards with "hoch" confidence
}
// ComputeRiskComparison builds, for every matched hazard, the tool's estimated
// risk parameters next to the professional's GT values and aggregates how well
// the two agree.
//
// Every element of matched yields exactly one RiskComparisonPair. Only hazards
// whose GT severity is > 0 contribute to RiskAgreement.N, to the within-1
// percentages and to the rank concordance (Fine-Kinney score vs GT R); a GT
// frequency/probability/avoidance of 0 never counts as a hit.
// HighConfidencePct is computed over all returned pairs.
func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, RiskAgreement) {
	result := make([]RiskComparisonPair, 0, len(matched))
	var (
		counted, highConfidence                int
		sevHits, freqHits, probHits, avoidHits int
		fkScores, gtRisks                      []float64
	)

	// within1 reports whether an estimate is at most one step away from a GT
	// value; a GT value that is not positive never counts as a hit.
	within1 := func(estimate, truth int) bool {
		return truth > 0 && abs(estimate-truth) <= 1
	}

	for _, match := range matched {
		hazard := match.EngineHazard
		categories := []string{hazard.Category}

		// Fall back to the hazard name when no scenario text is set.
		text := hazard.Scenario
		if text == "" {
			text = hazard.Name
		}
		phases := splitLifecyclePhases(hazard.LifecyclePhase)

		sev := EstimateSeverity(categories, text, 0)
		freq := EstimateFrequency(phases)
		prob := EstimateProbabilityW(categories, text)
		avoid := EstimateAvoidabilityP(categories, text)
		fineKinney := SuggestFineKinney(categories, text, phases, 0)
		ref := match.GTEntry.RiskIn

		low, point, high := EstimateRiskRange(sev, freq, prob, avoid)
		level, levelRange := RiskLevelRange(low, point, high)
		confidence := EstimateConfidence(categories, text)
		if confidence == "hoch" {
			highConfidence++
		}

		pair := RiskComparisonPair{
			HazardName:        match.GTEntry.HazardType,
			GTSeverity:        ref.S,
			GTFrequency:       ref.F,
			GTProbability:     ref.W,
			GTAvoidance:       ref.P,
			GTRisk:            ref.R,
			EngSeverity:       sev,
			EngFrequency:      freq,
			EngProbability:    prob,
			EngAvoidance:      avoid,
			FKScore:           fineKinney.Score,
			FKBand:            fineKinney.Band,
			EngRiskPoint:      point,
			EngRiskLow:        low,
			EngRiskHigh:       high,
			EngRiskLevel:      level,
			EngRiskLevelRange: levelRange,
			Confidence:        confidence,
		}
		result = append(result, pair)

		// Hazards without a GT severity are listed but excluded from the
		// agreement statistics.
		if ref.S <= 0 {
			continue
		}
		counted++
		if within1(sev, ref.S) {
			sevHits++
		}
		if within1(freq, ref.F) {
			freqHits++
		}
		if within1(prob, ref.W) {
			probHits++
		}
		if within1(avoid, ref.P) {
			avoidHits++
		}
		fkScores = append(fkScores, fineKinney.Score)
		gtRisks = append(gtRisks, float64(ref.R))
	}

	summary := RiskAgreement{N: counted}
	if counted > 0 {
		summary.SeverityWithin1 = pct(sevHits, counted)
		summary.FrequencyWithin1 = pct(freqHits, counted)
		summary.ProbabilityWithin1 = pct(probHits, counted)
		summary.AvoidanceWithin1 = pct(avoidHits, counted)
		summary.RankConcordance = rankConcordance(fkScores, gtRisks)
	}
	if total := len(result); total > 0 {
		summary.HighConfidencePct = pct(highConfidence, total)
	}
	return result, summary
}
// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
// pct expresses x as a percentage of total (0-100 when 0 <= x <= total).
// A total of 0 yields 0 instead of dividing by zero.
func pct(x, total int) float64 {
	if total != 0 {
		return 100 * float64(x) / float64(total)
	}
	return 0
}
// rankConcordance returns, as a percentage in [0, 100], the share of
// comparable index pairs (i, j) that a and b order the same way. The measure
// depends only on ordering, not on scale; 50 corresponds to random agreement.
//
// A pair is comparable only when neither a nor b is tied at (i, j); tied pairs
// are skipped. When no comparable pair exists (fewer than two elements, or
// only ties) the result is 0.
//
// a and b are read as parallel slices. If their lengths differ, only the
// indices present in both are compared.
func rankConcordance(a, b []float64) float64 {
	// Clamp to the shorter slice so a length mismatch cannot index out of range.
	n := len(a)
	if len(b) < n {
		n = len(b)
	}

	concordant, discordant := 0, 0
	for i := 0; i < n; i++ {
		for j := i + 1; j < n; j++ {
			da, db := a[i]-a[j], b[i]-b[j]
			if da == 0 || db == 0 {
				// A tie in either series carries no ordering information.
				continue
			}
			if (da > 0) == (db > 0) {
				concordant++
			} else {
				discordant++
			}
		}
	}

	comparable := concordant + discordant
	if comparable == 0 {
		return 0
	}
	return 100 * float64(concordant) / float64(comparable)
}