From a7dc12f30fe466ff284e87f2bd931af759454a80 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 10 Jun 2026 23:04:56 +0200 Subject: [PATCH] feat(iace): risk as confidence range + label in benchmark tab MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Report the tool's risk number as a plausible range with a confidence label instead of a false-precision point value (confidence-aware tonality — the assessment is confirmed by the DSB / safety expert). - risk_estimation.go: EstimateConfidence (hoch/mittel/niedrig from how the contact mode resolved), EstimateRiskRange (S±1 and aggregate L=F+W+P ±1, the empirically validated per-parameter accuracy), RiskLevelRange; share the riskBandLabel thresholds with EstimateRiskLevel. - risk_benchmark.go: RiskComparisonPair gains eng_risk_point/low/high + level + level_range + confidence; RiskAgreement gains high_confidence_pct. - RiskComparison.tsx: per-hazard range "low–high (level range)" + point, confidence chip, and an aggregate confidence line; types in useBenchmark.ts. - Unit tests for the range/confidence helpers. Co-Authored-By: Claude Opus 4.8 --- .../benchmark/_components/RiskComparison.tsx | 88 ++++++++++++------- .../benchmark/_hooks/useBenchmark.ts | 4 + .../internal/iace/risk_benchmark.go | 26 +++++- .../internal/iace/risk_estimation.go | 86 ++++++++++++++++-- .../iace/risk_estimation_range_test.go | 58 ++++++++++++ 5 files changed, 225 insertions(+), 37 deletions(-) create mode 100644 ai-compliance-sdk/internal/iace/risk_estimation_range_test.go diff --git a/admin-compliance/app/sdk/iace/[projectId]/benchmark/_components/RiskComparison.tsx b/admin-compliance/app/sdk/iace/[projectId]/benchmark/_components/RiskComparison.tsx index 44e4aecb..4b8d2753 100644 --- a/admin-compliance/app/sdk/iace/[projectId]/benchmark/_components/RiskComparison.tsx +++ b/admin-compliance/app/sdk/iace/[projectId]/benchmark/_components/RiskComparison.tsx @@ -17,6 +17,13 @@ function ampelBand(band: string): Ampel { return 'green' } +// Tool confidence (how well-anchored the estimate is) → chip color. +function ampelConfidence(c: string): Ampel { + if (c === 'hoch') return 'green' + if (c === 'mittel') return 'yellow' + return 'red' +} + const cellColor: Record = { red: 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300', yellow: 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/40 dark:text-yellow-300', @@ -47,18 +54,28 @@ export function RiskComparison({ pairs, agreement }: { pairs?: RiskComparisonPai

Risikozahlen-Vergleich (Fachmann vs. Tool)

- R = S × (F + W + P), Ampel wie in der Excel. Fine-Kinney (P×E×C) als zweite, US-anerkannte Bewertung. + R = S × (F + W + P), Ampel wie in der Excel. Das Tool nennt einen Schätzbereich{' '} + (nicht einen exakten Punktwert) plus Konfidenz — die endgültige Bewertung trifft der/die Sachverständige. + Fine-Kinney (P×E×C) als zweite, US-anerkannte Bewertung.

{agreement && agreement.n > 0 && ( -
- - - - - -
+ <> +
+ + + + + +
+ {typeof agreement.high_confidence_pct === 'number' && ( +

+ Tool-Konfidenz: {Math.round(agreement.high_confidence_pct)}% der erkannten + Gefaehrdungen mit hoher Konfidenz (Verletzungsmechanismus eindeutig aus dem Szenario ableitbar). +

+ )} + )}
@@ -67,31 +84,42 @@ export function RiskComparison({ pairs, agreement }: { pairs?: RiskComparisonPai Gefaehrdung Fachmann · S F W P R - Tool · S F W P R / FK + Tool · S F W P + Risiko (Schätzbereich) / FK + Konfidenz - {pairs.map((p, i) => { - const engR = p.eng_severity * (p.eng_frequency + p.eng_probability + p.eng_avoidance) - return ( - - {p.hazard_name || '—'} - {p.gt_severity} - {p.gt_frequency} - {p.gt_probability} - {p.gt_avoidance} - {p.gt_risk} - {p.eng_severity} - {p.eng_frequency} - {p.eng_probability} - {p.eng_avoidance} - - {engR} - FK {Math.round(p.fk_score)} - - - ) - })} + {pairs.map((p, i) => ( + + {p.hazard_name || '—'} + {p.gt_severity} + {p.gt_frequency} + {p.gt_probability} + {p.gt_avoidance} + {p.gt_risk} + {p.eng_severity} + {p.eng_frequency} + {p.eng_probability} + {p.eng_avoidance} + + + {p.eng_risk_low}–{p.eng_risk_high} + + ≈{p.eng_risk_point} + FK {Math.round(p.fk_score)} +
{p.eng_risk_level_range}
+ + + + {p.confidence} + + + + ))}
diff --git a/admin-compliance/app/sdk/iace/[projectId]/benchmark/_hooks/useBenchmark.ts b/admin-compliance/app/sdk/iace/[projectId]/benchmark/_hooks/useBenchmark.ts index a9668067..f793dc81 100644 --- a/admin-compliance/app/sdk/iace/[projectId]/benchmark/_hooks/useBenchmark.ts +++ b/admin-compliance/app/sdk/iace/[projectId]/benchmark/_hooks/useBenchmark.ts @@ -53,6 +53,9 @@ export interface RiskComparisonPair { gt_severity: number; gt_frequency: number; gt_probability: number; gt_avoidance: number; gt_risk: number eng_severity: number; eng_frequency: number; eng_probability: number; eng_avoidance: number fk_score: number; fk_band: string + eng_risk_point: number; eng_risk_low: number; eng_risk_high: number + eng_risk_level: string; eng_risk_level_range: string + confidence: string // hoch | mittel | niedrig } export interface RiskAgreement { @@ -60,6 +63,7 @@ export interface RiskAgreement { severity_within1: number; frequency_within1: number probability_within1: number; avoidance_within1: number rank_concordance: number + high_confidence_pct: number } export interface BenchmarkResult { diff --git a/ai-compliance-sdk/internal/iace/risk_benchmark.go b/ai-compliance-sdk/internal/iace/risk_benchmark.go index 5b3b4097..43d4e778 100644 --- a/ai-compliance-sdk/internal/iace/risk_benchmark.go +++ b/ai-compliance-sdk/internal/iace/risk_benchmark.go @@ -19,6 +19,15 @@ type RiskComparisonPair struct { EngAvoidance int `json:"eng_avoidance"` FKScore float64 `json:"fk_score"` FKBand string `json:"fk_band"` + // Confidence-aware risk: a point estimate plus a plausible low/high band and + // a confidence label, so the tool reports a RANGE (not a false-precision + // point) — the assessment is confirmed by the DSB / safety expert. + EngRiskPoint int `json:"eng_risk_point"` + EngRiskLow int `json:"eng_risk_low"` + EngRiskHigh int `json:"eng_risk_high"` + EngRiskLevel string `json:"eng_risk_level"` // band of the point value + EngRiskLevelRange string `json:"eng_risk_level_range"` // e.g. "mittel–hoch" + Confidence string `json:"confidence"` // hoch / mittel / niedrig } // RiskAgreement aggregates how close the tool's risk numbers are to the GT. @@ -28,14 +37,15 @@ type RiskAgreement struct { FrequencyWithin1 float64 `json:"frequency_within1"` ProbabilityWithin1 float64 `json:"probability_within1"` AvoidanceWithin1 float64 `json:"avoidance_within1"` - RankConcordance float64 `json:"rank_concordance"` // Fine-Kinney vs GT R + RankConcordance float64 `json:"rank_concordance"` // Fine-Kinney vs GT R + HighConfidencePct float64 `json:"high_confidence_pct"` // share of matched hazards with "hoch" confidence } // ComputeRiskComparison derives the tool's risk numbers for each matched hazard // and compares them to the professional's GT values. func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, RiskAgreement) { pairs := make([]RiskComparisonPair, 0, len(matched)) - var sevOK, freqOK, probOK, avoidOK, n int + var sevOK, freqOK, probOK, avoidOK, n, hiConf int var engFK, gtR []float64 for _, m := range matched { @@ -54,11 +64,20 @@ func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, Ris fk := SuggestFineKinney(cats, scenario, lifecycle, 0) gt := m.GTEntry.RiskIn + rLow, rPoint, rHigh := EstimateRiskRange(engS, engF, engW, engP) + rLevel, rLevelRange := RiskLevelRange(rLow, rPoint, rHigh) + conf := EstimateConfidence(cats, scenario) + if conf == "hoch" { + hiConf++ + } + pairs = append(pairs, RiskComparisonPair{ HazardName: m.GTEntry.HazardType, GTSeverity: gt.S, GTFrequency: gt.F, GTProbability: gt.W, GTAvoidance: gt.P, GTRisk: gt.R, EngSeverity: engS, EngFrequency: engF, EngProbability: engW, EngAvoidance: engP, FKScore: fk.Score, FKBand: fk.Band, + EngRiskPoint: rPoint, EngRiskLow: rLow, EngRiskHigh: rHigh, + EngRiskLevel: rLevel, EngRiskLevelRange: rLevelRange, Confidence: conf, }) if gt.S > 0 { @@ -88,6 +107,9 @@ func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, Ris agg.AvoidanceWithin1 = pct(avoidOK, n) agg.RankConcordance = rankConcordance(engFK, gtR) } + if len(pairs) > 0 { + agg.HighConfidencePct = pct(hiConf, len(pairs)) + } return pairs, agg } diff --git a/ai-compliance-sdk/internal/iace/risk_estimation.go b/ai-compliance-sdk/internal/iace/risk_estimation.go index 604375c0..b8633d89 100644 --- a/ai-compliance-sdk/internal/iace/risk_estimation.go +++ b/ai-compliance-sdk/internal/iace/risk_estimation.go @@ -203,16 +203,92 @@ func EstimateRiskLevel(s, f, w, p int) (int, string) { s = 1 } idx := s * (f + w + p) + return idx, riskBandLabel(idx) +} + +// riskBandLabel maps a risk index (3..75) to BreakPilot's German level band. +// Single source of truth for the thresholds, shared by EstimateRiskLevel and +// the confidence-range derivation. +func riskBandLabel(idx int) string { switch { case idx >= 45: - return idx, "kritisch" + return "kritisch" case idx >= 30: - return idx, "hoch" + return "hoch" case idx >= 18: - return idx, "mittel" + return "mittel" case idx >= 9: - return idx, "gering" + return "gering" default: - return idx, "vernachlaessigbar" + return "vernachlaessigbar" } } + +func clampRisk1to5(x int) int { + if x < 1 { + return 1 + } + if x > 5 { + return 5 + } + return x +} + +// EstimateConfidence reports how well-anchored the tool's risk parameters are, +// from HOW the injury mechanism (contact mode) was resolved: an explicit +// scenario keyword → "hoch" (strong kinematic signal), a category fallback → +// "mittel", nothing → "niedrig" (parameters fell back to neutral). This is an +// honest signal that the point estimate is a heuristic, not a guarantee — the +// final assessment stays with the DSB / safety expert. +func EstimateConfidence(cats []string, scenario string) string { + text := normalizeDE(scenario) + for _, e := range contactModeKeywords { + for _, kw := range e.keywords { + if strings.Contains(text, kw) { + return "hoch" + } + } + } + for _, c := range cats { + if _, ok := categoryDefaultMode[c]; ok { + return "mittel" + } + } + return "niedrig" +} + +// EstimateRiskRange returns the point risk index plus a plausible low/high band. +// The band shifts severity S by ±1 and the aggregate likelihood L = F+W+P by ±1 +// (each within its domain). We move L as a whole rather than each of F/W/P +// independently because the validation shows the per-parameter errors largely +// cancel in the sum (W is within ±1 of the GT ~100% of the time). The result +// communicates that the risk number is an ESTIMATE with uncertainty rather than +// a false-precision point value — aligned with the confidence-aware tonality. +func EstimateRiskRange(s, f, w, p int) (low, point, high int) { + s = clampRisk1to5(s) + l := clampRisk1to5(f) + clampRisk1to5(w) + clampRisk1to5(p) // 3..15 + clampL := func(x int) int { + if x < 3 { + return 3 + } + if x > 15 { + return 15 + } + return x + } + point = s * l + low = clampRisk1to5(s-1) * clampL(l-1) + high = clampRisk1to5(s+1) * clampL(l+1) + return low, point, high +} + +// RiskLevelRange returns the German level band for the point plus a combined +// "low–high" range label (single label when low and high fall in the same band). +func RiskLevelRange(low, point, high int) (level, levelRange string) { + level = riskBandLabel(point) + ll, lh := riskBandLabel(low), riskBandLabel(high) + if ll == lh { + return level, ll + } + return level, ll + "–" + lh // en dash +} diff --git a/ai-compliance-sdk/internal/iace/risk_estimation_range_test.go b/ai-compliance-sdk/internal/iace/risk_estimation_range_test.go new file mode 100644 index 00000000..71c95657 --- /dev/null +++ b/ai-compliance-sdk/internal/iace/risk_estimation_range_test.go @@ -0,0 +1,58 @@ +package iace + +import "testing" + +func TestEstimateRiskRange(t *testing.T) { + tests := []struct { + name string + s, f, w, p int + wantLow, wantP, wantH int + }{ + // S=4, L=F+W+P=8 → point 32; low 3*clampL(7)=21; high 5*clampL(9)=45. + {"typical electrical", 4, 3, 2, 3, 21, 32, 45}, + // Min likelihood: L=3; low clamps L to 3 (clampL(2)=3) and S to 1. + {"low end clamps", 2, 1, 1, 1, 3, 6, 12}, + // Max: S=5, L=15 → point 75; high clamps S to 5 and L to 15. + {"high end clamps", 5, 5, 5, 5, 56, 75, 75}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + low, point, high := EstimateRiskRange(tc.s, tc.f, tc.w, tc.p) + if low != tc.wantLow || point != tc.wantP || high != tc.wantH { + t.Errorf("EstimateRiskRange(%d,%d,%d,%d) = (%d,%d,%d), want (%d,%d,%d)", + tc.s, tc.f, tc.w, tc.p, low, point, high, tc.wantLow, tc.wantP, tc.wantH) + } + if low > point || point > high { + t.Errorf("range not ordered: low=%d point=%d high=%d", low, point, high) + } + }) + } +} + +func TestEstimateConfidence(t *testing.T) { + cases := []struct { + cats []string + scenario string + want string + }{ + {[]string{"mechanical_hazard"}, "Quetschen der Hand im Werkzeugraum", "hoch"}, // keyword "quetsch" + {[]string{"electrical_hazard"}, "Elektrischer Schlag am Gehaeuse", "hoch"}, // keyword "elektrisch" + {[]string{"mechanical_hazard"}, "Allgemeine Restgefahr an der Anlage", "mittel"}, // category fallback + {[]string{"made_up_category"}, "Unspezifische Situation", "niedrig"}, // nothing + } + for _, tc := range cases { + if got := EstimateConfidence(tc.cats, tc.scenario); got != tc.want { + t.Errorf("EstimateConfidence(%v, %q) = %q, want %q", tc.cats, tc.scenario, got, tc.want) + } + } +} + +func TestRiskLevelRange(t *testing.T) { + // Same band low+high → single label; spanning bands → "low–high". + if lvl, rng := RiskLevelRange(9, 12, 16); lvl != "gering" || rng != "gering" { + t.Errorf("single-band: got (%q,%q), want (gering,gering)", lvl, rng) + } + if lvl, rng := RiskLevelRange(21, 32, 45); lvl != "hoch" || rng != "mittel–kritisch" { + t.Errorf("multi-band: got (%q,%q), want (hoch, mittel–kritisch)", lvl, rng) + } +}