diff --git a/ai-compliance-sdk/internal/api/handlers/iace_handler_init.go b/ai-compliance-sdk/internal/api/handlers/iace_handler_init.go
index 485859d3..52b44944 100644
--- a/ai-compliance-sdk/internal/api/handlers/iace_handler_init.go
+++ b/ai-compliance-sdk/internal/api/handlers/iace_handler_init.go
@@ -225,11 +225,12 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) {
 		// (see iace/risk_estimation.go + DATA_SOURCES.md). No EN ISO
 		// 13849-1 risk-graph table or parameter binning is reproduced.
 		if mp.DefaultSeverity > 0 && mp.DefaultExposure > 0 {
+			s := iace.EstimateSeverity(mp.HazardCats, mp.ScenarioDE, mp.DefaultSeverity)
 			w := iace.EstimateProbabilityW(mp.HazardCats, mp.ScenarioDE)
 			p := iace.EstimateAvoidabilityP(mp.HazardCats, mp.ScenarioDE)
-			_, level := iace.EstimateRiskLevel(mp.DefaultSeverity, mp.DefaultExposure, w, p)
+			_, level := iace.EstimateRiskLevel(s, mp.DefaultExposure, w, p)
 			desc += fmt.Sprintf("\n\nRisikoeinschaetzung (BreakPilot-Modell): S%d · F%d · W%d · P%d → Risiko: %s",
-				mp.DefaultSeverity, mp.DefaultExposure, w, p, level)
+				s, mp.DefaultExposure, w, p, level)
 		}
 		if mp.ISO12100Section != "" {
 			desc += "\n\nKlassifikation: EN ISO 12100 Abschnitt " + mp.ISO12100Section
diff --git a/ai-compliance-sdk/internal/iace/gt_risk_benchmark_test.go b/ai-compliance-sdk/internal/iace/gt_risk_benchmark_test.go
index bfbe33cb..e4c7748b 100644
--- a/ai-compliance-sdk/internal/iace/gt_risk_benchmark_test.go
+++ b/ai-compliance-sdk/internal/iace/gt_risk_benchmark_test.go
@@ -98,9 +98,9 @@ func kendallConcordance(engine, gt []float64) (float64, int) {
 }
 
 type riskAgg struct {
-	sev, freq, avoid axisStats
-	wEst, pEst       axisStats
-	noAvoidDefault   int
+	sev, freq, avoid   axisStats
+	wEst, pEst, sevEst axisStats
+	noAvoidDefault     int
 	engineRisk       []float64
 	newEngineRisk    []float64
 	gtRisk           []float64
@@ -112,9 +112,10 @@ type riskAgg struct {
 // W and P vs our current estimate — the input for calibrating contactModeTable.
 func TestGT_RiskCalibrationData(t *testing.T) {
 	type acc struct {
-		n              int
-		sumGTW, sumGTP int
-		estW, estP     int
+		n               int
+		sumGTW, sumGTP  int
+		sumEngS, sumGTS int
+		estW, estP      int
 	}
 	byMode := map[string]*acc{}
 
@@ -131,7 +132,7 @@ func TestGT_RiskCalibrationData(t *testing.T) {
 			if key == "" {
 				key = normalizeDE(pm.PatternName)
 			}
-			byName[key] = riskParams{cats: pm.HazardCats, scenario: pm.ScenarioDE}
+			byName[key] = riskParams{s: pm.DefaultSeverity, cats: pm.HazardCats, scenario: pm.ScenarioDE}
 		}
 		hazards, mitigations := patternsToHazardsAndMitigations(out)
 		res := CompareBenchmark(&gtData, hazards, mitigations)
@@ -152,14 +153,17 @@ func TestGT_RiskCalibrationData(t *testing.T) {
 			a.n++
 			a.sumGTW += mp.GTEntry.RiskIn.W
 			a.sumGTP += mp.GTEntry.RiskIn.P
+			a.sumEngS += rp.s
+			a.sumGTS += mp.GTEntry.RiskIn.S
 		}
 	}
 
-	t.Logf("=== Per-contact-mode calibration data (GT mean vs our tier) ===")
-	t.Logf(" %-18s %4s | %7s %7s | %7s %7s", "mode", "n", "estW", "gtW̄", "estP", "gtP̄")
+	t.Logf("=== Per-contact-mode calibration data (engine vs GT mean) ===")
+	t.Logf(" %-18s %4s | %5s %5s | %5s %5s | %6s %6s", "mode", "n", "estW", "gtW̄", "estP", "gtP̄", "engS̄", "gtS̄")
 	for mode, a := range byMode {
-		t.Logf(" %-18s %4d | %7d %7.1f | %7d %7.1f",
-			mode, a.n, a.estW, float64(a.sumGTW)/float64(a.n), a.estP, float64(a.sumGTP)/float64(a.n))
+		t.Logf(" %-18s %4d | %5d %5.1f | %5d %5.1f | %6.1f %6.1f",
+			mode, a.n, a.estW, float64(a.sumGTW)/float64(a.n), a.estP, float64(a.sumGTP)/float64(a.n),
+			float64(a.sumEngS)/float64(a.n), float64(a.sumGTS)/float64(a.n))
 	}
 }
 
@@ -216,9 +220,11 @@ func TestGT_RiskBenchmark(t *testing.T) {
 				overall.noAvoidDefault++
 			}
 
-			// NEW: data-anchored estimates for the two missing axes.
+			// NEW: data-anchored estimates for the three axes the engine got
+			// wrong (W missing, P missing, S systematically over-estimated).
 			estW := EstimateProbabilityW(rp.cats, rp.scenario)
 			estP := EstimateAvoidabilityP(rp.cats, rp.scenario)
+			estS := EstimateSeverity(rp.cats, rp.scenario, rp.s)
 			if gtR.W > 0 {
 				local.wEst.add(estW, gtR.W)
 				overall.wEst.add(estW, gtR.W)
@@ -227,14 +233,16 @@ func TestGT_RiskBenchmark(t *testing.T) {
 				local.pEst.add(estP, gtR.P)
 				overall.pEst.add(estP, gtR.P)
 			}
+			if gtR.S > 0 {
+				local.sevEst.add(estS, gtR.S)
+				overall.sevEst.add(estS, gtR.S)
+			}
 
 			// Two risk proxies for RANK comparison (our own aggregates, NOT a
-			// norm formula): OLD = today's engine (severity x exposure, with
-			// avoidability mostly unset); NEW = severity scaled by summed
-			// likelihood factors incl. the estimated W and P.
-			sev := maxInt(rp.s, 1)
-			oldProxy := float64(sev * maxInt(rp.f, 1) * maxInt(rp.a, 1))
-			newProxy := float64(sev * (maxInt(rp.f, 1) + estW + estP))
+			// norm formula): OLD = today's engine (raw severity x exposure);
+			// NEW = de-biased severity scaled by summed likelihood incl. W + P.
+			oldProxy := float64(maxInt(rp.s, 1) * maxInt(rp.f, 1) * maxInt(rp.a, 1))
+			newProxy := float64(maxInt(estS, 1) * (maxInt(rp.f, 1) + estW + estP))
 			local.engineRisk = append(local.engineRisk, oldProxy)
 			local.newEngineRisk = append(local.newEngineRisk, newProxy)
 			local.gtRisk = append(local.gtRisk, float64(gtR.R))
@@ -247,7 +255,8 @@ func TestGT_RiskBenchmark(t *testing.T) {
 		newConc, pairs := kendallConcordance(local.newEngineRisk, local.gtRisk)
 		t.Logf("=== %s — Risk benchmark ===", c.name)
 		t.Logf(" Matched hazards w/ engine params: %d (%d pairs had no pattern param)", local.matched, local.noParam)
-		t.Logf(" Severity S: MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", local.sev.mae(), local.sev.pct(local.sev.within1), local.sev.pct(local.sev.exact), local.sev.n)
+		t.Logf(" Severity S (raw default): MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", local.sev.mae(), local.sev.pct(local.sev.within1), local.sev.pct(local.sev.exact), local.sev.n)
+		t.Logf(" Severity S (NEW estimate): MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", local.sevEst.mae(), local.sevEst.pct(local.sevEst.within1), local.sevEst.pct(local.sevEst.exact), local.sevEst.n)
 		t.Logf(" Frequency F: MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", local.freq.mae(), local.freq.pct(local.freq.within1), local.freq.pct(local.freq.exact), local.freq.n)
 		t.Logf(" Probability W (NEW estimate): MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", local.wEst.mae(), local.wEst.pct(local.wEst.within1), local.wEst.pct(local.wEst.exact), local.wEst.n)
 		t.Logf(" Avoidance P (NEW estimate): MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", local.pEst.mae(), local.pEst.pct(local.pEst.within1), local.pEst.pct(local.pEst.exact), local.pEst.n)
@@ -257,7 +266,8 @@ func TestGT_RiskBenchmark(t *testing.T) {
 	oldConc, _ := kendallConcordance(overall.engineRisk, overall.gtRisk)
 	newConc, pairs := kendallConcordance(overall.newEngineRisk, overall.gtRisk)
 	t.Logf("\n=== Cross-GT aggregate ===")
-	t.Logf(" Severity S: MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", overall.sev.mae(), overall.sev.pct(overall.sev.within1), overall.sev.pct(overall.sev.exact), overall.sev.n)
+	t.Logf(" Severity S (raw default): MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", overall.sev.mae(), overall.sev.pct(overall.sev.within1), overall.sev.pct(overall.sev.exact), overall.sev.n)
+	t.Logf(" Severity S (NEW estimate): MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", overall.sevEst.mae(), overall.sevEst.pct(overall.sevEst.within1), overall.sevEst.pct(overall.sevEst.exact), overall.sevEst.n)
 	t.Logf(" Frequency F: MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", overall.freq.mae(), overall.freq.pct(overall.freq.within1), overall.freq.pct(overall.freq.exact), overall.freq.n)
 	t.Logf(" Probability W (NEW): MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", overall.wEst.mae(), overall.wEst.pct(overall.wEst.within1), overall.wEst.pct(overall.wEst.exact), overall.wEst.n)
 	t.Logf(" Avoidance P (NEW): MAE %.2f | within±1 %.0f%% | exact %.0f%% (n=%d)", overall.pEst.mae(), overall.pEst.pct(overall.pEst.within1), overall.pEst.pct(overall.pEst.exact), overall.pEst.n)
diff --git a/ai-compliance-sdk/internal/iace/risk_estimation.go b/ai-compliance-sdk/internal/iace/risk_estimation.go
index a9ead673..b2d19800 100644
--- a/ai-compliance-sdk/internal/iace/risk_estimation.go
+++ b/ai-compliance-sdk/internal/iace/risk_estimation.go
@@ -35,6 +35,10 @@ type contactMode struct {
 	// Anchored to injury kinematics (sudden, no-warning events are hard to
 	// avoid; gradual exposure is easy). OUR reasoning, no norm table.
 	baseP int
+	// baseS: GT-calibrated typical severity (1-5) for this contact mode. Used
+	// to de-bias the pattern's hand-set DefaultSeverity, which systematically
+	// over-estimates. OUR calibrated scale, no norm table.
+	baseS int
 }
 
 // contactModeTable — our tiers. Initially anchored to the public ESAW
@@ -46,19 +50,20 @@ type contactMode struct {
 // hard-code per-machine values into patterns. See DATA_SOURCES.md for the
 // public-data provenance and license.
 var contactModeTable = map[string]contactMode{
-	"impact_stationary": {"impact_stationary", 3, 1}, // seen coming -> easy to avoid
-	"struck_by":         {"struck_by", 2, 3},         // GT-calibrated (n=14)
-	"crushing":          {"crushing", 2, 3},          // GT-calibrated (n=40)
-	"cutting":           {"cutting", 2, 3},
-	"entanglement":      {"entanglement", 3, 3},
-	"shearing":          {"shearing", 2, 3},
-	"fall":              {"fall", 3, 4},       // higher avoidance difficulty in GT
-	"electrical":        {"electrical", 2, 3}, // GT-calibrated (n=20)
-	"thermal":           {"thermal", 2, 2},
-	"ergonomic":         {"ergonomic", 2, 3},
-	"chemical":          {"chemical", 2, 3},
-	"pressure_burst":    {"pressure_burst", 2, 3},
-	"radiation":         {"radiation", 2, 3},
+	// name W P S (S = GT-calibrated typical severity)
+	"impact_stationary": {"impact_stationary", 3, 1, 2},
+	"struck_by":         {"struck_by", 2, 3, 3}, // GT n=14 (S̄ 2.5)
+	"crushing":          {"crushing", 2, 3, 2},  // GT n=40 (S̄ 2.2)
+	"cutting":           {"cutting", 2, 3, 3},
+	"entanglement":      {"entanglement", 3, 3, 3},
+	"shearing":          {"shearing", 2, 3, 3}, // GT n=4 (S̄ 3.2)
+	"fall":              {"fall", 3, 4, 3},
+	"electrical":        {"electrical", 2, 3, 4}, // GT n=20 (S̄ 3.6)
+	"thermal":           {"thermal", 2, 2, 2},
+	"ergonomic":         {"ergonomic", 2, 3, 2},
+	"chemical":          {"chemical", 2, 3, 2},
+	"pressure_burst":    {"pressure_burst", 2, 3, 2},
+	"radiation":         {"radiation", 2, 3, 3},
 }
 
 // contactModeKeywords maps umlaut-normalised scenario keywords to a contact
@@ -134,6 +139,38 @@ func EstimateAvoidabilityP(cats []string, scenario string) int {
 	return 3
 }
 
+// EstimateSeverity de-biases the pattern's hand-set DefaultSeverity by blending
+// it 50/50 with the contact mode's GT-calibrated typical severity (baseS). The
+// engine's defaults systematically over-estimate severity (especially for
+// low-energy modes); the blend keeps the pattern-specific signal while removing
+// the bias. OUR model, no norm table. Falls back to the default when the mode
+// is unknown. A default above 5 is capped at 5 on the fallback path as well as
+// on the blended path, so callers never see a severity above 5.
+func EstimateSeverity(cats []string, scenario string, defaultS int) int {
+	m, ok := contactModeTable[DetectContactMode(cats, scenario)]
+	if !ok || m.baseS == 0 {
+		if defaultS < 1 {
+			return 3
+		}
+		// Unknown mode: trust the pattern's value, but keep it on the scale.
+		if defaultS > 5 {
+			return 5
+		}
+		return defaultS
+	}
+	if defaultS < 1 {
+		return m.baseS
+	}
+	s := (defaultS + m.baseS + 1) / 2 // 50/50 blend, round half up
+	if s > 5 {
+		s = 5
+	}
+	if s < 1 {
+		s = 1
+	}
+	return s
+}
+
 // EstimateRiskLevel combines the four parameters into BreakPilot's OWN risk
 // index and band. The index is a generic severity-weighted sum of the
 // likelihood factors — index = S * (F + W + P) — i.e. basic arithmetic on the