Files
breakpilot-compliance/ai-compliance-sdk/internal/iace/risk_estimation.go
T
Benjamin Admin 0f443b6a9c fix(iace): roadmap group B — citation/license/tier cleanup
C1: drop the misleading OSHA §1910.212(a)(5) fan-guard citation from M602
    (overhead lift clearance) — EN 349 + EN ISO 13854 already cover it.
C2: frame M237's 25/500 mm as Richtwerte to be determined per EN ISO 13854
    (single factual values in prose are facts, not table reproduction — but
    keep the conservative caveat).
C3: keep ergonomic W=2 deliberately and document why — ESAW ranks it the most
    frequent non-fatal mode (24.7%) but that population doesn't transfer to an
    acute machine point-hazard; the machine GT governs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-11 15:21:25 +02:00

300 lines
12 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package iace
import "strings"
// Risk parameter estimation — probability of occurrence (W) and possibility of
// avoidance (P) — for auto-generated hazards.
//
// COPYRIGHT / IP NOTE: This is BreakPilot's OWN heuristic model. It does NOT
// reproduce, transcribe or re-implement any DIN/Beuth/ISO/IEC risk-graph table,
// parameter decision tree, threshold or matrix. It derives values on OUR OWN
// 1-5 scale from (a) PUBLIC, permissively-licensed occupational-accident
// statistics organised by contact mode — primarily Eurostat ESAW (CC BY 4.0,
// commercial reuse permitted with source attribution); US BLS/OSHA (public
// domain) and UK HSE (Open Government Licence) are acceptable supplements —
// and (b) observable machine facts the engine already extracts (hazard
// category, scenario kinematics). The scale and weights are ours and are
// calibrated against our own ground-truth corpus, not copied from a standard.
// NOTE: DGUV statistics are NOT used — their terms permit only editorial use
// and forbid modification, so they are unsuitable for a commercial product.
// Provenance, exact figures used and attribution: see DATA_SOURCES.md.
//
// The universal risk DIMENSIONS (severity, frequency, probability, avoidance)
// are general engineering concepts, not protectable expression.
// contactMode is a coarse injury-mechanism class. Public accident statistics
// (primarily Eurostat ESAW, see the file header for the permitted sources)
// publish accident frequencies by such modes; we use that public ordering to
// anchor a relative probability tier, and the injury kinematics to anchor an
// avoidance tier. All three tiers live on our own 1-5 scale.
type contactMode struct {
	// name is the contact-mode key; in contactModeTable it repeats the map key.
	name string
	// baseW: relative probability-of-occurrence tier (1-5). Anchored to the
	// ESAW contact-mode frequency ranking (impact/struck-by/crush/cut are the
	// most frequent; pressure-burst/radiation are rare). OUR calibrated scale.
	baseW int
	// baseP: avoidance-difficulty tier (1-5; higher = harder to avoid).
	// Anchored to injury kinematics (sudden, no-warning events are hard to
	// avoid; gradual exposure is easy). OUR reasoning, no norm table.
	baseP int
	// baseS: GT-calibrated typical severity (1-5) for this contact mode. Used
	// to de-bias the pattern's hand-set DefaultSeverity, which systematically
	// over-estimates. OUR calibrated scale, no norm table. EstimateSeverity
	// treats a zero value as "no calibrated severity available".
	baseS int
}
// contactModeTable holds our probability (baseW), avoidance (baseP) and
// severity (baseS) tiers per contact mode, keyed by the mode name.
//
// The tiers were first anchored to the public ESAW contact-mode frequency
// ranking and then CALIBRATED against our own ground-truth corpus (the
// professional's W/P distribution per mode). Well-sampled modes (crushing
// n=40, electrical n=20, struck_by n=14) are set to the GT means;
// sparsely-sampled modes (n<=4) keep conservative defaults so that we do not
// overfit to noise from a 2-GT sample.
//
// This is the single place to tune; never hard-code per-machine values into
// patterns. See DATA_SOURCES.md for the public-data provenance and license.
var contactModeTable = map[string]contactMode{
	"impact_stationary": {name: "impact_stationary", baseW: 3, baseP: 1, baseS: 2},
	"struck_by":         {name: "struck_by", baseW: 2, baseP: 3, baseS: 3}, // GT n=14 (S̄ 2.5)
	"crushing":          {name: "crushing", baseW: 2, baseP: 3, baseS: 2},  // GT n=40 (S̄ 2.2)
	"cutting":           {name: "cutting", baseW: 2, baseP: 3, baseS: 3},
	"entanglement":      {name: "entanglement", baseW: 3, baseP: 3, baseS: 3},
	"shearing":          {name: "shearing", baseW: 2, baseP: 3, baseS: 3}, // GT n=4 (S̄ 3.2)
	"fall":              {name: "fall", baseW: 3, baseP: 4, baseS: 3},
	"electrical":        {name: "electrical", baseW: 2, baseP: 3, baseS: 4}, // GT n=20 (S̄ 3.6)
	"thermal":           {name: "thermal", baseW: 2, baseP: 2, baseS: 2},
	// Ergonomic W is kept at 2 on purpose. ESAW 2023 ranks ergonomic/stress as
	// the most frequent NON-FATAL contact mode (24.7%), but that population
	// mixes overexertion across all workplaces and does not transfer to an
	// ACUTE machine point-hazard. The machine-specific GT governs; raising W to
	// 3 would import a non-representative signal. See risk_data_sources.go /
	// project_iace_risk_stats_and_distances.
	"ergonomic":      {name: "ergonomic", baseW: 2, baseP: 3, baseS: 2},
	"chemical":       {name: "chemical", baseW: 2, baseP: 3, baseS: 2},
	"pressure_burst": {name: "pressure_burst", baseW: 2, baseP: 3, baseS: 2},
	"radiation":      {name: "radiation", baseW: 2, baseP: 3, baseS: 3},
}
// contactModeKeywords maps umlaut-normalised scenario keywords to a contact
// mode. Matching is a plain substring test and is ORDER-DEPENDENT: entries are
// scanned top to bottom and the first entry with a matching keyword wins. The
// order of keywords inside one entry does not matter.
//
// Consequence for maintenance: a keyword must never contain a keyword of an
// EARLIER entry as a substring, otherwise it can never be reached. A mode may
// therefore appear in more than one entry when some of its keywords need a
// higher precedence than others.
var contactModeKeywords = []struct {
	mode     string
	keywords []string
}{
	{"crushing", []string{"quetsch", "einklemm", "eingeklemmt", "klemm", "zerquetsch"}},
	{"entanglement", []string{"einzug", "eingezogen", "erfasst", "aufwickel", "umwickel", "wickelt"}},
	{"shearing", []string{"scher"}},
	{"cutting", []string{"schneid", "schnitt", "scharfe kante", "abtrenn", "amputation", "stich"}},
	{"electrical", []string{"stromschlag", "spannungsfuehr", "koerperdurchstroem", "beruehrungsspannung", "lichtbogen", "elektrisch"}},
	{"thermal", []string{"verbrenn", "verbruehung", "heisse", "thermisch", "heisser"}},
	{"pressure_burst", []string{"bersten", "hochdruck", "ueberdruck", "druckbehaelter", "injektion"}},
	// "herabstuerz" (an object plunging down) contains the fall keyword
	// "stuerz". It has to be tested BEFORE the fall entry; listed after it, the
	// keyword was unreachable and falling objects were classified as a person
	// falling.
	{"struck_by", []string{"herabstuerz"}},
	{"fall", []string{"sturz", "stuerz", "absturz", "ausrutsch", "stolper", "abstuerz"}},
	{"struck_by", []string{"weggeschleudert", "geschleudert", "geschoss", "herabfallen", "getroffen", "wegfliegen", "fallende last", "schlag"}},
	{"impact_stationary", []string{"anstossen", "anprall", "stossen gegen", "stoss gegen"}},
	{"ergonomic", []string{"belastung", "ergonom", "zwangshaltung", "manuelles heben", "ueberlastung"}},
	{"chemical", []string{"exposition", "gefahrstoff", "daempfe", "kontamination", "reizung", "aerosol", "vergiftung"}},
}
// categoryDefaultMode is the fallback contact mode per hazard category when the
// scenario text carries no specific kinematic keyword. Keys are hazard-category
// identifiers, values are contact-mode keys of contactModeTable. A category
// that is not listed here yields no fallback, so callers end up with their
// "unknown mode" defaults.
var categoryDefaultMode = map[string]string{
	"mechanical_hazard":      "crushing",
	"electrical_hazard":      "electrical",
	"thermal_hazard":         "thermal",
	"chemical_hazard":        "chemical",
	"material_environmental": "chemical",
	"ergonomic":              "ergonomic",
	"noise_vibration":        "ergonomic",
	"radiation_hazard":       "radiation",
	"fire_explosion":         "thermal",
	"pneumatic_hydraulic":    "pressure_burst",
}
// DetectContactMode classifies a hazard's injury mechanism.
//
// The normalised scenario text is checked first: contactModeKeywords is walked
// in declaration order and the mode of the first entry with a keyword contained
// in the text is returned. Only if no keyword matches are the hazard categories
// consulted, in the order given, via categoryDefaultMode. The result is the
// contact-mode key, or "" when neither source yields a mode.
func DetectContactMode(cats []string, scenario string) string {
	normalized := normalizeDE(scenario)
	for i := range contactModeKeywords {
		entry := &contactModeKeywords[i]
		for _, keyword := range entry.keywords {
			if strings.Contains(normalized, keyword) {
				return entry.mode
			}
		}
	}

	// No kinematic keyword in the text: fall back to the first known category.
	for _, category := range cats {
		mode, known := categoryDefaultMode[category]
		if known {
			return mode
		}
	}
	return ""
}
// EstimateProbabilityW returns the probability-of-occurrence tier (1-5) for a
// hazard, anchored to the public accident-frequency ranking of its contact
// mode. Returns 3 (neutral) when the mode is unknown.
func EstimateProbabilityW(cats []string, scenario string) int {
if m, ok := contactModeTable[DetectContactMode(cats, scenario)]; ok {
return m.baseW
}
return 3
}
// EstimateAvoidabilityP returns the avoidance-difficulty tier (1-5; higher =
// harder to avoid) from the contact mode's kinematics. Returns 3 when unknown.
func EstimateAvoidabilityP(cats []string, scenario string) int {
if m, ok := contactModeTable[DetectContactMode(cats, scenario)]; ok {
return m.baseP
}
return 3
}
// EstimateSeverity de-biases the pattern's hand-set DefaultSeverity by blending
// it 50/50 with the contact mode's GT-calibrated typical severity (baseS). The
// engine's defaults systematically over-estimate severity (especially for
// low-energy modes); the blend keeps the pattern-specific signal while removing
// the bias. OUR model, no norm table.
//
// Result by case:
//   - mode unknown (or baseS unset) and defaultS < 1: 3
//   - mode unknown (or baseS unset):                  defaultS, unchanged
//   - mode known and defaultS < 1:                    baseS
//   - otherwise: the mean of defaultS and baseS, rounded half up and limited
//     to 1..5
func EstimateSeverity(cats []string, scenario string, defaultS int) int {
	entry, known := contactModeTable[DetectContactMode(cats, scenario)]
	calibrated := known && entry.baseS != 0
	hasDefault := defaultS >= 1

	switch {
	case !calibrated && !hasDefault:
		return 3
	case !calibrated:
		return defaultS
	case !hasDefault:
		return entry.baseS
	}

	// Adding 1 before the integer division rounds a .5 mean upwards.
	blended := (defaultS + entry.baseS + 1) / 2
	switch {
	case blended > 5:
		return 5
	case blended < 1:
		return 1
	default:
		return blended
	}
}
// EstimateFrequency maps the active lifecycle phases to an exposure-frequency
// value for the EN-62061-style model (how often a person is exposed to the
// task). Our own scale, no norm table.
//
// Calibrated to the professional's scale: the GT assigns lower exposure
// frequencies than a naive "operating = high" mapping (engine F was
// systematically ~1 too high vs the GT). The result is 3 (regular exposure) as
// soon as any phase name contains "normal_operation", "auto_operation" or
// "manual_operation", and 2 in every other case, including an empty phase list.
func EstimateFrequency(phases []string) int {
	operatingMarkers := [...]string{"normal_operation", "auto_operation", "manual_operation"}

	for _, phase := range phases {
		for _, marker := range operatingMarkers {
			if strings.Contains(phase, marker) {
				return 3
			}
		}
	}

	// Occasional phases (setup, maintenance, cleaning, changeover) and
	// unrecognised phases currently share the same value.
	return 2
}
// EstimateRiskLevel combines the four parameters into BreakPilot's OWN risk
// index and band. The index is a generic severity-weighted sum of the
// likelihood factors — index = S * (F + W + P) — i.e. basic arithmetic on the
// universal risk dimensions. It is NOT a reproduction of any standard's
// risk graph, parameter table or SIL/PL matrix. The bands are ours, tuned to
// our ground-truth corpus.
//
// Every parameter is first limited to the 1-5 scale, so an unset (0) or
// out-of-range value can never push the index outside the documented 3..75
// range, and the index always equals the point value of EstimateRiskRange for
// the same inputs. Returns (index 3..75, German level label).
func EstimateRiskLevel(s, f, w, p int) (int, string) {
	likelihood := clampRisk1to5(f) + clampRisk1to5(w) + clampRisk1to5(p)
	idx := clampRisk1to5(s) * likelihood
	return idx, riskBandLabel(idx)
}

// riskBandLabel maps a risk index (3..75) to BreakPilot's German level band.
// Single source of truth for the thresholds, shared by EstimateRiskLevel and
// the confidence-range derivation. Thresholds are inclusive lower bounds:
// >=45 "kritisch", >=30 "hoch", >=18 "mittel", >=9 "gering", anything lower
// "vernachlaessigbar".
func riskBandLabel(idx int) string {
	switch {
	case idx >= 45:
		return "kritisch"
	case idx >= 30:
		return "hoch"
	case idx >= 18:
		return "mittel"
	case idx >= 9:
		return "gering"
	default:
		return "vernachlaessigbar"
	}
}

// clampRisk1to5 limits a risk parameter to the 1-5 scale: values below 1
// become 1, values above 5 become 5, everything else is returned unchanged.
func clampRisk1to5(x int) int {
	if x < 1 {
		return 1
	}
	if x > 5 {
		return 5
	}
	return x
}
// EstimateConfidence reports how well-anchored the tool's risk parameters are,
// judged by HOW the injury mechanism (contact mode) was resolved:
//
//   - "hoch":    the normalised scenario text contains a contact-mode keyword
//     (strong kinematic signal)
//   - "mittel":  no keyword, but at least one category has a default mode
//   - "niedrig": neither; the parameters fell back to neutral values
//
// This is an honest signal that the point estimate is a heuristic, not a
// guarantee — the final assessment stays with the DSB / safety expert.
func EstimateConfidence(cats []string, scenario string) string {
	normalized := normalizeDE(scenario)
	for i := range contactModeKeywords {
		for _, keyword := range contactModeKeywords[i].keywords {
			if strings.Contains(normalized, keyword) {
				return "hoch"
			}
		}
	}

	for _, category := range cats {
		_, hasDefault := categoryDefaultMode[category]
		if hasDefault {
			return "mittel"
		}
	}
	return "niedrig"
}
// EstimateRiskRange returns the point risk index plus a plausible low/high band.
// All four inputs are first limited to 1..5. The band then shifts severity S by
// ±1 (kept within 1..5) and the aggregate likelihood L = F+W+P by ±1 (kept
// within 3..15).
//
// L is moved as a whole rather than F/W/P individually because the validation
// shows the per-parameter errors largely cancel in the sum (W is within ±1 of
// the GT ~100% of the time). The result communicates that the risk number is an
// ESTIMATE with uncertainty rather than a false-precision point value — aligned
// with the confidence-aware tonality.
func EstimateRiskRange(s, f, w, p int) (low, point, high int) {
	const (
		minLikelihood = 3
		maxLikelihood = 15
	)

	severity := clampRisk1to5(s)
	likelihood := clampRisk1to5(f) + clampRisk1to5(w) + clampRisk1to5(p)

	// likelihood is already within 3..15, so the lowered value can only break
	// the lower bound and the raised value only the upper bound.
	lowerL := likelihood - 1
	if lowerL < minLikelihood {
		lowerL = minLikelihood
	}
	upperL := likelihood + 1
	if upperL > maxLikelihood {
		upperL = maxLikelihood
	}

	low = clampRisk1to5(severity-1) * lowerL
	point = severity * likelihood
	high = clampRisk1to5(severity+1) * upperL
	return low, point, high
}
// RiskLevelRange returns the German level band for the point index plus a
// combined range label built from the bands of low and high. When both fall
// into the same band the range label is that single band; otherwise it is the
// low band and the high band joined by an en dash (U+2013), e.g. "gering" and
// "hoch" give the two labels separated by that dash.
func RiskLevelRange(low, point, high int) (level, levelRange string) {
	level = riskBandLabel(point)
	lowLabel, highLabel := riskBandLabel(low), riskBandLabel(high)
	if lowLabel == highLabel {
		return level, lowLabel
	}
	// The separator is written as an escape so that the en dash cannot be lost
	// or confused with a hyphen by editors and code viewers; with an empty
	// separator the two labels would run together.
	return level, lowLabel + "\u2013" + highLabel
}