Files
breakpilot-compliance/ai-compliance-sdk/internal/iace/risk_estimation.go
T
Benjamin Admin 77536f04b7
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 11s
CI / loc-budget (push) Successful in 14s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Failing after 38s
CI / iace-gt-coverage (push) Successful in 23s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
feat(iace): dual-model risk-suggestion endpoint for Risikobewertung tab
GET /projects/:id/hazards/:hid/risk-suggestion returns BreakPilot's justified
starting values for BOTH risk models per hazard:
- EN-62061-style F/W/P/S (the Excel format the professional knows)
- Fine-Kinney P/E/C (US-recognized)
each with a plain-language justification + the visible formula. Read-only and
computed from public-data anchors (ESAW/NIOSH/OSHA via the engine estimators) —
the professional adjusts the values; no norm table is stored or reproduced.

Adds EstimateFrequency (lifecycle -> 1-5) and BuildRiskSuggestion. Go SDK has no
OpenAPI baseline, so the only contract surface is the frontend consumer (the new
Risikobewertung tab, next).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-09 15:35:39 +02:00

217 lines
8.9 KiB
Go

package iace
import "strings"
// Risk parameter estimation for auto-generated hazards: severity (S), exposure
// frequency (F), probability of occurrence (W) and possibility of avoidance
// (P), plus the combined risk index and band.
//
// COPYRIGHT / IP NOTE: This is BreakPilot's OWN heuristic model. It does NOT
// reproduce, transcribe or re-implement any DIN/Beuth/ISO/IEC risk-graph table,
// parameter decision tree, threshold or matrix. It derives values on OUR OWN
// 1-5 scale from (a) PUBLIC, permissively-licensed occupational-accident
// statistics organised by contact mode — primarily Eurostat ESAW (CC BY 4.0,
// commercial reuse permitted with source attribution); US BLS/OSHA (public
// domain) and UK HSE (Open Government Licence) are acceptable supplements —
// and (b) observable machine facts the engine already extracts (hazard
// category, scenario kinematics). The scale and weights are ours and are
// calibrated against our own ground-truth corpus, not copied from a standard.
// NOTE: DGUV statistics are NOT used — their terms permit only editorial use
// and forbid modification, so they are unsuitable for a commercial product.
// Provenance, exact figures used and attribution: see DATA_SOURCES.md.
//
// The universal risk DIMENSIONS (severity, frequency, probability, avoidance)
// are general engineering concepts, not protectable expression.
// contactMode is a coarse injury-mechanism class. Public accident statistics
// (primarily Eurostat ESAW) report accident frequencies by such modes; that
// public ordering anchors a relative probability tier, and the injury
// kinematics anchor an avoidance tier. Every tier lives on OUR calibrated 1-5
// scale; no norm table is involved.
type contactMode struct {
	// name identifies the contact mode, e.g. "crushing".
	name string

	// The three tiers below are declared in the positional order W, P, S.
	//
	// baseW is the relative probability-of-occurrence tier (1-5), anchored to
	// the ESAW contact-mode frequency ranking (impact/struck-by/crush/cut are
	// the most frequent; pressure-burst/radiation are rare).
	//
	// baseP is the avoidance-difficulty tier (1-5; higher = harder to avoid),
	// anchored to injury kinematics: sudden, no-warning events are hard to
	// avoid, gradual exposure is easy. OUR reasoning, no norm table.
	//
	// baseS is the GT-calibrated typical severity (1-5) for this contact mode.
	// It is used to de-bias the pattern's hand-set DefaultSeverity, which
	// systematically over-estimates.
	baseW, baseP, baseS int
}
// contactModeTable holds our W/P/S tiers per contact mode. The values were
// first anchored to the public ESAW contact-mode frequency ranking and then
// CALIBRATED against our own ground-truth corpus (the professional's W/P
// distribution per mode):
//
//   - well-sampled modes (crushing n=40, electrical n=20, struck_by n=14) are
//     set to the GT means;
//   - sparsely-sampled modes (n<=4) keep conservative defaults so that noise
//     from a 2-GT sample is not overfitted.
//
// This is the single place to tune; never hard-code per-machine values into
// patterns. See DATA_SOURCES.md for the public-data provenance and license.
var contactModeTable = map[string]contactMode{
	"impact_stationary": {name: "impact_stationary", baseW: 3, baseP: 1, baseS: 2},
	"struck_by":         {name: "struck_by", baseW: 2, baseP: 3, baseS: 3}, // GT n=14 (S̄ 2.5)
	"crushing":          {name: "crushing", baseW: 2, baseP: 3, baseS: 2},  // GT n=40 (S̄ 2.2)
	"cutting":           {name: "cutting", baseW: 2, baseP: 3, baseS: 3},
	"entanglement":      {name: "entanglement", baseW: 3, baseP: 3, baseS: 3},
	"shearing":          {name: "shearing", baseW: 2, baseP: 3, baseS: 3}, // GT n=4 (S̄ 3.2)
	"fall":              {name: "fall", baseW: 3, baseP: 4, baseS: 3},
	"electrical":        {name: "electrical", baseW: 2, baseP: 3, baseS: 4}, // GT n=20 (S̄ 3.6)
	"thermal":           {name: "thermal", baseW: 2, baseP: 2, baseS: 2},
	"ergonomic":         {name: "ergonomic", baseW: 2, baseP: 3, baseS: 2},
	"chemical":          {name: "chemical", baseW: 2, baseP: 3, baseS: 2},
	"pressure_burst":    {name: "pressure_burst", baseW: 2, baseP: 3, baseS: 2},
	"radiation":         {name: "radiation", baseW: 2, baseP: 3, baseS: 3},
}
// contactModeKeywords maps umlaut-normalised scenario keywords to a contact
// mode. Matching is plain substring search and ORDER-DEPENDENT: entries are
// tried top to bottom and the first entry with a matching keyword wins. A
// keyword that contains a keyword of an earlier entry can therefore never
// fire, and a very short stem matches inside unrelated words. A mode may
// appear in more than one entry when a single keyword has to be tried ahead
// of a conflicting shorter one.
var contactModeKeywords = []struct {
	mode     string
	keywords []string
}{
	{"crushing", []string{"quetsch", "einklemm", "eingeklemmt", "klemm", "zerquetsch"}},
	{"entanglement", []string{"einzug", "eingezogen", "erfasst", "aufwickel", "umwickel", "wickelt"}},
	// Shearing is listed as explicit stems. The bare stem "scher" also matches
	// every adjective ending in "-ischer" ("elektrischer", "thermischer",
	// "mechanischer") and nouns such as "Waermetauscher" or "Mischer", which
	// classified those scenarios as shearing before the correct mode was tried.
	{"shearing", []string{"scherstell", "scherkant", "scherbeweg", "scherwirk", "scherkraft", "scherkraeft", "schergefa", "scherspalt", "scherbereich", "scherung", "scheren", "abscher", "geschert"}},
	{"cutting", []string{"schneid", "schnitt", "scharfe kante", "abtrenn", "amputation", "stich"}},
	{"electrical", []string{"stromschlag", "spannungsfuehr", "koerperdurchstroem", "beruehrungsspannung", "lichtbogen", "elektrisch"}},
	{"thermal", []string{"verbrenn", "verbruehung", "heisse", "thermisch", "heisser"}},
	{"pressure_burst", []string{"bersten", "hochdruck", "ueberdruck", "druckbehaelter", "injektion"}},
	// "herabstuerz" contains the fall keyword "stuerz", so it has to be tried
	// before the fall entry; listed after it, it could never match.
	{"struck_by", []string{"herabstuerz"}},
	{"fall", []string{"sturz", "stuerz", "absturz", "ausrutsch", "stolper", "abstuerz"}},
	{"struck_by", []string{"weggeschleudert", "geschleudert", "geschoss", "herabfallen", "getroffen", "wegfliegen", "fallende last", "schlag"}},
	{"impact_stationary", []string{"anstossen", "anprall", "stossen gegen", "stoss gegen"}},
	{"ergonomic", []string{"belastung", "ergonom", "zwangshaltung", "manuelles heben", "ueberlastung"}},
	{"chemical", []string{"exposition", "gefahrstoff", "daempfe", "kontamination", "reizung", "aerosol", "vergiftung"}},
}
// categoryDefaultMode gives the contact mode assumed for a hazard category
// when the scenario text contains no specific kinematic keyword. Entries are
// grouped by hazard family; lookup order is irrelevant because this is a map.
var categoryDefaultMode = map[string]string{
	// Mechanical and fluid-power energy.
	"mechanical_hazard":   "crushing",
	"pneumatic_hydraulic": "pressure_burst",

	// Electrical, heat, fire and radiation.
	"electrical_hazard": "electrical",
	"thermal_hazard":    "thermal",
	"fire_explosion":    "thermal",
	"radiation_hazard":  "radiation",

	// Substances and environment.
	"chemical_hazard":        "chemical",
	"material_environmental": "chemical",

	// Strain on the human body.
	"ergonomic":       "ergonomic",
	"noise_vibration": "ergonomic",
}
// DetectContactMode resolves the contact mode (injury mechanism) of a hazard.
//
// The scenario text is normalised with normalizeDE and scanned against
// contactModeKeywords in table order; the first keyword found as a substring
// decides the mode. When no keyword matches, the first entry of cats that has
// a categoryDefaultMode mapping supplies the mode. Returns "" when neither
// lookup succeeds.
func DetectContactMode(cats []string, scenario string) string {
	normalized := normalizeDE(scenario)

	// Scenario text is the more specific signal, so it is consulted first.
	for i := range contactModeKeywords {
		entry := &contactModeKeywords[i]
		for _, needle := range entry.keywords {
			if strings.Contains(normalized, needle) {
				return entry.mode
			}
		}
	}

	// Fall back to the per-category default.
	for _, category := range cats {
		mode, known := categoryDefaultMode[category]
		if !known {
			continue
		}
		return mode
	}
	return ""
}
// EstimateProbabilityW reports the probability-of-occurrence tier (1-5) of a
// hazard: the baseW value that contactModeTable holds for the contact mode
// detected from cats and scenario. The tier is anchored to the public
// accident-frequency ranking of that mode. When the detected mode has no table
// entry the neutral tier 3 is returned.
func EstimateProbabilityW(cats []string, scenario string) int {
	const neutralTier = 3

	entry, known := contactModeTable[DetectContactMode(cats, scenario)]
	if !known {
		return neutralTier
	}
	return entry.baseW
}
// EstimateAvoidabilityP reports the avoidance-difficulty tier (1-5; higher =
// harder to avoid) of a hazard: the baseP value that contactModeTable holds
// for the contact mode detected from cats and scenario, i.e. a tier derived
// from the mode's kinematics. When the detected mode has no table entry the
// neutral tier 3 is returned.
func EstimateAvoidabilityP(cats []string, scenario string) int {
	const neutralTier = 3

	entry, known := contactModeTable[DetectContactMode(cats, scenario)]
	if !known {
		return neutralTier
	}
	return entry.baseP
}
// EstimateSeverity de-biases a pattern's hand-set DefaultSeverity. The engine's
// defaults systematically over-estimate severity (especially for low-energy
// modes), so defaultS is blended 50/50 with the GT-calibrated typical severity
// (baseS) of the detected contact mode; this keeps the pattern-specific signal
// while removing the bias. OUR model, no norm table.
//
// Fallbacks, in order:
//   - no usable baseS (mode unknown or baseS == 0): defaultS is returned as
//     is, or 3 when defaultS is below 1;
//   - defaultS below 1: baseS is returned;
//   - otherwise: the blend, rounded half up and clamped to 1..5.
func EstimateSeverity(cats []string, scenario string, defaultS int) int {
	entry, known := contactModeTable[DetectContactMode(cats, scenario)]
	hasTypical := known && entry.baseS != 0
	hasDefault := defaultS >= 1

	switch {
	case !hasTypical && !hasDefault:
		return 3
	case !hasTypical:
		return defaultS
	case !hasDefault:
		return entry.baseS
	}

	// The +1 before the integer division rounds a .5 mean upwards.
	blended := (defaultS + entry.baseS + 1) / 2
	switch {
	case blended > 5:
		return 5
	case blended < 1:
		return 1
	}
	return blended
}
// EstimateFrequency maps the active lifecycle phases to a 1-5 exposure-frequency
// value for the EN-62061-style model (how often a person is exposed to the
// task). Our own scale, no norm table.
//
// Phase names are matched by substring. The result is:
//   - 3 when phases is empty;
//   - 4 when any phase contains an operating marker (normal_operation,
//     auto_operation, manual_operation);
//   - 3 when any phase contains an intervention marker (setup, maintenance,
//     cleaning, changeover);
//   - 2 for any other non-empty phase list.
func EstimateFrequency(phases []string) int {
	if len(phases) == 0 {
		return 3
	}

	operating := [...]string{"normal_operation", "auto_operation", "manual_operation"}
	intervention := [...]string{"setup", "maintenance", "cleaning", "changeover"}

	// anyPhaseContains reports whether at least one phase contains at least
	// one of the given markers.
	anyPhaseContains := func(markers []string) bool {
		for _, phase := range phases {
			for _, marker := range markers {
				if strings.Contains(phase, marker) {
					return true
				}
			}
		}
		return false
	}

	if anyPhaseContains(operating[:]) {
		return 4
	}
	if anyPhaseContains(intervention[:]) {
		return 3
	}
	return 2
}
// EstimateRiskLevel combines the four parameters into BreakPilot's OWN risk
// index and band. The index is a generic severity-weighted sum of the
// likelihood factors — index = S * (F + W + P) — i.e. basic arithmetic on the
// universal risk dimensions. It is NOT a reproduction of any standard's
// risk graph, parameter table or SIL/PL matrix. The bands are ours, tuned to
// our ground-truth corpus.
//
// Each of s, f, w and p is expected on the 1-5 scale; values outside it are
// clamped to the nearest bound, so the index always stays within 3..75 and
// unset (zero) or negative inputs cannot produce a zero or negative index.
//
// Returns the index and the German level label:
//
//	>= 45 "kritisch", >= 30 "hoch", >= 18 "mittel", >= 9 "gering",
//	otherwise "vernachlaessigbar".
func EstimateRiskLevel(s, f, w, p int) (int, string) {
	// tier pins a parameter to the 1-5 scale.
	tier := func(v int) int {
		switch {
		case v < 1:
			return 1
		case v > 5:
			return 5
		}
		return v
	}

	idx := tier(s) * (tier(f) + tier(w) + tier(p))
	switch {
	case idx >= 45:
		return idx, "kritisch"
	case idx >= 30:
		return idx, "hoch"
	case idx >= 18:
		return idx, "mittel"
	case idx >= 9:
		return idx, "gering"
	default:
		return idx, "vernachlaessigbar"
	}
}