breakpilot-compliance/ai-compliance-sdk/internal/iace/risk_estimation.go

package iace

import "strings"

// Risk parameter estimation — probability of occurrence (W) and possibility of
// avoidance (P) — for auto-generated hazards.
//
// COPYRIGHT / IP NOTE: This is BreakPilot's OWN heuristic model. It does NOT
// reproduce, transcribe or re-implement any DIN/Beuth/ISO/IEC risk-graph table,
// parameter decision tree, threshold or matrix. It derives values on OUR OWN
// 1-5 scale from (a) PUBLIC, permissively-licensed occupational-accident
// statistics organised by contact mode — primarily Eurostat ESAW (CC BY 4.0,
// commercial reuse permitted with source attribution); US BLS/OSHA (public
// domain) and UK HSE (Open Government Licence) are acceptable supplements —
// and (b) observable machine facts the engine already extracts (hazard
// category, scenario kinematics). The scale and weights are ours and are
// calibrated against our own ground-truth corpus, not copied from a standard.
// NOTE: DGUV statistics are NOT used — their terms permit only editorial use
// and forbid modification, so they are unsuitable for a commercial product.
// Provenance, exact figures used and attribution: see DATA_SOURCES.md.
//
// The universal risk DIMENSIONS (severity, frequency, probability, avoidance)
// are general engineering concepts, not protectable expression.

// contactMode is a coarse injury-mechanism class. Public accident statistics
// (primarily Eurostat ESAW) report frequencies by such modes; that public
// ordering anchors a relative probability tier, and the injury kinematics
// anchor an avoidance tier. DGUV figures are deliberately not used (see the
// licensing note in the file header).
type contactMode struct {
	// name is the contact-mode key; contactModeTable entries repeat their
	// map key here.
	name string
	// baseW: relative probability-of-occurrence tier (1-5). Anchored to the
	// ESAW contact-mode frequency ranking (impact/struck-by/crush/cut are the
	// most frequent; pressure-burst/radiation are rare). OUR calibrated scale.
	baseW int
	// baseP: avoidance-difficulty tier (1-5; higher = harder to avoid).
	// Anchored to injury kinematics (sudden, no-warning events are hard to
	// avoid; gradual exposure is easy). OUR reasoning, no norm table.
	baseP int
}

// contactModeTable holds our W/P tiers per contact mode, keyed by mode name.
//
// The tiers started from the public ESAW contact-mode frequency ranking and
// were then CALIBRATED against our own ground-truth (GT) corpus, i.e. the
// professional's W/P distribution per mode:
//
// Well-sampled modes are set to the GT means: crushing (n=40), electrical
// (n=20) and struck_by (n=14).
//
// Sparsely-sampled modes (n<=4) keep conservative defaults so that we do not
// overfit to noise from a 2-GT sample.
//
// impact_stationary gets a low avoidance tier because the obstacle is seen
// coming; fall gets a higher one because GT shows higher avoidance difficulty.
//
// This is the single place to tune; never hard-code per-machine values into
// patterns. See DATA_SOURCES.md for the public-data provenance and license.
var contactModeTable = map[string]contactMode{
	"impact_stationary": {name: "impact_stationary", baseW: 3, baseP: 1},
	"struck_by":         {name: "struck_by", baseW: 2, baseP: 3},
	"crushing":          {name: "crushing", baseW: 2, baseP: 3},
	"cutting":           {name: "cutting", baseW: 2, baseP: 3},
	"entanglement":      {name: "entanglement", baseW: 3, baseP: 3},
	"shearing":          {name: "shearing", baseW: 2, baseP: 3},
	"fall":              {name: "fall", baseW: 3, baseP: 4},
	"electrical":        {name: "electrical", baseW: 2, baseP: 3},
	"thermal":           {name: "thermal", baseW: 2, baseP: 2},
	"ergonomic":         {name: "ergonomic", baseW: 2, baseP: 3},
	"chemical":          {name: "chemical", baseW: 2, baseP: 3},
	"pressure_burst":    {name: "pressure_burst", baseW: 2, baseP: 3},
	"radiation":         {name: "radiation", baseW: 2, baseP: 3},
}

// contactModeKeywords maps umlaut-normalised scenario keywords to a contact
// mode. Keywords are matched as plain substrings and the list is
// ORDER-DEPENDENT: entries are tried top to bottom and the first entry with a
// matching keyword decides the mode.
//
// Two rules follow for anyone tuning this list. First, a keyword must not
// contain a keyword of an EARLIER entry that maps to a different mode,
// otherwise it can never decide the result. "herabstuerz" contains "stuerz",
// so it has its own struck_by entry ahead of "fall". Second, stems must not
// match common inflection endings: a bare "scher" is contained in every
// "-ischer"/"-scher" adjective ("elektrischer", "thermischer", "falscher"),
// so shearing is keyed on explicit stems instead.
//
// The same mode may appear in more than one entry.
var contactModeKeywords = []struct {
	mode     string
	keywords []string
}{
	{"crushing", []string{"quetsch", "einklemm", "eingeklemmt", "klemm", "zerquetsch"}},
	{"entanglement", []string{"einzug", "eingezogen", "erfasst", "aufwickel", "umwickel", "wickelt"}},
	{"shearing", []string{"abscher", "abgeschert", "scheren", "scherstelle", "scherkante", "scherwirkung", "scherbewegung", "scherung", "schergefahr"}},
	{"cutting", []string{"schneid", "schnitt", "scharfe kante", "abtrenn", "amputation", "stich"}},
	{"electrical", []string{"stromschlag", "spannungsfuehr", "koerperdurchstroem", "beruehrungsspannung", "lichtbogen", "elektrisch"}},
	{"thermal", []string{"verbrenn", "verbruehung", "heisse", "thermisch", "heisser"}},
	{"pressure_burst", []string{"bersten", "hochdruck", "ueberdruck", "druckbehaelter", "injektion"}},
	// Must precede "fall": a falling object ("herabstuerzend") would
	// otherwise be caught by the "stuerz" stem and read as a person's fall.
	{"struck_by", []string{"herabstuerz"}},
	{"fall", []string{"sturz", "stuerz", "absturz", "ausrutsch", "stolper", "abstuerz"}},
	{"struck_by", []string{"weggeschleudert", "geschleudert", "geschoss", "herabfallen", "getroffen", "wegfliegen", "fallende last", "schlag"}},
	{"impact_stationary", []string{"anstossen", "anprall", "stossen gegen", "stoss gegen"}},
	{"ergonomic", []string{"belastung", "ergonom", "zwangshaltung", "manuelles heben", "ueberlastung"}},
	{"chemical", []string{"exposition", "gefahrstoff", "daempfe", "kontamination", "reizung", "aerosol", "vergiftung"}},
}

// categoryDefaultMode gives, per hazard category, the contact mode to assume
// when the scenario text contains no kinematic keyword of its own. Entries are
// grouped by the mode they fall back to.
var categoryDefaultMode = map[string]string{
	"mechanical_hazard": "crushing",

	"electrical_hazard": "electrical",

	"thermal_hazard": "thermal",
	"fire_explosion": "thermal",

	"chemical_hazard":        "chemical",
	"material_environmental": "chemical",

	"ergonomic":       "ergonomic",
	"noise_vibration": "ergonomic",

	"radiation_hazard": "radiation",

	"pneumatic_hydraulic": "pressure_burst",
}

// DetectContactMode determines the injury mechanism (contact mode) of a hazard.
//
// The scenario text is passed through normalizeDE and then searched for the
// keywords in contactModeKeywords; entries are tried in table order and the
// first keyword found as a substring decides the mode. Only if no keyword
// matches are the categories consulted, in the order given, and the first one
// with an entry in categoryDefaultMode supplies the mode.
//
// It returns the contact-mode key, or "" when neither the text nor any
// category yields one.
func DetectContactMode(cats []string, scenario string) string {
	normalized := normalizeDE(scenario)

	// Pass 1: kinematic keywords in the scenario text take precedence.
	for i := range contactModeKeywords {
		entry := contactModeKeywords[i]
		for j := range entry.keywords {
			if !strings.Contains(normalized, entry.keywords[j]) {
				continue
			}
			return entry.mode
		}
	}

	// Pass 2: fall back to the default mode of the first known category.
	for _, category := range cats {
		mode, known := categoryDefaultMode[category]
		if known {
			return mode
		}
	}

	return ""
}

// EstimateProbabilityW returns the probability-of-occurrence tier (1-5) for a
// hazard, anchored to the public accident-frequency ranking of its contact
// mode. Returns 3 (neutral) when the mode is unknown.
func EstimateProbabilityW(cats []string, scenario string) int {
	if m, ok := contactModeTable[DetectContactMode(cats, scenario)]; ok {
		return m.baseW
	}
	return 3
}

// EstimateAvoidabilityP returns the avoidance-difficulty tier (1-5; higher =
// harder to avoid) from the contact mode's kinematics. Returns 3 when unknown.
func EstimateAvoidabilityP(cats []string, scenario string) int {
	if m, ok := contactModeTable[DetectContactMode(cats, scenario)]; ok {
		return m.baseP
	}
	return 3
}

// EstimateRiskLevel combines the four parameters into BreakPilot's OWN risk
// index and band. The index is a generic severity-weighted sum of the
// likelihood factors — index = S * (F + W + P) — i.e. basic arithmetic on the
// universal risk dimensions. It is NOT a reproduction of any standard's
// risk graph, parameter table or SIL/PL matrix. The bands are ours, tuned to
// our ground-truth corpus.
//
// Parameters (each on our 1-5 tier scale):
//   - s: severity
//   - f: frequency of exposure
//   - w: probability of occurrence
//   - p: avoidance difficulty
//
// Every parameter is clamped to 1..5 before use, so unset (zero), negative or
// oversized inputs cannot push the index outside the documented range or flip
// its sign.
//
// Returns the index (always within 3..75) and the German level label:
// "kritisch" (>= 45), "hoch" (>= 30), "mittel" (>= 18), "gering" (>= 9),
// otherwise "vernachlaessigbar".
func EstimateRiskLevel(s, f, w, p int) (int, string) {
	const minTier, maxTier = 1, 5
	clamp := func(v int) int {
		switch {
		case v < minTier:
			return minTier
		case v > maxTier:
			return maxTier
		default:
			return v
		}
	}
	s, f, w, p = clamp(s), clamp(f), clamp(w), clamp(p)

	idx := s * (f + w + p)
	switch {
	case idx >= 45:
		return idx, "kritisch"
	case idx >= 30:
		return idx, "hoch"
	case idx >= 18:
		return idx, "mittel"
	case idx >= 9:
		return idx, "gering"
	default:
		return idx, "vernachlaessigbar"
	}
}