// breakpilot-compliance/ai-compliance-sdk/internal/iace/risk_benchmark.go

package iace

// Risk-number comparison for the benchmark: for every matched hazard, the
// tool's risk parameters (EN-62061-style S/F/W/P + Fine-Kinney) next to the
// professional's GT values, plus aggregate agreement. Used by the benchmark
// endpoint so the Risikobewertung comparison is visible in the tab.

// RiskComparisonPair is one matched hazard's tool-vs-professional risk numbers.
//
// The GT* fields carry the professional's values (GT presumably stands for
// "ground truth" — confirm), copied from the matched GT entry's RiskIn. The
// Eng* and FK* fields carry the tool's own estimates as filled in by
// ComputeRiskComparison.
type RiskComparisonPair struct {
	// HazardName is the hazard type taken from the matched GT entry.
	HazardName     string  `json:"hazard_name"`
	// Professional's risk parameters (S / F / W / P) and resulting risk R.
	GTSeverity     int     `json:"gt_severity"`
	GTFrequency    int     `json:"gt_frequency"`
	GTProbability  int     `json:"gt_probability"` // GT column W
	GTAvoidance    int     `json:"gt_avoidance"`   // GT column P
	GTRisk         int     `json:"gt_risk"`        // GT column R
	// Tool's estimates for the same four parameters.
	EngSeverity    int     `json:"eng_severity"`
	EngFrequency   int     `json:"eng_frequency"`
	EngProbability int     `json:"eng_probability"`
	EngAvoidance   int     `json:"eng_avoidance"`
	// Fine-Kinney suggestion of the tool: numeric score and its band label.
	FKScore        float64 `json:"fk_score"`
	FKBand         string  `json:"fk_band"`
	// Confidence-aware risk: a point estimate plus a plausible low/high band and
	// a confidence label, so the tool reports a RANGE (not a false-precision
	// point) — the assessment is confirmed by the DSB / safety expert.
	EngRiskPoint      int    `json:"eng_risk_point"`
	EngRiskLow        int    `json:"eng_risk_low"`
	EngRiskHigh       int    `json:"eng_risk_high"`
	EngRiskLevel      string `json:"eng_risk_level"`       // band of the point value
	EngRiskLevelRange string `json:"eng_risk_level_range"` // e.g. "mittel–hoch"
	Confidence        string `json:"confidence"`          // hoch / mittel / niedrig (high / medium / low)
}

// RiskAgreement aggregates how close the tool's risk numbers are to the GT.
//
// All ratio fields are percentages on a 0–100 scale and are 0 when there is
// nothing to compare.
type RiskAgreement struct {
	// N is the number of matched hazards that have a positive GT severity;
	// ComputeRiskComparison uses it as the denominator for the *Within1 fields.
	N                  int     `json:"n"`
	// *Within1: share of those N hazards where the tool's value differs from
	// the GT value by at most 1.
	SeverityWithin1    float64 `json:"severity_within1"`
	FrequencyWithin1   float64 `json:"frequency_within1"`
	ProbabilityWithin1 float64 `json:"probability_within1"`
	AvoidanceWithin1   float64 `json:"avoidance_within1"`
	RankConcordance    float64 `json:"rank_concordance"`   // Fine-Kinney vs GT R
	HighConfidencePct  float64 `json:"high_confidence_pct"` // share of matched hazards with "hoch" confidence
}

// ComputeRiskComparison builds the tool-vs-professional risk comparison for
// every matched hazard and summarises how closely the two sides agree.
//
// The tool's side is estimated from the engine hazard's category, its scenario
// text (the hazard name is used when the scenario is empty) and its lifecycle
// phases. The professional's side is read from the GT entry's RiskIn.
//
// It returns one RiskComparisonPair per element of matched, in input order,
// and a RiskAgreement. Only hazards with a positive GT severity enter the
// agreement statistics (RiskAgreement.N); HighConfidencePct is computed over
// all returned pairs.
//
// NOTE(review): frequency/probability/avoidance agreement is divided by N even
// when the corresponding GT value is missing (<= 0), so such hazards count as
// disagreement — confirm this is intended.
func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, RiskAgreement) {
	out := make([]RiskComparisonPair, 0, len(matched))

	var (
		scored    int // hazards that carry a GT severity
		sevHits   int
		freqHits  int
		probHits  int
		avoidHits int
		confident int // hazards rated "hoch"
		fkScores  []float64
		gtRisks   []float64
	)

	// near reports whether a GT value is present and the tool's value is at
	// most one step away from it.
	near := func(eng, ref int) bool {
		return ref > 0 && abs(eng-ref) <= 1
	}

	for _, pair := range matched {
		hazard := pair.EngineHazard
		categories := []string{hazard.Category}

		// Prefer the scenario description; fall back to the hazard name.
		text := hazard.Name
		if hazard.Scenario != "" {
			text = hazard.Scenario
		}
		phases := splitLifecyclePhases(hazard.LifecyclePhase)

		sev := EstimateSeverity(categories, text, 0)
		freq := EstimateFrequency(phases)
		prob := EstimateProbabilityW(categories, text)
		avoid := EstimateAvoidabilityP(categories, text)
		fineKinney := SuggestFineKinney(categories, text, phases, 0)
		ref := pair.GTEntry.RiskIn

		low, point, high := EstimateRiskRange(sev, freq, prob, avoid)
		level, levelRange := RiskLevelRange(low, point, high)
		confidence := EstimateConfidence(categories, text)
		if confidence == "hoch" {
			confident++
		}

		out = append(out, RiskComparisonPair{
			HazardName:        pair.GTEntry.HazardType,
			GTSeverity:        ref.S,
			GTFrequency:       ref.F,
			GTProbability:     ref.W,
			GTAvoidance:       ref.P,
			GTRisk:            ref.R,
			EngSeverity:       sev,
			EngFrequency:      freq,
			EngProbability:    prob,
			EngAvoidance:      avoid,
			FKScore:           fineKinney.Score,
			FKBand:            fineKinney.Band,
			EngRiskPoint:      point,
			EngRiskLow:        low,
			EngRiskHigh:       high,
			EngRiskLevel:      level,
			EngRiskLevelRange: levelRange,
			Confidence:        confidence,
		})

		// Hazards without a GT severity are listed but not scored.
		if ref.S <= 0 {
			continue
		}
		scored++
		if near(sev, ref.S) {
			sevHits++
		}
		if near(freq, ref.F) {
			freqHits++
		}
		if near(prob, ref.W) {
			probHits++
		}
		if near(avoid, ref.P) {
			avoidHits++
		}
		fkScores = append(fkScores, fineKinney.Score)
		gtRisks = append(gtRisks, float64(ref.R))
	}

	// pct and rankConcordance both yield 0 for empty input, so no explicit
	// zero-count guards are needed here.
	return out, RiskAgreement{
		N:                  scored,
		SeverityWithin1:    pct(sevHits, scored),
		FrequencyWithin1:   pct(freqHits, scored),
		ProbabilityWithin1: pct(probHits, scored),
		AvoidanceWithin1:   pct(avoidHits, scored),
		RankConcordance:    rankConcordance(fkScores, gtRisks),
		HighConfidencePct:  pct(confident, len(out)),
	}
}

// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}

// pct expresses x as a percentage of total (100 means x == total).
// A zero total yields 0 instead of dividing by zero.
func pct(x, total int) float64 {
	if total != 0 {
		scaled := 100 * float64(x)
		return scaled / float64(total)
	}
	return 0
}

// rankConcordance returns the percentage (0–100) of comparable index pairs
// (i, j) that a and b order the same way. The measure is scale-invariant:
// only the sign of the differences matters, so 50 corresponds to random
// agreement and 100 to identical ordering.
//
// Pairs that are tied in either slice are not comparable and are skipped.
// If no comparable pair exists (fewer than two elements, or only ties) the
// result is 0.
//
// a and b are expected to have the same length; if they differ, only the
// common prefix is compared so that a shorter b cannot cause an
// out-of-range panic.
func rankConcordance(a, b []float64) float64 {
	// Bound the iteration by the shorter slice.
	n := len(a)
	if len(b) < n {
		n = len(b)
	}

	concordant, discordant := 0, 0
	for i := 0; i < n; i++ {
		for j := i + 1; j < n; j++ {
			da, db := a[i]-a[j], b[i]-b[j]
			if da == 0 || db == 0 {
				continue // tie on either side: not comparable
			}
			if (da > 0) == (db > 0) {
				concordant++
			} else {
				discordant++
			}
		}
	}

	comparable := concordant + discordant
	if comparable == 0 {
		return 0
	}
	return 100 * float64(concordant) / float64(comparable)
}