From a7dc12f30fe466ff284e87f2bd931af759454a80 Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBook-Pro.local>
Date: Wed, 10 Jun 2026 23:04:56 +0200
Subject: [PATCH] feat(iace): risk as confidence range + label in benchmark tab
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Report the tool's risk number as a plausible range with a confidence
label instead of a false-precision point value (confidence-aware
tone — the final assessment stays with the DSB / safety expert).

- risk_estimation.go: EstimateConfidence (hoch/mittel/niedrig from how the
  contact mode resolved), EstimateRiskRange (S ±1 and aggregate L=F+W+P ±1;
  the validated per-parameter errors largely cancel in the sum), RiskLevelRange;
  riskBandLabel holds the thresholds shared with EstimateRiskLevel.
- risk_benchmark.go: RiskComparisonPair gains eng_risk_point/low/high,
  eng_risk_level, eng_risk_level_range and confidence; RiskAgreement gains high_confidence_pct.
- RiskComparison.tsx: per-hazard range "low–high (level range)" + point,
  confidence chip, and an aggregate confidence line; types in useBenchmark.ts.
- Unit tests for the range/confidence helpers.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../benchmark/_components/RiskComparison.tsx  | 88 ++++++++++++-------
 .../benchmark/_hooks/useBenchmark.ts          |  4 +
 .../internal/iace/risk_benchmark.go           | 26 +++++-
 .../internal/iace/risk_estimation.go          | 86 ++++++++++++++++--
 .../iace/risk_estimation_range_test.go        | 58 ++++++++++++
 5 files changed, 225 insertions(+), 37 deletions(-)
 create mode 100644 ai-compliance-sdk/internal/iace/risk_estimation_range_test.go
diff --git a/admin-compliance/app/sdk/iace/[projectId]/benchmark/_components/RiskComparison.tsx b/admin-compliance/app/sdk/iace/[projectId]/benchmark/_components/RiskComparison.tsx
index 44e4aecb..4b8d2753 100644
--- a/admin-compliance/app/sdk/iace/[projectId]/benchmark/_components/RiskComparison.tsx
+++ b/admin-compliance/app/sdk/iace/[projectId]/benchmark/_components/RiskComparison.tsx
@@ -17,6 +17,13 @@ function ampelBand(band: string): Ampel {
   return 'green'
 }
 
+// Maps the tool's confidence label to a chip color: 'hoch' → green, 'mittel' → yellow, anything else (e.g. 'niedrig') → red.
+function ampelConfidence(c: string): Ampel {
+  if (c === 'hoch') return 'green'
+  if (c === 'mittel') return 'yellow'
+  return 'red'
+}
+
 const cellColor: Record<Ampel, string> = {
   red: 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300',
   yellow: 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/40 dark:text-yellow-300',
@@ -47,18 +54,28 @@ export function RiskComparison({ pairs, agreement }: { pairs?: RiskComparisonPai
       <div>
         <h3 className="text-sm font-semibold text-gray-700 dark:text-gray-300">Risikozahlen-Vergleich (Fachmann vs. Tool)</h3>
         <p className="text-xs text-gray-500 mt-0.5">
-          R = S × (F + W + P), Ampel wie in der Excel. Fine-Kinney (P×E×C) als zweite, US-anerkannte Bewertung.
+          R = S × (F + W + P), Ampel wie in der Excel. Das Tool nennt einen <strong>Schätzbereich</strong>{' '}
+          (nicht einen exakten Punktwert) plus Konfidenz — die endgültige Bewertung trifft der/die Sachverständige.
+          Fine-Kinney (P×E×C) als zweite, US-anerkannte Bewertung.
         </p>
       </div>
 
       {agreement && agreement.n > 0 && (
-        <div className="grid grid-cols-2 md:grid-cols-5 gap-3">
-          <Stat label="Schwere S ±1" pct={agreement.severity_within1} />
-          <Stat label="Haeufigkeit F ±1" pct={agreement.frequency_within1} />
-          <Stat label="Wahrsch. W ±1" pct={agreement.probability_within1} />
-          <Stat label="Vermeidb. P ±1" pct={agreement.avoidance_within1} />
-          <Stat label="Ranking (FK)" pct={agreement.rank_concordance} />
-        </div>
+        <>
+          <div className="grid grid-cols-2 md:grid-cols-5 gap-3">
+            <Stat label="Schwere S ±1" pct={agreement.severity_within1} />
+            <Stat label="Haeufigkeit F ±1" pct={agreement.frequency_within1} />
+            <Stat label="Wahrsch. W ±1" pct={agreement.probability_within1} />
+            <Stat label="Vermeidb. P ±1" pct={agreement.avoidance_within1} />
+            <Stat label="Ranking (FK)" pct={agreement.rank_concordance} />
+          </div>
+          {typeof agreement.high_confidence_pct === 'number' && (
+            <p className="text-xs text-gray-500">
+              Tool-Konfidenz: <strong>{Math.round(agreement.high_confidence_pct)}%</strong> der erkannten
+              Gefaehrdungen mit hoher Konfidenz (Verletzungsmechanismus eindeutig aus dem Szenario ableitbar).
+            </p>
+          )}
+        </>
       )}
 
       <div className="overflow-x-auto">
@@ -67,31 +84,42 @@ export function RiskComparison({ pairs, agreement }: { pairs?: RiskComparisonPai
             <tr className="text-gray-500 border-b border-gray-200 dark:border-gray-700">
               <th className="text-left py-1.5 px-2">Gefaehrdung</th>
               <th className="px-1 text-center" colSpan={5}>Fachmann · S F W P <strong>R</strong></th>
-              <th className="px-1 text-center border-l border-gray-200 dark:border-gray-700" colSpan={5}>Tool · S F W P <strong>R</strong> / FK</th>
+              <th className="px-1 text-center border-l border-gray-200 dark:border-gray-700" colSpan={4}>Tool · S F W P</th>
+              <th className="px-1 text-center border-l border-gray-200 dark:border-gray-700">Risiko (Schätzbereich) / FK</th>
+              <th className="px-1 text-center border-l border-gray-200 dark:border-gray-700">Konfidenz</th>
             </tr>
           </thead>
           <tbody>
-            {pairs.map((p, i) => {
-              const engR = p.eng_severity * (p.eng_frequency + p.eng_probability + p.eng_avoidance)
-              return (
-                <tr key={i} className="border-b border-gray-100 dark:border-gray-700/50">
-                  <td className="py-1 px-2 text-gray-700 dark:text-gray-300">{p.hazard_name || '—'}</td>
-                  <td className="text-center text-gray-500">{p.gt_severity}</td>
-                  <td className="text-center text-gray-500">{p.gt_frequency}</td>
-                  <td className="text-center text-gray-500">{p.gt_probability}</td>
-                  <td className="text-center text-gray-500">{p.gt_avoidance}</td>
-                  <td className={`text-center font-bold rounded ${cellColor[ampelEN(p.gt_risk)]}`}>{p.gt_risk}</td>
-                  <td className="text-center text-gray-500 border-l border-gray-200 dark:border-gray-700">{p.eng_severity}</td>
-                  <td className="text-center text-gray-500">{p.eng_frequency}</td>
-                  <td className="text-center text-gray-500">{p.eng_probability}</td>
-                  <td className="text-center text-gray-500">{p.eng_avoidance}</td>
-                  <td className="text-center">
-                    <span className={`inline-block font-bold rounded px-1.5 ${cellColor[ampelEN(engR)]}`}>{engR}</span>
-                    <span className={`ml-1 inline-block rounded px-1 ${cellColor[ampelBand(p.fk_band)]}`} title={`Fine-Kinney ${p.fk_band}`}>FK&nbsp;{Math.round(p.fk_score)}</span>
-                  </td>
-                </tr>
-              )
-            })}
+            {pairs.map((p, i) => (
+              <tr key={i} className="border-b border-gray-100 dark:border-gray-700/50">
+                <td className="py-1 px-2 text-gray-700 dark:text-gray-300">{p.hazard_name || '—'}</td>
+                <td className="text-center text-gray-500">{p.gt_severity}</td>
+                <td className="text-center text-gray-500">{p.gt_frequency}</td>
+                <td className="text-center text-gray-500">{p.gt_probability}</td>
+                <td className="text-center text-gray-500">{p.gt_avoidance}</td>
+                <td className={`text-center font-bold rounded ${cellColor[ampelEN(p.gt_risk)]}`}>{p.gt_risk}</td>
+                <td className="text-center text-gray-500 border-l border-gray-200 dark:border-gray-700">{p.eng_severity}</td>
+                <td className="text-center text-gray-500">{p.eng_frequency}</td>
+                <td className="text-center text-gray-500">{p.eng_probability}</td>
+                <td className="text-center text-gray-500">{p.eng_avoidance}</td>
+                <td className="text-center border-l border-gray-200 dark:border-gray-700 whitespace-nowrap">
+                  <span
+                    className={`inline-block font-bold rounded px-1.5 ${cellColor[ampelEN(p.eng_risk_point)]}`}
+                    title={`Schätzbereich R ${p.eng_risk_low}–${p.eng_risk_high} (${p.eng_risk_level_range})`}
+                  >
+                    {p.eng_risk_low}–{p.eng_risk_high}
+                  </span>
+                  <span className="ml-1 text-[10px] text-gray-400">≈{p.eng_risk_point}</span>
+                  <span className={`ml-1 inline-block rounded px-1 ${cellColor[ampelBand(p.fk_band)]}`} title={`Fine-Kinney ${p.fk_band}`}>FK&nbsp;{Math.round(p.fk_score)}</span>
+                  <div className="text-[9px] text-gray-400 mt-0.5">{p.eng_risk_level_range}</div>
+                </td>
+                <td className="text-center border-l border-gray-200 dark:border-gray-700">
+                  <span className={`inline-block rounded px-1.5 py-0.5 text-[10px] font-medium ${cellColor[ampelConfidence(p.confidence)]}`}>
+                    {p.confidence}
+                  </span>
+                </td>
+              </tr>
+            ))}
           </tbody>
         </table>
       </div>
diff --git a/admin-compliance/app/sdk/iace/[projectId]/benchmark/_hooks/useBenchmark.ts b/admin-compliance/app/sdk/iace/[projectId]/benchmark/_hooks/useBenchmark.ts
index a9668067..f793dc81 100644
--- a/admin-compliance/app/sdk/iace/[projectId]/benchmark/_hooks/useBenchmark.ts
+++ b/admin-compliance/app/sdk/iace/[projectId]/benchmark/_hooks/useBenchmark.ts
@@ -53,6 +53,9 @@ export interface RiskComparisonPair {
   gt_severity: number; gt_frequency: number; gt_probability: number; gt_avoidance: number; gt_risk: number
   eng_severity: number; eng_frequency: number; eng_probability: number; eng_avoidance: number
   fk_score: number; fk_band: string
+  eng_risk_point: number; eng_risk_low: number; eng_risk_high: number
+  eng_risk_level: string; eng_risk_level_range: string
+  confidence: string // hoch | mittel | niedrig
 }
 
 export interface RiskAgreement {
@@ -60,6 +63,7 @@ export interface RiskAgreement {
   severity_within1: number; frequency_within1: number
   probability_within1: number; avoidance_within1: number
   rank_concordance: number
+  high_confidence_pct: number
 }
 
 export interface BenchmarkResult {
diff --git a/ai-compliance-sdk/internal/iace/risk_benchmark.go b/ai-compliance-sdk/internal/iace/risk_benchmark.go
index 5b3b4097..43d4e778 100644
--- a/ai-compliance-sdk/internal/iace/risk_benchmark.go
+++ b/ai-compliance-sdk/internal/iace/risk_benchmark.go
@@ -19,6 +19,15 @@ type RiskComparisonPair struct {
 	EngAvoidance   int     `json:"eng_avoidance"`
 	FKScore        float64 `json:"fk_score"`
 	FKBand         string  `json:"fk_band"`
+	// Confidence-aware risk: a point estimate plus a plausible low/high band and
+	// a confidence label, so the tool reports a RANGE (not a false-precision
+	// point) — the assessment is confirmed by the DSB / safety expert.
+	EngRiskPoint      int    `json:"eng_risk_point"`
+	EngRiskLow        int    `json:"eng_risk_low"`
+	EngRiskHigh       int    `json:"eng_risk_high"`
+	EngRiskLevel      string `json:"eng_risk_level"`       // band of the point value
+	EngRiskLevelRange string `json:"eng_risk_level_range"` // e.g. "mittel–hoch"
+	Confidence        string `json:"confidence"`          // hoch / mittel / niedrig
 }
 
 // RiskAgreement aggregates how close the tool's risk numbers are to the GT.
@@ -28,14 +37,15 @@ type RiskAgreement struct {
 	FrequencyWithin1   float64 `json:"frequency_within1"`
 	ProbabilityWithin1 float64 `json:"probability_within1"`
 	AvoidanceWithin1   float64 `json:"avoidance_within1"`
-	RankConcordance    float64 `json:"rank_concordance"` // Fine-Kinney vs GT R
+	RankConcordance    float64 `json:"rank_concordance"`   // Fine-Kinney vs GT R
+	HighConfidencePct  float64 `json:"high_confidence_pct"` // share of matched hazards with "hoch" confidence
 }
 
 // ComputeRiskComparison derives the tool's risk numbers for each matched hazard
 // and compares them to the professional's GT values.
 func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, RiskAgreement) {
 	pairs := make([]RiskComparisonPair, 0, len(matched))
-	var sevOK, freqOK, probOK, avoidOK, n int
+	var sevOK, freqOK, probOK, avoidOK, n, hiConf int
 	var engFK, gtR []float64
 
 	for _, m := range matched {
@@ -54,11 +64,20 @@ func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, Ris
 		fk := SuggestFineKinney(cats, scenario, lifecycle, 0)
 		gt := m.GTEntry.RiskIn
 
+		rLow, rPoint, rHigh := EstimateRiskRange(engS, engF, engW, engP)
+		rLevel, rLevelRange := RiskLevelRange(rLow, rPoint, rHigh)
+		conf := EstimateConfidence(cats, scenario)
+		if conf == "hoch" {
+			hiConf++
+		}
+
 		pairs = append(pairs, RiskComparisonPair{
 			HazardName:     m.GTEntry.HazardType,
 			GTSeverity:     gt.S, GTFrequency: gt.F, GTProbability: gt.W, GTAvoidance: gt.P, GTRisk: gt.R,
 			EngSeverity:    engS, EngFrequency: engF, EngProbability: engW, EngAvoidance: engP,
 			FKScore:        fk.Score, FKBand: fk.Band,
+			EngRiskPoint:   rPoint, EngRiskLow: rLow, EngRiskHigh: rHigh,
+			EngRiskLevel:   rLevel, EngRiskLevelRange: rLevelRange, Confidence: conf,
 		})
 
 		if gt.S > 0 {
@@ -88,6 +107,9 @@ func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, Ris
 		agg.AvoidanceWithin1 = pct(avoidOK, n)
 		agg.RankConcordance = rankConcordance(engFK, gtR)
 	}
+	if len(pairs) > 0 {
+		agg.HighConfidencePct = pct(hiConf, len(pairs))
+	}
 	return pairs, agg
 }
 
diff --git a/ai-compliance-sdk/internal/iace/risk_estimation.go b/ai-compliance-sdk/internal/iace/risk_estimation.go
index 604375c0..b8633d89 100644
--- a/ai-compliance-sdk/internal/iace/risk_estimation.go
+++ b/ai-compliance-sdk/internal/iace/risk_estimation.go
@@ -203,16 +203,92 @@ func EstimateRiskLevel(s, f, w, p int) (int, string) {
 		s = 1
 	}
 	idx := s * (f + w + p)
+	return idx, riskBandLabel(idx)
+}
+
+// riskBandLabel maps a risk index (3..75) to BreakPilot's German level band:
+// >=45 kritisch, >=30 hoch, >=18 mittel, >=9 gering, else vernachlaessigbar.
+// Single source of truth for the thresholds (EstimateRiskLevel, RiskLevelRange).
+func riskBandLabel(idx int) string {
 	switch {
 	case idx >= 45:
-		return idx, "kritisch"
+		return "kritisch"
 	case idx >= 30:
-		return idx, "hoch"
+		return "hoch"
 	case idx >= 18:
-		return idx, "mittel"
+		return "mittel"
 	case idx >= 9:
-		return idx, "gering"
+		return "gering"
 	default:
-		return idx, "vernachlaessigbar"
+		return "vernachlaessigbar"
 	}
 }
+
+func clampRisk1to5(x int) int { // clampRisk1to5 limits x to the closed interval 1..5
+	if x < 1 {
+		return 1
+	}
+	if x > 5 {
+		return 5
+	}
+	return x
+}
+
+// EstimateConfidence reports how well-anchored the tool's risk parameters are,
+// returning "hoch", "mittel" or "niedrig". Any contactModeKeywords keyword found
+// as a substring of the normalizeDE'd scenario → "hoch"; otherwise any category
+// present in categoryDefaultMode → "mittel"; otherwise "niedrig". The label
+// signals that the point estimate is a heuristic, not a guarantee — the final
+// assessment stays with the DSB / safety expert.
+func EstimateConfidence(cats []string, scenario string) string {
+	text := normalizeDE(scenario)
+	for _, e := range contactModeKeywords {
+		for _, kw := range e.keywords {
+			if strings.Contains(text, kw) {
+				return "hoch"
+			}
+		}
+	}
+	for _, c := range cats {
+		if _, ok := categoryDefaultMode[c]; ok {
+			return "mittel"
+		}
+	}
+	return "niedrig"
+}
+
+// EstimateRiskRange returns the point risk index plus a plausible low/high band.
+// S, F, W and P are first clamped to 1..5; point = S × L with L = F+W+P. The band
+// shifts S by ±1 (kept in 1..5) and L by ±1 (kept in 3..15), so low ≤ point ≤ high.
+// L is moved as a whole rather than F/W/P independently because, per the
+// validation cited by the author, per-parameter errors largely cancel in the sum.
+// The result communicates that the risk number is an ESTIMATE with uncertainty
+// rather than a false-precision point value.
+func EstimateRiskRange(s, f, w, p int) (low, point, high int) {
+	s = clampRisk1to5(s)
+	l := clampRisk1to5(f) + clampRisk1to5(w) + clampRisk1to5(p) // 3..15
+	clampL := func(x int) int { // keeps a shifted L inside its 3..15 domain
+		if x < 3 {
+			return 3
+		}
+		if x > 15 {
+			return 15
+		}
+		return x
+	}
+	point = s * l
+	low = clampRisk1to5(s-1) * clampL(l-1)  // both factors one step down, floored at 1 and 3
+	high = clampRisk1to5(s+1) * clampL(l+1) // both factors one step up, capped at 5 and 15
+	return low, point, high
+}
+
+// RiskLevelRange returns the band label of point (level) and a range label built
+// from the bands of low and high: one label if they match, else "low–high".
+func RiskLevelRange(low, point, high int) (level, levelRange string) {
+	level = riskBandLabel(point)
+	ll, lh := riskBandLabel(low), riskBandLabel(high)
+	if ll == lh {
+		return level, ll
+	}
+	return level, ll + "–" + lh // en dash
+}
diff --git a/ai-compliance-sdk/internal/iace/risk_estimation_range_test.go b/ai-compliance-sdk/internal/iace/risk_estimation_range_test.go
new file mode 100644
index 00000000..71c95657
--- /dev/null
+++ b/ai-compliance-sdk/internal/iace/risk_estimation_range_test.go
@@ -0,0 +1,58 @@
+package iace
+
+import "testing"
+
+func TestEstimateRiskRange(t *testing.T) { // table-driven: exact band values plus the low ≤ point ≤ high ordering
+	tests := []struct {
+		name                  string
+		s, f, w, p            int
+		wantLow, wantP, wantH int
+	}{
+		// S=4, L=F+W+P=8 → point 4*8=32; low (S-1)*(L-1)=3*7=21; high (S+1)*(L+1)=5*9=45.
+		{"typical electrical", 4, 3, 2, 3, 21, 32, 45},
+		// Min likelihood: L=3 → point 2*3=6; low 1*clampL(2)=1*3=3 (L floored at 3); high 3*4=12.
+		{"low end clamps", 2, 1, 1, 1, 3, 6, 12},
+		// Max: S=5, L=15 → point 75; low 4*14=56; high clamps S to 5 and L to 15 → 75.
+		{"high end clamps", 5, 5, 5, 5, 56, 75, 75},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			low, point, high := EstimateRiskRange(tc.s, tc.f, tc.w, tc.p)
+			if low != tc.wantLow || point != tc.wantP || high != tc.wantH {
+				t.Errorf("EstimateRiskRange(%d,%d,%d,%d) = (%d,%d,%d), want (%d,%d,%d)",
+					tc.s, tc.f, tc.w, tc.p, low, point, high, tc.wantLow, tc.wantP, tc.wantH)
+			}
+			if low > point || point > high {
+				t.Errorf("range not ordered: low=%d point=%d high=%d", low, point, high)
+			}
+		})
+	}
+}
+
+func TestEstimateConfidence(t *testing.T) { // one case per outcome; NOTE(review): expected labels depend on contactModeKeywords / categoryDefaultMode defined elsewhere — confirm
+	cases := []struct {
+		cats     []string
+		scenario string
+		want     string
+	}{
+		{[]string{"mechanical_hazard"}, "Quetschen der Hand im Werkzeugraum", "hoch"}, // keyword "quetsch"
+		{[]string{"electrical_hazard"}, "Elektrischer Schlag am Gehaeuse", "hoch"},     // keyword "elektrisch"
+		{[]string{"mechanical_hazard"}, "Allgemeine Restgefahr an der Anlage", "mittel"}, // category fallback
+		{[]string{"made_up_category"}, "Unspezifische Situation", "niedrig"},            // nothing
+	}
+	for _, tc := range cases {
+		if got := EstimateConfidence(tc.cats, tc.scenario); got != tc.want {
+			t.Errorf("EstimateConfidence(%v, %q) = %q, want %q", tc.cats, tc.scenario, got, tc.want)
+		}
+	}
+}
+
+func TestRiskLevelRange(t *testing.T) {
+	// 9 and 16 both fall in "gering" → single label; 21 is "mittel", 32 "hoch", 45 "kritisch" → "mittel–kritisch".
+	if lvl, rng := RiskLevelRange(9, 12, 16); lvl != "gering" || rng != "gering" {
+		t.Errorf("single-band: got (%q,%q), want (gering,gering)", lvl, rng)
+	}
+	if lvl, rng := RiskLevelRange(21, 32, 45); lvl != "hoch" || rng != "mittel–kritisch" {
+		t.Errorf("multi-band: got (%q,%q), want (hoch, mittel–kritisch)", lvl, rng)
+	}
+}