feat(iace): benchmark risk comparison (traffic lights) + misuse pattern + 1:n matcher

#1 Risk-number comparison in the benchmark: ComputeRiskComparison derives the tool's S/F/W/P + Fine-Kinney per matched hazard and compares to the GT values; exposed on the benchmark response and rendered in a new RiskComparison table with GREEN/YELLOW/RED traffic lights on the risk number R (like the Excel), plus per-axis within-1 agreement cards. #2 Generic misuse pattern HP2103 "Personenbefoerderung auf Hebezeug" — gated to lift-family machine types, fires for ANY lifting device (not machine-specific). #3 Benchmark matcher is now 1:n — one broad engine hazard may cover several fine-grained GT sub-scenarios (foot/hand/leg crush), so coverage reflects real risk coverage rather than 1:1 wording matches. Validated on BOTH ground truths (robot cell + lift): leakage 0, ghosts 0, coverage held. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-09 17:24:52 +02:00
parent ef746ea8f0
commit 2677bca9ca
8 changed files with 284 additions and 1 deletions
@@ -0,0 +1,100 @@
+'use client'
+
+import type { RiskComparisonPair, RiskAgreement } from '../_hooks/useBenchmark'
+
+type Ampel = 'green' | 'yellow' | 'red'
+
+// EN-62061-style risk number R = S * (F + W + P) → traffic light (like the Excel).
+function ampelEN(r: number): Ampel {
+  if (r >= 30) return 'red'
+  if (r >= 18) return 'yellow'
+  return 'green'
+}
+
+function ampelBand(band: string): Ampel {
+  if (band === 'sehr hoch' || band === 'hoch') return 'red'
+  if (band === 'wesentlich' || band === 'moeglich') return 'yellow'
+  return 'green'
+}
+
+const cellColor: Record<Ampel, string> = {
+  red: 'bg-red-100 text-red-700 dark:bg-red-900/40 dark:text-red-300',
+  yellow: 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/40 dark:text-yellow-300',
+  green: 'bg-green-100 text-green-700 dark:bg-green-900/40 dark:text-green-300',
+}
+
+function pctColor(p: number): Ampel {
+  if (p >= 80) return 'green'
+  if (p >= 50) return 'yellow'
+  return 'red'
+}
+
+function Stat({ label, pct }: { label: string; pct: number }) {
+  const c = pctColor(pct)
+  return (
+    <div className={`rounded-lg border-2 p-3 text-center ${c === 'green' ? 'border-green-200 dark:border-green-800' : c === 'yellow' ? 'border-yellow-200 dark:border-yellow-800' : 'border-red-200 dark:border-red-800'}`}>
+      <div className={`text-xl font-bold ${c === 'green' ? 'text-green-600' : c === 'yellow' ? 'text-yellow-600' : 'text-red-600'}`}>{Math.round(pct)}%</div>
+      <div className="text-[10px] text-gray-500 mt-0.5">{label}</div>
+    </div>
+  )
+}
+
+export function RiskComparison({ pairs, agreement }: { pairs?: RiskComparisonPair[]; agreement?: RiskAgreement }) {
+  if (!pairs || pairs.length === 0) return null
+
+  return (
+    <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-4 space-y-4">
+      <div>
+        <h3 className="text-sm font-semibold text-gray-700 dark:text-gray-300">Risikozahlen-Vergleich (Fachmann vs. Tool)</h3>
+        <p className="text-xs text-gray-500 mt-0.5">
+          R = S × (F + W + P), Ampel wie in der Excel. Fine-Kinney (P×E×C) als zweite, US-anerkannte Bewertung.
+        </p>
+      </div>
+
+      {agreement && agreement.n > 0 && (
+        <div className="grid grid-cols-2 md:grid-cols-5 gap-3">
+          <Stat label="Schwere S ±1" pct={agreement.severity_within1} />
+          <Stat label="Haeufigkeit F ±1" pct={agreement.frequency_within1} />
+          <Stat label="Wahrsch. W ±1" pct={agreement.probability_within1} />
+          <Stat label="Vermeidb. P ±1" pct={agreement.avoidance_within1} />
+          <Stat label="Ranking (FK)" pct={agreement.rank_concordance} />
+        </div>
+      )}
+
+      <div className="overflow-x-auto">
+        <table className="w-full text-xs">
+          <thead>
+            <tr className="text-gray-500 border-b border-gray-200 dark:border-gray-700">
+              <th className="text-left py-1.5 px-2">Gefaehrdung</th>
+              <th className="px-1 text-center" colSpan={5}>Fachmann · S F W P <strong>R</strong></th>
+              <th className="px-1 text-center border-l border-gray-200 dark:border-gray-700" colSpan={5}>Tool · S F W P <strong>R</strong> / FK</th>
+            </tr>
+          </thead>
+          <tbody>
+            {pairs.map((p, i) => {
+              const engR = p.eng_severity * (p.eng_frequency + p.eng_probability + p.eng_avoidance)
+              return (
+                <tr key={i} className="border-b border-gray-100 dark:border-gray-700/50">
+                  <td className="py-1 px-2 text-gray-700 dark:text-gray-300">{p.hazard_name || '—'}</td>
+                  <td className="text-center text-gray-500">{p.gt_severity}</td>
+                  <td className="text-center text-gray-500">{p.gt_frequency}</td>
+                  <td className="text-center text-gray-500">{p.gt_probability}</td>
+                  <td className="text-center text-gray-500">{p.gt_avoidance}</td>
+                  <td className={`text-center font-bold rounded ${cellColor[ampelEN(p.gt_risk)]}`}>{p.gt_risk}</td>
+                  <td className="text-center text-gray-500 border-l border-gray-200 dark:border-gray-700">{p.eng_severity}</td>
+                  <td className="text-center text-gray-500">{p.eng_frequency}</td>
+                  <td className="text-center text-gray-500">{p.eng_probability}</td>
+                  <td className="text-center text-gray-500">{p.eng_avoidance}</td>
+                  <td className="text-center">
+                    <span className={`inline-block font-bold rounded px-1.5 ${cellColor[ampelEN(engR)]}`}>{engR}</span>
+                    <span className={`ml-1 inline-block rounded px-1 ${cellColor[ampelBand(p.fk_band)]}`} title={`Fine-Kinney ${p.fk_band}`}>FK&nbsp;{Math.round(p.fk_score)}</span>
+                  </td>
+                </tr>
+              )
+            })}
+          </tbody>
+        </table>
+      </div>
+    </div>
+  )
+}
@@ -48,6 +48,20 @@ export interface CategoryScore {
  category: string; gt_count: number; match_count: number; coverage: number
 }

+export interface RiskComparisonPair {
+  hazard_name: string
+  gt_severity: number; gt_frequency: number; gt_probability: number; gt_avoidance: number; gt_risk: number
+  eng_severity: number; eng_frequency: number; eng_probability: number; eng_avoidance: number
+  fk_score: number; fk_band: string
+}
+
+export interface RiskAgreement {
+  n: number
+  severity_within1: number; frequency_within1: number
+  probability_within1: number; avoidance_within1: number
+  rank_concordance: number
+}
+
 export interface BenchmarkResult {
  coverage_score: number
  measure_coverage: number
@@ -58,6 +72,8 @@ export interface BenchmarkResult {
  extra_in_engine: HazardSummary[]
  category_breakdown: CategoryScore[]
  risk_rank_pairs: { gt_rank: number; engine_rank: number; hazard_name: string; gt_risk_score: number }[]
+  risk_comparison?: RiskComparisonPair[]
+  risk_agreement?: RiskAgreement
 }

 interface UseBenchmarkReturn {
@@ -6,6 +6,7 @@ import { useBenchmark } from './_hooks/useBenchmark'
 import { GTImportForm } from './_components/GTImportForm'
 import { HazardComparisonTable } from './_components/HazardComparisonTable'
 import { CategoryBreakdown } from './_components/CategoryBreakdown'
+import { RiskComparison } from './_components/RiskComparison'

 export default function BenchmarkPage() {
  const { projectId } = useParams<{ projectId: string }>()
@@ -102,6 +103,9 @@ export default function BenchmarkPage() {
          {/* Category Breakdown */}
          <CategoryBreakdown breakdown={result.category_breakdown || []} />

+          {/* Risk-number comparison (tool vs professional) with traffic lights */}
+          <RiskComparison pairs={result.risk_comparison} agreement={result.risk_agreement} />
+
          {/* Hazard Comparison Table */}
          <HazardComparisonTable
            matched={result.matched_pairs || []}
@@ -105,6 +105,7 @@ func (h *IACEHandler) RunBenchmark(c *gin.Context) {
 	}

 	result := iace.CompareBenchmark(gt, hazards, mitigations)
+	result.RiskComparison, result.RiskAgreement = iace.ComputeRiskComparison(result.MatchedPairs)
 	c.JSON(http.StatusOK, result)
 }

@@ -74,8 +74,12 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
 	usedEng := make(map[int]bool)
 	var matched []HazardMatchPair

+	// 1:n matching: a single broad engine hazard may legitimately cover several
+	// fine-grained GT sub-scenarios (e.g. one "crush under descending load"
+	// pattern covers the GT's separate foot / hand / leg crush rows). We only
+	// block a GT entry from matching twice; an engine hazard may match several.
 	for _, p := range pairs {
-		if usedGT[p.gtIdx] || usedEng[p.engIdx] {
+		if usedGT[p.gtIdx] {
 			continue
 		}
 		usedGT[p.gtIdx] = true
@@ -80,6 +80,9 @@ type BenchmarkResult struct {
 	ExtraInEngine     []HazardSummary    `json:"extra_in_engine"`
 	CategoryBreakdown []CategoryScore    `json:"category_breakdown"`
 	RiskRankPairs     []RiskRankPair     `json:"risk_rank_pairs"`
+	// Risk-number comparison (tool vs professional) per matched hazard + aggregate.
+	RiskComparison    []RiskComparisonPair `json:"risk_comparison,omitempty"`
+	RiskAgreement     RiskAgreement        `json:"risk_agreement"`
 }

 // HazardMatchPair links a GT entry to an engine hazard.
@@ -40,6 +40,32 @@ func GetLiftEndstopPatterns() []HazardPattern {
 				"Verhindert ein Trittblech / Unterfahrschutz das Hineinfahren von Fuessen?",
 			},
 		},
+		{
+			ID:                    "HP2103",
+			NameDE:                "Bestimmungswidrige Personenbefoerderung auf Hebezeug",
+			NameEN:                "Misuse: transporting persons on a lifting device",
+			RequiredComponentTags: []string{"gravity_risk"},
+			RequiredEnergyTags:    []string{"gravitational"},
+			MachineTypes:          liftTypes,
+			GeneratedHazardCats:   []string{"mechanical_hazard"},
+			SuggestedMeasureIDs:   []string{"M601", "M141"},
+			Priority:              90,
+			ScenarioDE: "Die Hebevorrichtung wird bestimmungswidrig zum Heben oder Befoerdern von " +
+				"Personen verwendet (z.B. Mitfahren auf der Plattform). Absturz aus der Hoehe oder " +
+				"Quetschen bei unkontrollierter Bewegung.",
+			TriggerDE:           "Fehlendes Verbotsschild, keine konstruktive Verhinderung (z.B. zu kleine Standflaeche/Haltepunkte), unzureichende Unterweisung",
+			HarmDE:              "Absturz aus der Hoehe, schwere Verletzungen, Tod",
+			AffectedDE:          "Bediener, Dritte",
+			ZoneDE:              "Hubplattform / Lastaufnahme",
+			DefaultSeverity:     4,
+			DefaultExposure:     1,
+			DefaultAvoidability: 2,
+			ISO12100Section:     "6.4.5 Vernuenftigerweise vorhersehbare Fehlanwendung",
+			ClarificationQuestionsDE: []string{
+				"Ist ein Verbotsschild 'Personenbefoerderung verboten' (EN ISO 7010 P-Zeichen) angebracht?",
+				"Verhindert die Konstruktion das Mitfahren (z.B. zu kleine Standflaeche, keine Haltepunkte)?",
+			},
+		},
 		{
 			ID:                    "HP2101",
 			NameDE:                "Hand- oder Koerper-Quetschung gegen feste Struktur beim Hochfahren der Hubeinheit",
@@ -0,0 +1,129 @@
+package iace
+
+// Risk-number comparison for the benchmark: for every matched hazard, the
+// tool's risk parameters (EN-62061-style S/F/W/P + Fine-Kinney) next to the
+// professional's GT values, plus aggregate agreement. Used by the benchmark
+// endpoint so the Risikobewertung comparison is visible in the tab.
+
+// RiskComparisonPair is one matched hazard's tool-vs-professional risk numbers.
+type RiskComparisonPair struct {
+	HazardName     string  `json:"hazard_name"`
+	GTSeverity     int     `json:"gt_severity"`
+	GTFrequency    int     `json:"gt_frequency"`
+	GTProbability  int     `json:"gt_probability"` // GT column W
+	GTAvoidance    int     `json:"gt_avoidance"`   // GT column P
+	GTRisk         int     `json:"gt_risk"`        // GT column R
+	EngSeverity    int     `json:"eng_severity"`
+	EngFrequency   int     `json:"eng_frequency"`
+	EngProbability int     `json:"eng_probability"`
+	EngAvoidance   int     `json:"eng_avoidance"`
+	FKScore        float64 `json:"fk_score"`
+	FKBand         string  `json:"fk_band"`
+}
+
+// RiskAgreement aggregates how close the tool's risk numbers are to the GT.
+type RiskAgreement struct {
+	N                  int     `json:"n"`
+	SeverityWithin1    float64 `json:"severity_within1"`
+	FrequencyWithin1   float64 `json:"frequency_within1"`
+	ProbabilityWithin1 float64 `json:"probability_within1"`
+	AvoidanceWithin1   float64 `json:"avoidance_within1"`
+	RankConcordance    float64 `json:"rank_concordance"` // Fine-Kinney vs GT R
+}
+
+// ComputeRiskComparison derives the tool's risk numbers for each matched hazard
+// and compares them to the professional's GT values.
+func ComputeRiskComparison(matched []HazardMatchPair) ([]RiskComparisonPair, RiskAgreement) {
+	pairs := make([]RiskComparisonPair, 0, len(matched))
+	var sevOK, freqOK, probOK, avoidOK, n int
+	var engFK, gtR []float64
+
+	for _, m := range matched {
+		eh := m.EngineHazard
+		cats := []string{eh.Category}
+		scenario := eh.Scenario
+		if scenario == "" {
+			scenario = eh.Name
+		}
+		lifecycle := splitLifecyclePhases(eh.LifecyclePhase)
+
+		engS := EstimateSeverity(cats, scenario, 0)
+		engF := EstimateFrequency(lifecycle)
+		engW := EstimateProbabilityW(cats, scenario)
+		engP := EstimateAvoidabilityP(cats, scenario)
+		fk := SuggestFineKinney(cats, scenario, lifecycle, 0)
+		gt := m.GTEntry.RiskIn
+
+		pairs = append(pairs, RiskComparisonPair{
+			HazardName:     m.GTEntry.HazardType,
+			GTSeverity:     gt.S, GTFrequency: gt.F, GTProbability: gt.W, GTAvoidance: gt.P, GTRisk: gt.R,
+			EngSeverity:    engS, EngFrequency: engF, EngProbability: engW, EngAvoidance: engP,
+			FKScore:        fk.Score, FKBand: fk.Band,
+		})
+
+		if gt.S > 0 {
+			n++
+			if abs(engS-gt.S) <= 1 {
+				sevOK++
+			}
+			if gt.F > 0 && abs(engF-gt.F) <= 1 {
+				freqOK++
+			}
+			if gt.W > 0 && abs(engW-gt.W) <= 1 {
+				probOK++
+			}
+			if gt.P > 0 && abs(engP-gt.P) <= 1 {
+				avoidOK++
+			}
+			engFK = append(engFK, fk.Score)
+			gtR = append(gtR, float64(gt.R))
+		}
+	}
+
+	agg := RiskAgreement{N: n}
+	if n > 0 {
+		agg.SeverityWithin1 = pct(sevOK, n)
+		agg.FrequencyWithin1 = pct(freqOK, n)
+		agg.ProbabilityWithin1 = pct(probOK, n)
+		agg.AvoidanceWithin1 = pct(avoidOK, n)
+		agg.RankConcordance = rankConcordance(engFK, gtR)
+	}
+	return pairs, agg
+}
+
+func abs(x int) int {
+	if x < 0 {
+		return -x
+	}
+	return x
+}
+
+func pct(x, total int) float64 {
+	if total == 0 {
+		return 0
+	}
+	return 100 * float64(x) / float64(total)
+}
+
+// rankConcordance returns the fraction of comparable hazard pairs the tool
+// orders the same way the professional does (scale-invariant, 0.5 = random).
+func rankConcordance(a, b []float64) float64 {
+	concordant, discordant := 0, 0
+	for i := 0; i < len(a); i++ {
+		for j := i + 1; j < len(a); j++ {
+			da, db := a[i]-a[j], b[i]-b[j]
+			if da == 0 || db == 0 {
+				continue
+			}
+			if (da > 0) == (db > 0) {
+				concordant++
+			} else {
+				discordant++
+			}
+		}
+	}
+	if concordant+discordant == 0 {
+		return 0
+	}
+	return 100 * float64(concordant) / float64(concordant+discordant)
+}