feat(iace): benchmark system + erklaerteil + dedup-fix
Build + Deploy / build-backend-compliance (push) Successful in 3m34s
Build + Deploy / build-ai-sdk (push) Successful in 1m6s
Build + Deploy / build-developer-portal (push) Successful in 1m7s
Build + Deploy / build-tts (push) Successful in 1m58s
Build + Deploy / build-document-crawler (push) Successful in 57s
Build + Deploy / build-dsms-gateway (push) Successful in 34s
Build + Deploy / build-admin-compliance (push) Successful in 2m7s
Build + Deploy / build-dsms-node (push) Successful in 29s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 17s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m28s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 42s
CI / test-python-backend (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 27s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 3m10s
Build + Deploy / build-backend-compliance (push) Successful in 3m34s
Build + Deploy / build-ai-sdk (push) Successful in 1m6s
Build + Deploy / build-developer-portal (push) Successful in 1m7s
Build + Deploy / build-tts (push) Successful in 1m58s
Build + Deploy / build-document-crawler (push) Successful in 57s
Build + Deploy / build-dsms-gateway (push) Successful in 34s
Build + Deploy / build-admin-compliance (push) Successful in 2m7s
Build + Deploy / build-dsms-node (push) Successful in 29s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 17s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m28s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 42s
CI / test-python-backend (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 27s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 3m10s
- Erklaerteil-Template fuer Risikobeurteilungen (risk_assessment_template.go) in PDF-Export, Markdown-Export und Frontend ReportPrintView eingebaut
- Ground Truth Benchmark-System: Datenmodell, Fuzzy-Matching-Engine, 3 API Endpoints (import-gt, benchmark, benchmark/summary)
- Frontend Benchmark-Tab mit Score-Cards, Kategorie-Breakdown, Hazard-Vergleichstabelle (Zugeordnet/Fehlend/Extra), Business Impact
- Erster Benchmark: 13.3% Coverage (Baseline) gegen 60 GT-Eintraege
- Dedup-Fix: seenCat[cat] -> seenCatZone[cat+zone] erlaubt mehrere Gefaehrdungen pro Kategorie an verschiedenen Gefahrenstellen
- Komponenten-spezifische Hazard-Namen und Zone-basierte Zuordnung

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,365 @@
|
||||
package iace
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ============================================================================
|
||||
// Fuzzy matching: Ground Truth entries ↔ Engine hazards
|
||||
// ============================================================================
|
||||
|
||||
// matchThreshold is the minimum fuzzyMatchScore a (GT entry, engine hazard)
// combination needs to be kept as a match candidate in CompareBenchmark.
// With the weights used by fuzzyMatchScore (0.4 category, 0.3 keywords,
// 0.3 zone) a category hit alone already clears this threshold.
const matchThreshold = 0.35
|
||||
|
||||
// categoryMap translates a Ground Truth hazard group into the engine category
// fragments that count as the same category.
//
// Keys are German group names in the form produced by normalizeDE (lowercase,
// umlauts transliterated). Values are looked for as substrings of the
// lowercased engine category by categoryMatchScore, so a fragment does not have
// to sit at the start of the category name.
var categoryMap = map[string][]string{
	// Groups that correspond to a single engine category fragment.
	"mechanische gefaehrdungen":  {"mechanical"},
	"elektrische gefaehrdungen":  {"electrical"},
	"thermische gefaehrdungen":   {"thermal"},
	"ergonomische gefaehrdungen": {"ergonomic"},
	"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},

	// Groups that accept more than one engine category fragment.
	"gefaehrdungen durch laerm":                      {"noise", "ergonomic"},
	"gefaehrdungen durch vibration":                  {"noise", "vibration"},
	"gefaehrdungen durch strahlung":                  {"radiation", "emc"},
	"gefaehrdungen durch materialien und substanzen": {"material", "environmental"},
}
|
||||
|
||||
// synonymSets lists groups of word stems (German and English) that
// keywordMatchScore treats as referring to the same hazard concept.
//
// Stems are tested with strings.Contains against text normalized by
// normalizeDE, so each stem also hits any longer word that contains it.
// NOTE(review): some stems overlap across sets (e.g. "druck" is contained in
// "druckluft"), so one word can activate more than one set.
var synonymSets = [][]string{
	// Mechanical hazard mechanisms.
	{"quetsch", "crush", "einklemm", "klemm"},
	{"scher", "shear", "absch"},
	{"schneid", "cut", "schnitt"},
	{"stoss", "schlag", "impact", "treff", "aufprall"},
	{"einzug", "fang", "erfass", "entangle", "wickel"},

	// Electrical, fire and thermal.
	{"elektrisch", "stromschlag", "electric", "beruehr", "spannungsfuehr"},
	{"brand", "feuer", "fire", "kabelbrand", "kurzschluss"},
	{"verbrenn", "burn", "heiss", "thermisch"},

	// Noise, vibration, ergonomics.
	{"laerm", "noise", "gehoer", "schall"},
	{"vibration", "schwing"},
	{"ergonom", "haltung", "handhabung", "bedien"},

	// Media and energy sources.
	{"kuehlschmierstoff", "kss", "aerosol", "coolant"},
	{"pneumat", "druckluft", "compressed"},
	{"hydraul", "druck", "pressure"},

	// Machine components.
	{"roboter", "robot", "roboterarm"},
	{"greifer", "gripper", "schunk"},
	{"foerderband", "transport", "conveyor"},
	{"schutzzaun", "schutzgitter", "fence", "guard"},
	{"werkzeugmaschine", "robodrill", "bearbeitungszentrum", "wzm"},

	// Miscellaneous.
	{"stolper", "rutsch", "slip", "trip"},
	{"leckage", "austreten", "leak"},
	{"einstich", "puncture", "spritz"},
}
|
||||
|
||||
// CompareBenchmark runs the full comparison between Ground Truth and engine output.
|
||||
func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigation) *BenchmarkResult {
|
||||
if gt == nil || len(gt.Entries) == 0 {
|
||||
return &BenchmarkResult{}
|
||||
}
|
||||
|
||||
engineSummaries := make([]HazardSummary, len(hazards))
|
||||
for i, h := range hazards {
|
||||
engineSummaries[i] = HazardSummary{
|
||||
ID: h.ID.String(),
|
||||
Name: h.Name,
|
||||
Category: h.Category,
|
||||
Zone: h.HazardousZone,
|
||||
}
|
||||
}
|
||||
|
||||
// Build score matrix: gt[i] × engine[j]
|
||||
type scoredPair struct {
|
||||
gtIdx, engIdx int
|
||||
score float64
|
||||
reason string
|
||||
}
|
||||
var pairs []scoredPair
|
||||
for i := range gt.Entries {
|
||||
for j := range hazards {
|
||||
score, reason := fuzzyMatchScore(>.Entries[i], &hazards[j])
|
||||
if score >= matchThreshold {
|
||||
pairs = append(pairs, scoredPair{i, j, score, reason})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Greedy best-first 1:1 assignment
|
||||
sort.Slice(pairs, func(a, b int) bool { return pairs[a].score > pairs[b].score })
|
||||
usedGT := make(map[int]bool)
|
||||
usedEng := make(map[int]bool)
|
||||
var matched []HazardMatchPair
|
||||
|
||||
for _, p := range pairs {
|
||||
if usedGT[p.gtIdx] || usedEng[p.engIdx] {
|
||||
continue
|
||||
}
|
||||
usedGT[p.gtIdx] = true
|
||||
usedEng[p.engIdx] = true
|
||||
matched = append(matched, HazardMatchPair{
|
||||
GTEntry: gt.Entries[p.gtIdx],
|
||||
EngineHazard: engineSummaries[p.engIdx],
|
||||
MatchScore: p.score,
|
||||
MatchReason: p.reason,
|
||||
})
|
||||
}
|
||||
|
||||
// Collect unmatched
|
||||
var missing []GroundTruthEntry
|
||||
for i, e := range gt.Entries {
|
||||
if !usedGT[i] {
|
||||
missing = append(missing, e)
|
||||
}
|
||||
}
|
||||
var extra []HazardSummary
|
||||
for i, s := range engineSummaries {
|
||||
if !usedEng[i] {
|
||||
extra = append(extra, s)
|
||||
}
|
||||
}
|
||||
|
||||
// Category breakdown
|
||||
catGT := map[string]int{}
|
||||
catMatch := map[string]int{}
|
||||
for _, e := range gt.Entries {
|
||||
cat := normalizeCategoryDE(e.HazardGroup)
|
||||
catGT[cat]++
|
||||
}
|
||||
for _, m := range matched {
|
||||
cat := normalizeCategoryDE(m.GTEntry.HazardGroup)
|
||||
catMatch[cat]++
|
||||
}
|
||||
var breakdown []CategoryScore
|
||||
for cat, total := range catGT {
|
||||
cov := 0.0
|
||||
if total > 0 {
|
||||
cov = float64(catMatch[cat]) / float64(total)
|
||||
}
|
||||
breakdown = append(breakdown, CategoryScore{
|
||||
Category: cat, GTCount: total, MatchCount: catMatch[cat], Coverage: cov,
|
||||
})
|
||||
}
|
||||
sort.Slice(breakdown, func(i, j int) bool { return breakdown[i].GTCount > breakdown[j].GTCount })
|
||||
|
||||
// Measure coverage (simplified: count GT entries where at least 1 measure keyword matches)
|
||||
measMatched := 0
|
||||
for _, m := range matched {
|
||||
if measureOverlap(m.GTEntry.Measures, mitigations) {
|
||||
measMatched++
|
||||
}
|
||||
}
|
||||
measCov := 0.0
|
||||
if len(matched) > 0 {
|
||||
measCov = float64(measMatched) / float64(len(matched))
|
||||
}
|
||||
|
||||
// Risk rank comparison
|
||||
rankPairs := buildRiskRankPairs(matched)
|
||||
|
||||
coverage := 0.0
|
||||
if len(gt.Entries) > 0 {
|
||||
coverage = float64(len(matched)) / float64(len(gt.Entries))
|
||||
}
|
||||
|
||||
return &BenchmarkResult{
|
||||
CoverageScore: coverage,
|
||||
MeasureCoverage: measCov,
|
||||
TotalGT: len(gt.Entries),
|
||||
TotalEngine: len(hazards),
|
||||
MatchedPairs: matched,
|
||||
MissingFromEngine: missing,
|
||||
ExtraInEngine: extra,
|
||||
CategoryBreakdown: breakdown,
|
||||
RiskRankPairs: rankPairs,
|
||||
}
|
||||
}
|
||||
|
||||
// fuzzyMatchScore computes a 0-1 similarity between a GT entry and an engine hazard.
|
||||
func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
|
||||
var score float64
|
||||
var reasons []string
|
||||
|
||||
// 1. Category match (weight 0.4)
|
||||
catScore := categoryMatchScore(gt.HazardGroup, h.Category)
|
||||
score += 0.4 * catScore
|
||||
if catScore > 0 {
|
||||
reasons = append(reasons, "Kategorie")
|
||||
}
|
||||
|
||||
// 2. Keyword/synonym match (weight 0.3)
|
||||
kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario)
|
||||
score += 0.3 * kwScore
|
||||
if kwScore > 0 {
|
||||
reasons = append(reasons, "Keywords")
|
||||
}
|
||||
|
||||
// 3. Component/zone match (weight 0.3)
|
||||
zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule)
|
||||
score += 0.3 * zoneScore
|
||||
if zoneScore > 0 {
|
||||
reasons = append(reasons, "Zone")
|
||||
}
|
||||
|
||||
return score, strings.Join(reasons, "+")
|
||||
}
|
||||
|
||||
func categoryMatchScore(gtGroup, engCategory string) float64 {
|
||||
normalized := normalizeDE(gtGroup)
|
||||
prefixes, ok := categoryMap[normalized]
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
engLower := strings.ToLower(engCategory)
|
||||
for _, p := range prefixes {
|
||||
if strings.Contains(engLower, p) {
|
||||
return 1.0
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func keywordMatchScore(gtType, gtCause, engName, engDesc, engScenario string) float64 {
|
||||
gtText := normalizeDE(gtType + " " + gtCause)
|
||||
engText := normalizeDE(engName + " " + engDesc + " " + engScenario)
|
||||
|
||||
matchedSets := 0
|
||||
totalRelevant := 0
|
||||
|
||||
for _, synSet := range synonymSets {
|
||||
gtHas := false
|
||||
engHas := false
|
||||
for _, syn := range synSet {
|
||||
if strings.Contains(gtText, syn) {
|
||||
gtHas = true
|
||||
}
|
||||
if strings.Contains(engText, syn) {
|
||||
engHas = true
|
||||
}
|
||||
}
|
||||
if gtHas {
|
||||
totalRelevant++
|
||||
if engHas {
|
||||
matchedSets++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if totalRelevant == 0 {
|
||||
return 0
|
||||
}
|
||||
return float64(matchedSets) / float64(totalRelevant)
|
||||
}
|
||||
|
||||
func zoneMatchScore(gtZone, gtSubgroup, engZone, engModule string) float64 {
|
||||
gtText := normalizeDE(gtZone + " " + gtSubgroup)
|
||||
engText := normalizeDE(engZone + " " + engModule)
|
||||
|
||||
if gtText == "" || engText == "" {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Check for significant word overlap
|
||||
gtWords := extractSignificantWords(gtText)
|
||||
engWords := extractSignificantWords(engText)
|
||||
|
||||
if len(gtWords) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
matched := 0
|
||||
for _, gw := range gtWords {
|
||||
for _, ew := range engWords {
|
||||
if strings.Contains(ew, gw) || strings.Contains(gw, ew) {
|
||||
matched++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return float64(matched) / float64(len(gtWords))
|
||||
}
|
||||
|
||||
// benchmarkStopWords holds German function words that carry no meaning for
// matching. Entries are lowercase with transliterated umlauts, i.e. in the form
// normalizeDE produces. Built once instead of on every call.
var benchmarkStopWords = map[string]struct{}{
	"der": {}, "die": {}, "das": {}, "und": {}, "oder": {},
	"von": {}, "in": {}, "an": {}, "am": {}, "im": {},
	"zu": {}, "bei": {}, "mit": {}, "des": {}, "den": {},
	"dem": {}, "ein": {}, "eine": {}, "einer": {}, "einem": {},
	"fuer": {}, "auf": {}, "aus": {}, "um": {}, "nach": {},
	"ueber": {}, "unter": {}, "vor": {}, "durch": {},
}

// benchmarkWordSeparators are punctuation characters treated like whitespace
// when splitting text into words.
const benchmarkWordSeparators = ".,;:!?()[]{}\"'/"

// extractSignificantWords splits text into words and returns those that are at
// least three bytes long and not in the stop-word list, in their original
// order. Whitespace and the characters in benchmarkWordSeparators delimit
// words, so "schutzzaun," yields "schutzzaun" rather than a token with trailing
// punctuation that would defeat later substring comparisons.
//
// The text is expected to be lowercased and umlaut-transliterated already (as
// done by normalizeDE); no case folding happens here. The result is nil when no
// word qualifies.
func extractSignificantWords(text string) []string {
	cleaned := strings.Map(func(r rune) rune {
		if strings.ContainsRune(benchmarkWordSeparators, r) {
			return ' '
		}
		return r
	}, text)

	var sig []string
	for _, w := range strings.Fields(cleaned) {
		if len(w) < 3 {
			continue
		}
		if _, stop := benchmarkStopWords[w]; stop {
			continue
		}
		sig = append(sig, w)
	}
	return sig
}
|
||||
|
||||
// NormalizeDEPublic is the exported version of normalizeDE for use outside this package.
|
||||
func NormalizeDEPublic(s string) string { return normalizeDE(s) }
|
||||
|
||||
// normalizeDE trims surrounding whitespace, lowercases s and transliterates the
// German special characters ä, ö, ü and ß to ae, oe, ue and ss.
// NOTE(review): the original comment says this mirrors narrative_parser; that
// code is not visible here.
func normalizeDE(s string) string {
	transliterations := [...][2]string{
		{"ä", "ae"},
		{"ö", "oe"},
		{"ü", "ue"},
		{"ß", "ss"},
	}

	out := strings.TrimSpace(s)
	out = strings.ToLower(out)
	for _, pair := range transliterations {
		out = strings.ReplaceAll(out, pair[0], pair[1])
	}
	return out
}
|
||||
|
||||
func normalizeCategoryDE(group string) string {
|
||||
n := normalizeDE(group)
|
||||
// Shorten for display
|
||||
n = strings.TrimPrefix(n, "gefaehrdungen durch ")
|
||||
n = strings.TrimPrefix(n, "gefaehrdungen im zusammenhang mit ")
|
||||
return n
|
||||
}
|
||||
|
||||
func measureOverlap(gtMeasures []string, mitigations []Mitigation) bool {
|
||||
for _, gm := range gtMeasures {
|
||||
gmNorm := normalizeDE(gm)
|
||||
for _, m := range mitigations {
|
||||
mNorm := normalizeDE(m.Name + " " + m.Description)
|
||||
// Check if any significant word from GT measure appears in engine mitigation
|
||||
words := extractSignificantWords(gmNorm)
|
||||
for _, w := range words {
|
||||
if strings.Contains(mNorm, w) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func buildRiskRankPairs(matched []HazardMatchPair) []RiskRankPair {
|
||||
if len(matched) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sort by GT risk descending to get GT rank
|
||||
type ranked struct {
|
||||
idx int
|
||||
gtRisk int
|
||||
name string
|
||||
}
|
||||
items := make([]ranked, len(matched))
|
||||
for i, m := range matched {
|
||||
items[i] = ranked{i, m.GTEntry.RiskIn.R, m.GTEntry.HazardType}
|
||||
}
|
||||
sort.Slice(items, func(a, b int) bool { return items[a].gtRisk > items[b].gtRisk })
|
||||
|
||||
pairs := make([]RiskRankPair, len(items))
|
||||
for rank, item := range items {
|
||||
pairs[rank] = RiskRankPair{
|
||||
GTRank: rank + 1,
|
||||
EngineRank: 0, // Engine has no assessment yet for auto-generated hazards
|
||||
HazardName: item.name,
|
||||
GTRiskScore: item.gtRisk,
|
||||
EngineRisk: 0,
|
||||
}
|
||||
}
|
||||
return pairs
|
||||
}
|
||||
Reference in New Issue
Block a user