Files
breakpilot-compliance/ai-compliance-sdk/internal/iace/benchmark_matcher.go
T
Benjamin Admin 8bb90d73e5
Build + Deploy / build-backend-compliance (push) Successful in 3m34s
Build + Deploy / build-ai-sdk (push) Successful in 1m6s
Build + Deploy / build-developer-portal (push) Successful in 1m7s
Build + Deploy / build-tts (push) Successful in 1m58s
Build + Deploy / build-document-crawler (push) Successful in 57s
Build + Deploy / build-dsms-gateway (push) Successful in 34s
Build + Deploy / build-admin-compliance (push) Successful in 2m7s
Build + Deploy / build-dsms-node (push) Successful in 29s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 17s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m28s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 42s
CI / test-python-backend (push) Successful in 37s
CI / test-python-document-crawler (push) Successful in 27s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 3m10s
feat(iace): benchmark system + erklaerteil + dedup-fix
- Erklaerteil-Template fuer Risikobeurteilungen (risk_assessment_template.go)
  in PDF-Export, Markdown-Export und Frontend ReportPrintView eingebaut
- Ground Truth Benchmark-System: Datenmodell, Fuzzy-Matching-Engine,
  3 API Endpoints (import-gt, benchmark, benchmark/summary)
- Frontend Benchmark-Tab mit Score-Cards, Kategorie-Breakdown,
  Hazard-Vergleichstabelle (Zugeordnet/Fehlend/Extra), Business Impact
- Erster Benchmark: 13.3% Coverage (Baseline) gegen 60 GT-Eintraege
- Dedup-Fix: seenCat[cat] -> seenCatZone[cat+zone] erlaubt mehrere
  Gefaehrdungen pro Kategorie an verschiedenen Gefahrenstellen
- Komponenten-spezifische Hazard-Namen und Zone-basierte Zuordnung

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-13 01:02:33 +02:00

366 lines
10 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package iace
import (
"sort"
"strings"
)
// ============================================================================
// Fuzzy matching: Ground Truth entries ↔ Engine hazards
// ============================================================================

// matchThreshold is the minimum combined fuzzy score (0-1, see
// fuzzyMatchScore) a GT-entry/engine-hazard pair must reach to be kept as
// a candidate for the 1:1 assignment in CompareBenchmark.
const matchThreshold = 0.35
// categoryMap maps a normalized GT hazard_group (German, lowercased with
// umlauts folded — see normalizeDE) to engine category substrings accepted
// as a match by categoryMatchScore. Some groups deliberately map to more
// than one engine category (e.g. vibration also matches "noise") —
// presumably because the engine files those hazards under a broader
// category; NOTE(review): verify against the engine's category taxonomy.
var categoryMap = map[string][]string{
	"mechanische gefaehrdungen":                              {"mechanical"},
	"elektrische gefaehrdungen":                              {"electrical"},
	"thermische gefaehrdungen":                               {"thermal"},
	"gefaehrdungen durch laerm":                              {"noise", "ergonomic"},
	"gefaehrdungen durch vibration":                          {"noise", "vibration"},
	"gefaehrdungen durch strahlung":                          {"radiation", "emc"},
	"gefaehrdungen durch materialien und substanzen":         {"material", "environmental"},
	"ergonomische gefaehrdungen":                             {"ergonomic"},
	"gefaehrdungen im zusammenhang mit der einsatzumgebung":  {"environmental"},
}
// synonymSets groups equivalent hazard terms (mixed German/English, mostly
// word stems so substring matching also catches inflected forms, e.g.
// "quetsch" matches "Quetschung"/"quetschen") for keywordMatchScore. A GT
// text and an engine text "share" a set when each contains at least one
// term from it.
var synonymSets = [][]string{
	{"quetsch", "crush", "einklemm", "klemm"},
	{"scher", "shear", "absch"},
	{"schneid", "cut", "schnitt"},
	{"stoss", "schlag", "impact", "treff", "aufprall"},
	{"einzug", "fang", "erfass", "entangle", "wickel"},
	{"elektrisch", "stromschlag", "electric", "beruehr", "spannungsfuehr"},
	{"brand", "feuer", "fire", "kabelbrand", "kurzschluss"},
	{"verbrenn", "burn", "heiss", "thermisch"},
	{"laerm", "noise", "gehoer", "schall"},
	{"vibration", "schwing"},
	{"ergonom", "haltung", "handhabung", "bedien"},
	{"kuehlschmierstoff", "kss", "aerosol", "coolant"},
	{"pneumat", "druckluft", "compressed"},
	{"hydraul", "druck", "pressure"},
	{"roboter", "robot", "roboterarm"},
	{"greifer", "gripper", "schunk"},
	{"foerderband", "transport", "conveyor"},
	{"schutzzaun", "schutzgitter", "fence", "guard"},
	{"werkzeugmaschine", "robodrill", "bearbeitungszentrum", "wzm"},
	{"stolper", "rutsch", "slip", "trip"},
	{"leckage", "austreten", "leak"},
	{"einstich", "puncture", "spritz"},
}
// CompareBenchmark runs the full comparison between Ground Truth and engine
// output.
//
// It fuzzy-scores every GT-entry/engine-hazard pair (fuzzyMatchScore),
// performs a greedy best-first 1:1 assignment over pairs that clear
// matchThreshold, and derives overall coverage, unmatched lists, a
// per-category breakdown, measure coverage and risk-rank pairs from that
// assignment. Returns an empty result when gt is nil or has no entries.
func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigation) *BenchmarkResult {
	if gt == nil || len(gt.Entries) == 0 {
		return &BenchmarkResult{}
	}

	// Lightweight summaries of the engine hazards for the result payload.
	engineSummaries := make([]HazardSummary, len(hazards))
	for i, h := range hazards {
		engineSummaries[i] = HazardSummary{
			ID:       h.ID.String(),
			Name:     h.Name,
			Category: h.Category,
			Zone:     h.HazardousZone,
		}
	}

	// Build the score matrix gt[i] × engine[j], keeping only candidate
	// pairs above the threshold.
	type scoredPair struct {
		gtIdx, engIdx int
		score         float64
		reason        string
	}
	var pairs []scoredPair
	for i := range gt.Entries {
		for j := range hazards {
			score, reason := fuzzyMatchScore(&gt.Entries[i], &hazards[j])
			if score >= matchThreshold {
				pairs = append(pairs, scoredPair{i, j, score, reason})
			}
		}
	}

	// Greedy best-first 1:1 assignment. A stable sort keeps equal-score
	// pairs in matrix order, so the assignment is deterministic across
	// runs (sort.Slice gives an unspecified order for ties).
	sort.SliceStable(pairs, func(a, b int) bool { return pairs[a].score > pairs[b].score })
	usedGT := make(map[int]bool)
	usedEng := make(map[int]bool)
	var matched []HazardMatchPair
	for _, p := range pairs {
		if usedGT[p.gtIdx] || usedEng[p.engIdx] {
			continue
		}
		usedGT[p.gtIdx] = true
		usedEng[p.engIdx] = true
		matched = append(matched, HazardMatchPair{
			GTEntry:      gt.Entries[p.gtIdx],
			EngineHazard: engineSummaries[p.engIdx],
			MatchScore:   p.score,
			MatchReason:  p.reason,
		})
	}

	// Collect unmatched GT entries (missing from engine) and unmatched
	// engine hazards (extra in engine).
	var missing []GroundTruthEntry
	for i, e := range gt.Entries {
		if !usedGT[i] {
			missing = append(missing, e)
		}
	}
	var extra []HazardSummary
	for i, s := range engineSummaries {
		if !usedEng[i] {
			extra = append(extra, s)
		}
	}

	// Per-category breakdown, keyed by the normalized GT hazard group.
	catGT := map[string]int{}
	catMatch := map[string]int{}
	for _, e := range gt.Entries {
		catGT[normalizeCategoryDE(e.HazardGroup)]++
	}
	for _, m := range matched {
		catMatch[normalizeCategoryDE(m.GTEntry.HazardGroup)]++
	}
	var breakdown []CategoryScore
	for cat, total := range catGT {
		cov := 0.0
		if total > 0 {
			cov = float64(catMatch[cat]) / float64(total)
		}
		breakdown = append(breakdown, CategoryScore{
			Category: cat, GTCount: total, MatchCount: catMatch[cat], Coverage: cov,
		})
	}
	// breakdown is built from random map iteration, so break GTCount ties
	// by category name to keep the output order deterministic.
	sort.Slice(breakdown, func(i, j int) bool {
		if breakdown[i].GTCount != breakdown[j].GTCount {
			return breakdown[i].GTCount > breakdown[j].GTCount
		}
		return breakdown[i].Category < breakdown[j].Category
	})

	// Measure coverage (simplified): fraction of matched GT entries where
	// at least one measure keyword appears in some engine mitigation.
	measMatched := 0
	for _, m := range matched {
		if measureOverlap(m.GTEntry.Measures, mitigations) {
			measMatched++
		}
	}
	measCov := 0.0
	if len(matched) > 0 {
		measCov = float64(measMatched) / float64(len(matched))
	}

	// Risk rank comparison.
	rankPairs := buildRiskRankPairs(matched)

	// len(gt.Entries) > 0 is guaranteed by the early return above.
	coverage := float64(len(matched)) / float64(len(gt.Entries))

	return &BenchmarkResult{
		CoverageScore:     coverage,
		MeasureCoverage:   measCov,
		TotalGT:           len(gt.Entries),
		TotalEngine:       len(hazards),
		MatchedPairs:      matched,
		MissingFromEngine: missing,
		ExtraInEngine:     extra,
		CategoryBreakdown: breakdown,
		RiskRankPairs:     rankPairs,
	}
}
// fuzzyMatchScore computes a 0-1 similarity between a GT entry and an
// engine hazard as a weighted sum of three partial scores: category (0.4),
// keyword/synonym overlap (0.3) and component/zone overlap (0.3). The
// second return value names the contributing parts, joined with "+"
// (e.g. "Kategorie+Zone").
func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
	type part struct {
		weight float64
		value  float64
		label  string
	}
	parts := []part{
		{0.4, categoryMatchScore(gt.HazardGroup, h.Category), "Kategorie"},
		{0.3, keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario), "Keywords"},
		{0.3, zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule), "Zone"},
	}

	total := 0.0
	var labels []string
	for _, p := range parts {
		total += p.weight * p.value
		if p.value > 0 {
			labels = append(labels, p.label)
		}
	}
	return total, strings.Join(labels, "+")
}
func categoryMatchScore(gtGroup, engCategory string) float64 {
normalized := normalizeDE(gtGroup)
prefixes, ok := categoryMap[normalized]
if !ok {
return 0
}
engLower := strings.ToLower(engCategory)
for _, p := range prefixes {
if strings.Contains(engLower, p) {
return 1.0
}
}
return 0
}
// keywordMatchScore returns the fraction of synonym sets present in the GT
// text (hazard type + cause) that also appear in the engine text (name +
// description + scenario). Returns 0 when no synonym set applies to the GT
// text at all.
func keywordMatchScore(gtType, gtCause, engName, engDesc, engScenario string) float64 {
	gtText := normalizeDE(gtType + " " + gtCause)
	engText := normalizeDE(engName + " " + engDesc + " " + engScenario)

	containsAny := func(text string, set []string) bool {
		for _, term := range set {
			if strings.Contains(text, term) {
				return true
			}
		}
		return false
	}

	relevant, hits := 0, 0
	for _, set := range synonymSets {
		if !containsAny(gtText, set) {
			continue // set not mentioned on the GT side; irrelevant
		}
		relevant++
		if containsAny(engText, set) {
			hits++
		}
	}
	if relevant == 0 {
		return 0
	}
	return float64(hits) / float64(relevant)
}
// zoneMatchScore returns the fraction of significant GT zone/subgroup words
// that overlap — by substring containment in either direction — with the
// significant words of the engine zone/module. Returns 0 when either side
// normalizes to empty or the GT side has no significant words.
func zoneMatchScore(gtZone, gtSubgroup, engZone, engModule string) float64 {
	gtText := normalizeDE(gtZone + " " + gtSubgroup)
	engText := normalizeDE(engZone + " " + engModule)
	if gtText == "" || engText == "" {
		return 0
	}

	gtWords := extractSignificantWords(gtText)
	engWords := extractSignificantWords(engText)
	if len(gtWords) == 0 {
		return 0
	}

	// overlaps reports whether word matches any engine word as a
	// substring in either direction.
	overlaps := func(word string) bool {
		for _, ew := range engWords {
			if strings.Contains(ew, word) || strings.Contains(word, ew) {
				return true
			}
		}
		return false
	}

	hits := 0
	for _, gw := range gtWords {
		if overlaps(gw) {
			hits++
		}
	}
	return float64(hits) / float64(len(gtWords))
}
// germanStopWords lists short German function words that carry no matching
// signal. Entries are in normalizeDE form ("fuer", "ueber") because the
// callers pass normalized text.
var germanStopWords = map[string]bool{
	"der": true, "die": true, "das": true, "und": true, "oder": true,
	"von": true, "in": true, "an": true, "am": true, "im": true,
	"zu": true, "bei": true, "mit": true, "des": true, "den": true,
	"dem": true, "ein": true, "eine": true, "einer": true, "einem": true,
	"fuer": true, "auf": true, "aus": true, "um": true, "nach": true,
	"ueber": true, "unter": true, "vor": true, "durch": true,
}

// extractSignificantWords splits text on whitespace and drops stop words
// and words shorter than 3 bytes. Input is expected to be normalizeDE'd
// (lowercase, umlauts folded to ASCII), so the byte-length check acts as a
// character-length check. Returns nil when nothing qualifies.
//
// The stop-word set was previously rebuilt on every call; it is now a
// package-level table allocated once.
func extractSignificantWords(text string) []string {
	var sig []string
	for _, w := range strings.Fields(text) {
		if len(w) >= 3 && !germanStopWords[w] {
			sig = append(sig, w)
		}
	}
	return sig
}
// NormalizeDEPublic exposes normalizeDE to other packages: it lowercases
// and trims s and folds German umlauts/ß to ASCII digraphs.
func NormalizeDEPublic(s string) string {
	return normalizeDE(s)
}
// umlautReplacer folds German umlauts and ß to their ASCII digraphs
// (ä→ae, ö→oe, ü→ue, ß→ss) in a single pass. Built once at package init
// instead of running four strings.ReplaceAll passes per call.
var umlautReplacer = strings.NewReplacer("ä", "ae", "ö", "oe", "ü", "ue", "ß", "ss")

// normalizeDE lowercases and trims s and folds umlauts/ß to ASCII (same
// normalization as narrative_parser). ToLower runs first so uppercase
// umlauts are folded too.
func normalizeDE(s string) string {
	return umlautReplacer.Replace(strings.ToLower(strings.TrimSpace(s)))
}
// normalizeCategoryDE normalizes a GT hazard group with normalizeDE and
// strips the verbose German group prefixes to produce a short display
// label (e.g. "gefaehrdungen durch laerm" → "laerm").
func normalizeCategoryDE(group string) string {
	label := normalizeDE(group)
	for _, prefix := range []string{
		"gefaehrdungen durch ",
		"gefaehrdungen im zusammenhang mit ",
	} {
		label = strings.TrimPrefix(label, prefix)
	}
	return label
}
// measureOverlap reports whether any Ground Truth measure shares at least
// one significant word with any engine mitigation (substring match against
// the normalized mitigation name + description).
func measureOverlap(gtMeasures []string, mitigations []Mitigation) bool {
	for _, gm := range gtMeasures {
		// Extract the GT measure's keywords once per measure; this was
		// previously recomputed inside the mitigation loop although it
		// does not depend on the mitigation.
		words := extractSignificantWords(normalizeDE(gm))
		if len(words) == 0 {
			continue // no significant words — cannot match anything
		}
		for _, m := range mitigations {
			mNorm := normalizeDE(m.Name + " " + m.Description)
			for _, w := range words {
				if strings.Contains(mNorm, w) {
					return true
				}
			}
		}
	}
	return false
}
// buildRiskRankPairs ranks matched pairs by descending GT risk score and
// assigns 1-based GT ranks. The engine side has no risk assessment yet for
// auto-generated hazards, so EngineRank and EngineRisk stay 0. Returns nil
// for an empty input.
func buildRiskRankPairs(matched []HazardMatchPair) []RiskRankPair {
	if len(matched) == 0 {
		return nil
	}
	type ranked struct {
		gtRisk int
		name   string
	}
	items := make([]ranked, len(matched))
	for i, m := range matched {
		items[i] = ranked{m.GTEntry.RiskIn.R, m.GTEntry.HazardType}
	}
	// Stable sort keeps match order for equal risk scores, so the
	// resulting ranks are deterministic (sort.Slice leaves tie order
	// unspecified).
	sort.SliceStable(items, func(a, b int) bool { return items[a].gtRisk > items[b].gtRisk })

	pairs := make([]RiskRankPair, len(items))
	for rank, item := range items {
		pairs[rank] = RiskRankPair{
			GTRank:      rank + 1,
			EngineRank:  0, // engine has no assessment yet for auto-generated hazards
			HazardName:  item.name,
			GTRiskScore: item.gtRisk,
			EngineRisk:  0,
		}
	}
	return pairs
}