diff --git a/ai-compliance-sdk/internal/iace/benchmark_matcher.go b/ai-compliance-sdk/internal/iace/benchmark_matcher.go index 55783df7..94382875 100644 --- a/ai-compliance-sdk/internal/iace/benchmark_matcher.go +++ b/ai-compliance-sdk/internal/iace/benchmark_matcher.go @@ -207,72 +207,80 @@ func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) { } // scenarioSimilarity compares the GT cause description with the engine scenario. -// It extracts "action words" (verbs/descriptors that define WHAT happens) and -// checks overlap. This differentiates "eingeklemmt" from "herabfallend" from "durchschlägt". +// Uses action words + synonym-set cross-matching for robust comparison. func scenarioSimilarity(gtCause, engScenario, engName string) float64 { gtText := normalizeDE(gtCause) engText := normalizeDE(engScenario + " " + engName) - // Extract action/event words that describe the specific scenario gtActions := extractActionWords(gtText) engActions := extractActionWords(engText) if len(gtActions) == 0 { - return 0 + // Fallback: use significant word overlap + return significantWordOverlap(gtText, engText) } matched := 0 for _, ga := range gtActions { + // Direct match + directFound := false for _, ea := range engActions { if ga == ea || strings.HasPrefix(ea, ga) || strings.HasPrefix(ga, ea) { - matched++ + directFound = true break } } + if directFound { + matched++ + continue + } + // Synonym-set match: if GT action and any engine action are in the same synonym set + for _, synSet := range synonymSets { + gaInSet := false + for _, syn := range synSet { + if strings.Contains(ga, syn) || strings.Contains(syn, ga) { + gaInSet = true + break + } + } + if !gaInSet { + continue + } + // Check if any engine action is in this same set + for _, ea := range engActions { + for _, syn := range synSet { + if strings.Contains(ea, syn) || strings.Contains(syn, ea) { + matched++ + goto nextAction + } + } + } + // Also check full engine text for synonym hit + for _, syn 
:= range synSet { + if strings.Contains(engText, syn) { + matched++ + goto nextAction + } + } + } + nextAction: } return float64(matched) / float64(len(gtActions)) } -// extractActionWords pulls out verbs and descriptors that define the hazard event. -func extractActionWords(text string) []string { - // These are the differentiating words between similar-looking hazards - actionTerms := []string{ - "eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt", - "herabfall", "herunterfal", "faellt", - "durchschlaegt", "durchbrech", "durchschlag", - "springt ab", "abspring", "bersten", "platzen", - "weggeschleudert", "schleuder", - "getroffen", "treff", - "greift", "eingreif", "durchgreif", "uebergreif", - "beruehrt", "beruehr", "kontakt", - "einzug", "erfass", "aufwickel", - "stolper", "rutsch", "ausrutsch", "gleiten", - "verbren", "heiss", - "spritzer", "augenver", - "kurzschluss", "ueberstrom", "ueberlast", - "isolat", "schutzleiter", "kriechstrom", "kriechstreck", - "potentialausgleich", "potentialunter", - "emv", "stoereinfluss", "elektromagnet", - "leckage", "austret", "undicht", - "schutzzaun", "einhausung", "schutztuer", - "wiederanlauf", "anlauf", "startet", - "teach", "einricht", "programmier", - "spannvorricht", "spannfutter", "greiferbacken", - "druckluft", "pneumatik", "restdruck", - "beladetuer", "werkzeugmaschine", "bearbeitungszelle", - "ergonom", "einlege", "bedienelement", - "tragfaehig", "boden", "einbrech", +// significantWordOverlap is a fallback when no action words are found. 
+func significantWordOverlap(gtText, engText string) float64 { + gtWords := extractSignificantWords(gtText) + if len(gtWords) == 0 { + return 0 } - - var found []string - seen := make(map[string]bool) - for _, term := range actionTerms { - if strings.Contains(text, term) && !seen[term] { - seen[term] = true - found = append(found, term) + matched := 0 + for _, w := range gtWords { + if strings.Contains(engText, w) { + matched++ } } - return found + return float64(matched) / float64(len(gtWords)) } func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool { diff --git a/ai-compliance-sdk/internal/iace/benchmark_synonyms.go b/ai-compliance-sdk/internal/iace/benchmark_synonyms.go index d7402f02..163bb5ca 100644 --- a/ai-compliance-sdk/internal/iace/benchmark_synonyms.go +++ b/ai-compliance-sdk/internal/iace/benchmark_synonyms.go @@ -1,5 +1,7 @@ package iace +import "strings" + // synonymSets groups equivalent hazard terms for keyword matching. var synonymSets = [][]string{ {"quetsch", "crush", "einklemm", "klemm"}, @@ -66,3 +68,52 @@ var categoryMap = map[string][]string{ "ergonomische gefaehrdungen": {"ergonomic"}, "gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"}, } + +// extractActionWords pulls out verbs and descriptors that define the hazard event. 
// extractActionWords returns the action terms (verbs and descriptors that
// define the hazard event) that occur in text.
//
// Matching is a plain substring test, so text is expected to use the same
// spelling as the terms below (lowercase, umlauts written as ae/oe/ue) —
// presumably the output of normalizeDE; verify against callers. Overlapping
// terms are all reported: a text containing "potentialausgleich" yields both
// "potentialausgleich" and "potential".
//
// The result follows the order of the term list, not the order of appearance
// in text, and is nil when no term matches.
func extractActionWords(text string) []string {
	// These are the differentiating words between similar-looking hazards.
	// Keep the entries unique: each term is tested exactly once, so a
	// duplicate entry would be reported twice.
	actionTerms := []string{
		"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
		"herabfall", "herunterfal", "faellt",
		"durchschlaegt", "durchbrech", "durchschlag",
		"springt ab", "abspring", "bersten", "platzen",
		"weggeschleudert", "schleuder",
		"getroffen", "treff",
		"greift", "eingreif", "durchgreif", "uebergreif",
		"beruehrt", "beruehr", "kontakt",
		"einzug", "erfass", "aufwickel",
		"stolper", "rutsch", "ausrutsch", "gleiten",
		"verbren", "heiss",
		"spritzer", "augenver",
		"kurzschluss", "ueberstrom", "ueberlast",
		"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
		"potentialausgleich", "potentialunter", "bezugspotential", "potential",
		"emv", "stoereinfluss", "elektromagnet", "stoerung",
		"leckage", "austret", "undicht",
		"schutzzaun", "einhausung", "schutztuer",
		"wiederanlauf", "anlauf", "startet",
		"teach", "einricht", "programmier",
		"spannvorricht", "spannfutter", "greiferbacken",
		"druckluft", "pneumatik", "restdruck",
		"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
		"ergonom", "einlege", "bedienelement",
		"tragfaehig", "boden", "einbrech",
		// Additional terms for remaining GT gaps
		"schlauch", "druck", "kuehlschmierstoff",
		"bettspuel", "pumpe", "niederdruck",
		"luft-", "dimensionie",
		"anlagenteile", "energieversorgung",
		"greifer", "werkzeug",
	}

	// A nil slice is returned when nothing matches; callers test len() == 0.
	var found []string
	for _, term := range actionTerms {
		if strings.Contains(text, term) {
			found = append(found, term)
		}
	}
	return found
}