fix(iace): synonym-cross-matching + expanded action words

scenarioSimilarity now uses synonym-set cross-matching: if GT says
"durchschlaegt" and Engine says "schleuder", the synonym set recognizes
them as related. Added significantWordOverlap fallback when no action
words found. Extended action terms: schlauch/druck/kuehlschmierstoff,
pumpe/bettspuel, potential/bezugspotential, stoerung/emv.

Moved extractActionWords to benchmark_synonyms.go (458+119 lines).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-15 10:03:23 +02:00
parent b82853a95b
commit 003eafa75d
2 changed files with 101 additions and 42 deletions
@@ -207,72 +207,80 @@ func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
} }
// scenarioSimilarity compares the GT cause description with the engine scenario. // scenarioSimilarity compares the GT cause description with the engine scenario.
// It extracts "action words" (verbs/descriptors that define WHAT happens) and // Uses action words + synonym-set cross-matching for robust comparison.
// checks overlap. This differentiates "eingeklemmt" from "herabfallend" from "durchschlägt".
func scenarioSimilarity(gtCause, engScenario, engName string) float64 { func scenarioSimilarity(gtCause, engScenario, engName string) float64 {
gtText := normalizeDE(gtCause) gtText := normalizeDE(gtCause)
engText := normalizeDE(engScenario + " " + engName) engText := normalizeDE(engScenario + " " + engName)
// Extract action/event words that describe the specific scenario
gtActions := extractActionWords(gtText) gtActions := extractActionWords(gtText)
engActions := extractActionWords(engText) engActions := extractActionWords(engText)
if len(gtActions) == 0 { if len(gtActions) == 0 {
return 0 // Fallback: use significant word overlap
return significantWordOverlap(gtText, engText)
} }
matched := 0 matched := 0
for _, ga := range gtActions { for _, ga := range gtActions {
// Direct match
directFound := false
for _, ea := range engActions { for _, ea := range engActions {
if ga == ea || strings.HasPrefix(ea, ga) || strings.HasPrefix(ga, ea) { if ga == ea || strings.HasPrefix(ea, ga) || strings.HasPrefix(ga, ea) {
matched++ directFound = true
break break
} }
} }
if directFound {
matched++
continue
}
// Synonym-set match: if GT action and any engine action are in the same synonym set
for _, synSet := range synonymSets {
gaInSet := false
for _, syn := range synSet {
if strings.Contains(ga, syn) || strings.Contains(syn, ga) {
gaInSet = true
break
}
}
if !gaInSet {
continue
}
// Check if any engine action is in this same set
for _, ea := range engActions {
for _, syn := range synSet {
if strings.Contains(ea, syn) || strings.Contains(syn, ea) {
matched++
goto nextAction
}
}
}
// Also check full engine text for synonym hit
for _, syn := range synSet {
if strings.Contains(engText, syn) {
matched++
goto nextAction
}
}
}
nextAction:
} }
return float64(matched) / float64(len(gtActions)) return float64(matched) / float64(len(gtActions))
} }
// extractActionWords pulls out verbs and descriptors that define the hazard event. // significantWordOverlap is a fallback when no action words are found.
func extractActionWords(text string) []string { func significantWordOverlap(gtText, engText string) float64 {
// These are the differentiating words between similar-looking hazards gtWords := extractSignificantWords(gtText)
actionTerms := []string{ if len(gtWords) == 0 {
"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt", return 0
"herabfall", "herunterfal", "faellt",
"durchschlaegt", "durchbrech", "durchschlag",
"springt ab", "abspring", "bersten", "platzen",
"weggeschleudert", "schleuder",
"getroffen", "treff",
"greift", "eingreif", "durchgreif", "uebergreif",
"beruehrt", "beruehr", "kontakt",
"einzug", "erfass", "aufwickel",
"stolper", "rutsch", "ausrutsch", "gleiten",
"verbren", "heiss",
"spritzer", "augenver",
"kurzschluss", "ueberstrom", "ueberlast",
"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
"potentialausgleich", "potentialunter",
"emv", "stoereinfluss", "elektromagnet",
"leckage", "austret", "undicht",
"schutzzaun", "einhausung", "schutztuer",
"wiederanlauf", "anlauf", "startet",
"teach", "einricht", "programmier",
"spannvorricht", "spannfutter", "greiferbacken",
"druckluft", "pneumatik", "restdruck",
"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
"ergonom", "einlege", "bedienelement",
"tragfaehig", "boden", "einbrech",
} }
matched := 0
var found []string for _, w := range gtWords {
seen := make(map[string]bool) if strings.Contains(engText, w) {
for _, term := range actionTerms { matched++
if strings.Contains(text, term) && !seen[term] {
seen[term] = true
found = append(found, term)
} }
} }
return found return float64(matched) / float64(len(gtWords))
} }
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool { func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
@@ -1,5 +1,7 @@
package iace package iace
import "strings"
// synonymSets groups equivalent hazard terms for keyword matching. // synonymSets groups equivalent hazard terms for keyword matching.
var synonymSets = [][]string{ var synonymSets = [][]string{
{"quetsch", "crush", "einklemm", "klemm"}, {"quetsch", "crush", "einklemm", "klemm"},
@@ -66,3 +68,52 @@ var categoryMap = map[string][]string{
"ergonomische gefaehrdungen": {"ergonomic"}, "ergonomische gefaehrdungen": {"ergonomic"},
"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"}, "gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},
} }
// extractActionWords returns every entry of the built-in action vocabulary
// that occurs as a substring of text.
//
// Results follow the order of the vocabulary, not the order in which the
// terms appear in text. Matching is plain substring containment, so
// overlapping vocabulary entries are all reported: "druckluft" yields both
// "druckluft" and "druck". The result is nil when nothing matches.
//
// NOTE(review): the vocabulary is lowercase with transliterated umlauts
// ("ae", "ue"), so text presumably has to be normalized by the caller first.
func extractActionWords(text string) []string {
	// Vocabulary of terms that tell otherwise similar-looking hazards apart.
	vocabulary := []string{
		"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
		"herabfall", "herunterfal", "faellt",
		"durchschlaegt", "durchbrech", "durchschlag",
		"springt ab", "abspring", "bersten", "platzen",
		"weggeschleudert", "schleuder",
		"getroffen", "treff",
		"greift", "eingreif", "durchgreif", "uebergreif",
		"beruehrt", "beruehr", "kontakt",
		"einzug", "erfass", "aufwickel",
		"stolper", "rutsch", "ausrutsch", "gleiten",
		"verbren", "heiss",
		"spritzer", "augenver",
		"kurzschluss", "ueberstrom", "ueberlast",
		"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
		"potentialausgleich", "potentialunter", "bezugspotential", "potential",
		"emv", "stoereinfluss", "elektromagnet", "stoerung",
		"leckage", "austret", "undicht",
		"schutzzaun", "einhausung", "schutztuer",
		"wiederanlauf", "anlauf", "startet",
		"teach", "einricht", "programmier",
		"spannvorricht", "spannfutter", "greiferbacken",
		"druckluft", "pneumatik", "restdruck",
		"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
		"ergonom", "einlege", "bedienelement",
		"tragfaehig", "boden", "einbrech",
		// Later additions covering remaining ground-truth gaps.
		"schlauch", "druck", "kuehlschmierstoff",
		"bettspuel", "pumpe", "niederdruck",
		"luft-", "dimensionie",
		"anlagenteile", "energieversorgung",
		"greifer", "werkzeug",
	}

	var hits []string
	// Guards against a vocabulary entry being reported twice should the list
	// ever contain an exact duplicate.
	recorded := make(map[string]struct{})
	for _, candidate := range vocabulary {
		if !strings.Contains(text, candidate) {
			continue
		}
		if _, dup := recorded[candidate]; dup {
			continue
		}
		recorded[candidate] = struct{}{}
		hits = append(hits, candidate)
	}
	return hits
}