fix(iace): synonym-cross-matching + expanded action words

scenarioSimilarity now uses synonym-set cross-matching: if the GT says
"durchschlaegt" and the engine says "schleuder", a synonym set containing
both recognizes them as related. Added a significantWordOverlap fallback
for when no action words are found. Extended the action terms with
schlauch/druck/kuehlschmierstoff, pumpe/bettspuel,
potential/bezugspotential, and stoerung/emv.

Moved extractActionWords to benchmark_synonyms.go (458+119 lines).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-15 10:03:23 +02:00
parent b82853a95b
commit 003eafa75d
2 changed files with 101 additions and 42 deletions
@@ -207,72 +207,80 @@ func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
}
// scenarioSimilarity compares the ground-truth (GT) cause description with the
// engine's scenario text and hazard name.
//
// Both texts are normalized with normalizeDE and reduced to their action words
// via extractActionWords. Each GT action word counts as matched at most once,
// either through a direct/prefix match against an engine action word or through
// a shared entry in synonymSets. The result is the fraction of GT action words
// that matched, so the action-word path always yields a value in [0, 1].
//
// When the GT text contains no action words, the score is delegated to
// significantWordOverlap instead of being reported as 0.
func scenarioSimilarity(gtCause, engScenario, engName string) float64 {
	gtText := normalizeDE(gtCause)
	engText := normalizeDE(engScenario + " " + engName)

	gtActions := extractActionWords(gtText)
	engActions := extractActionWords(engText)
	if len(gtActions) == 0 {
		// No action vocabulary to compare: fall back to plain word overlap.
		return significantWordOverlap(gtText, engText)
	}

	matched := 0
	for _, ga := range gtActions {
		// Count every GT action once, no matter which strategy finds it.
		if directActionMatch(ga, engActions) || synonymActionMatch(ga, engActions, engText) {
			matched++
		}
	}
	return float64(matched) / float64(len(gtActions))
}

// directActionMatch reports whether ga equals an engine action word or one is a prefix of the other.
func directActionMatch(ga string, engActions []string) bool {
	for _, ea := range engActions {
		if ga == ea || strings.HasPrefix(ea, ga) || strings.HasPrefix(ga, ea) {
			return true
		}
	}
	return false
}

// synonymActionMatch reports whether ga and the engine side share any set in synonymSets.
func synonymActionMatch(ga string, engActions []string, engText string) bool {
	overlaps := func(a, b string) bool {
		return strings.Contains(a, b) || strings.Contains(b, a)
	}

	for _, synSet := range synonymSets {
		gaInSet := false
		for _, syn := range synSet {
			if overlaps(ga, syn) {
				gaInSet = true
				break
			}
		}
		if !gaInSet {
			continue
		}

		// An engine action word belongs to the same set.
		for _, ea := range engActions {
			for _, syn := range synSet {
				if overlaps(ea, syn) {
					return true
				}
			}
		}
		// The engine text may mention a synonym that is not itself an action word.
		for _, syn := range synSet {
			if strings.Contains(engText, syn) {
				return true
			}
		}
	}
	return false
}
// extractActionWords pulls out verbs and descriptors that define the hazard event.
func extractActionWords(text string) []string {
// These are the differentiating words between similar-looking hazards
actionTerms := []string{
"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
"herabfall", "herunterfal", "faellt",
"durchschlaegt", "durchbrech", "durchschlag",
"springt ab", "abspring", "bersten", "platzen",
"weggeschleudert", "schleuder",
"getroffen", "treff",
"greift", "eingreif", "durchgreif", "uebergreif",
"beruehrt", "beruehr", "kontakt",
"einzug", "erfass", "aufwickel",
"stolper", "rutsch", "ausrutsch", "gleiten",
"verbren", "heiss",
"spritzer", "augenver",
"kurzschluss", "ueberstrom", "ueberlast",
"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
"potentialausgleich", "potentialunter",
"emv", "stoereinfluss", "elektromagnet",
"leckage", "austret", "undicht",
"schutzzaun", "einhausung", "schutztuer",
"wiederanlauf", "anlauf", "startet",
"teach", "einricht", "programmier",
"spannvorricht", "spannfutter", "greiferbacken",
"druckluft", "pneumatik", "restdruck",
"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
"ergonom", "einlege", "bedienelement",
"tragfaehig", "boden", "einbrech",
// significantWordOverlap is a fallback when no action words are found.
func significantWordOverlap(gtText, engText string) float64 {
gtWords := extractSignificantWords(gtText)
if len(gtWords) == 0 {
return 0
}
var found []string
seen := make(map[string]bool)
for _, term := range actionTerms {
if strings.Contains(text, term) && !seen[term] {
seen[term] = true
found = append(found, term)
matched := 0
for _, w := range gtWords {
if strings.Contains(engText, w) {
matched++
}
}
return found
return float64(matched) / float64(len(gtWords))
}
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
@@ -1,5 +1,7 @@
package iace
import "strings"
// synonymSets groups equivalent hazard terms for keyword matching.
var synonymSets = [][]string{
{"quetsch", "crush", "einklemm", "klemm"},
@@ -66,3 +68,52 @@ var categoryMap = map[string][]string{
"ergonomische gefaehrdungen": {"ergonomic"},
"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},
}
// extractActionWords returns the hazard-event terms that occur in text.
//
// The vocabulary holds word stems that tell similar-looking hazards apart.
// A term is reported when it appears anywhere in text as a substring, and the
// result keeps the vocabulary's order. The result is nil when nothing matches.
func extractActionWords(text string) []string {
	// Every entry is unique, so a plain ordered filter needs no de-duplication.
	vocabulary := [...]string{
		"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
		"herabfall", "herunterfal", "faellt",
		"durchschlaegt", "durchbrech", "durchschlag",
		"springt ab", "abspring", "bersten", "platzen",
		"weggeschleudert", "schleuder",
		"getroffen", "treff",
		"greift", "eingreif", "durchgreif", "uebergreif",
		"beruehrt", "beruehr", "kontakt",
		"einzug", "erfass", "aufwickel",
		"stolper", "rutsch", "ausrutsch", "gleiten",
		"verbren", "heiss",
		"spritzer", "augenver",
		"kurzschluss", "ueberstrom", "ueberlast",
		"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
		"potentialausgleich", "potentialunter", "bezugspotential", "potential",
		"emv", "stoereinfluss", "elektromagnet", "stoerung",
		"leckage", "austret", "undicht",
		"schutzzaun", "einhausung", "schutztuer",
		"wiederanlauf", "anlauf", "startet",
		"teach", "einricht", "programmier",
		"spannvorricht", "spannfutter", "greiferbacken",
		"druckluft", "pneumatik", "restdruck",
		"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
		"ergonom", "einlege", "bedienelement",
		"tragfaehig", "boden", "einbrech",
		// Terms added to close remaining ground-truth gaps.
		"schlauch", "druck", "kuehlschmierstoff",
		"bettspuel", "pumpe", "niederdruck",
		"luft-", "dimensionie",
		"anlagenteile", "energieversorgung",
		"greifer", "werkzeug",
	}

	var hits []string
	for i := range vocabulary {
		if candidate := vocabulary[i]; strings.Contains(text, candidate) {
			hits = append(hits, candidate)
		}
	}
	return hits
}