fix(iace): synonym-cross-matching + expanded action words
scenarioSimilarity now uses synonym-set cross-matching: if the GT says "durchschlaegt" and the engine says "schleuder", a shared synonym set recognizes them as related. Added a significantWordOverlap fallback for when no action words are found. Extended the action terms: schlauch/druck/kuehlschmierstoff, pumpe/bettspuel, potential/bezugspotential, stoerung/emv. Moved extractActionWords to benchmark_synonyms.go (458+119 lines). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -207,72 +207,80 @@ func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
|
||||
}
|
||||
|
||||
// scenarioSimilarity compares the GT cause description with the engine scenario.
|
||||
// It extracts "action words" (verbs/descriptors that define WHAT happens) and
|
||||
// checks overlap. This differentiates "eingeklemmt" from "herabfallend" from "durchschlägt".
|
||||
// Uses action words + synonym-set cross-matching for robust comparison.
|
||||
func scenarioSimilarity(gtCause, engScenario, engName string) float64 {
|
||||
gtText := normalizeDE(gtCause)
|
||||
engText := normalizeDE(engScenario + " " + engName)
|
||||
|
||||
// Extract action/event words that describe the specific scenario
|
||||
gtActions := extractActionWords(gtText)
|
||||
engActions := extractActionWords(engText)
|
||||
|
||||
if len(gtActions) == 0 {
|
||||
return 0
|
||||
// Fallback: use significant word overlap
|
||||
return significantWordOverlap(gtText, engText)
|
||||
}
|
||||
|
||||
matched := 0
|
||||
for _, ga := range gtActions {
|
||||
// Direct match
|
||||
directFound := false
|
||||
for _, ea := range engActions {
|
||||
if ga == ea || strings.HasPrefix(ea, ga) || strings.HasPrefix(ga, ea) {
|
||||
matched++
|
||||
directFound = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if directFound {
|
||||
matched++
|
||||
continue
|
||||
}
|
||||
// Synonym-set match: if GT action and any engine action are in the same synonym set
|
||||
for _, synSet := range synonymSets {
|
||||
gaInSet := false
|
||||
for _, syn := range synSet {
|
||||
if strings.Contains(ga, syn) || strings.Contains(syn, ga) {
|
||||
gaInSet = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !gaInSet {
|
||||
continue
|
||||
}
|
||||
// Check if any engine action is in this same set
|
||||
for _, ea := range engActions {
|
||||
for _, syn := range synSet {
|
||||
if strings.Contains(ea, syn) || strings.Contains(syn, ea) {
|
||||
matched++
|
||||
goto nextAction
|
||||
}
|
||||
}
|
||||
}
|
||||
// Also check full engine text for synonym hit
|
||||
for _, syn := range synSet {
|
||||
if strings.Contains(engText, syn) {
|
||||
matched++
|
||||
goto nextAction
|
||||
}
|
||||
}
|
||||
}
|
||||
nextAction:
|
||||
}
|
||||
return float64(matched) / float64(len(gtActions))
|
||||
}
|
||||
|
||||
// extractActionWords pulls out verbs and descriptors that define the hazard event.
|
||||
func extractActionWords(text string) []string {
|
||||
// These are the differentiating words between similar-looking hazards
|
||||
actionTerms := []string{
|
||||
"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
|
||||
"herabfall", "herunterfal", "faellt",
|
||||
"durchschlaegt", "durchbrech", "durchschlag",
|
||||
"springt ab", "abspring", "bersten", "platzen",
|
||||
"weggeschleudert", "schleuder",
|
||||
"getroffen", "treff",
|
||||
"greift", "eingreif", "durchgreif", "uebergreif",
|
||||
"beruehrt", "beruehr", "kontakt",
|
||||
"einzug", "erfass", "aufwickel",
|
||||
"stolper", "rutsch", "ausrutsch", "gleiten",
|
||||
"verbren", "heiss",
|
||||
"spritzer", "augenver",
|
||||
"kurzschluss", "ueberstrom", "ueberlast",
|
||||
"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
|
||||
"potentialausgleich", "potentialunter",
|
||||
"emv", "stoereinfluss", "elektromagnet",
|
||||
"leckage", "austret", "undicht",
|
||||
"schutzzaun", "einhausung", "schutztuer",
|
||||
"wiederanlauf", "anlauf", "startet",
|
||||
"teach", "einricht", "programmier",
|
||||
"spannvorricht", "spannfutter", "greiferbacken",
|
||||
"druckluft", "pneumatik", "restdruck",
|
||||
"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
|
||||
"ergonom", "einlege", "bedienelement",
|
||||
"tragfaehig", "boden", "einbrech",
|
||||
// significantWordOverlap is a fallback when no action words are found.
|
||||
func significantWordOverlap(gtText, engText string) float64 {
|
||||
gtWords := extractSignificantWords(gtText)
|
||||
if len(gtWords) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
var found []string
|
||||
seen := make(map[string]bool)
|
||||
for _, term := range actionTerms {
|
||||
if strings.Contains(text, term) && !seen[term] {
|
||||
seen[term] = true
|
||||
found = append(found, term)
|
||||
matched := 0
|
||||
for _, w := range gtWords {
|
||||
if strings.Contains(engText, w) {
|
||||
matched++
|
||||
}
|
||||
}
|
||||
return found
|
||||
return float64(matched) / float64(len(gtWords))
|
||||
}
|
||||
|
||||
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package iace
|
||||
|
||||
import "strings"
|
||||
|
||||
// synonymSets groups equivalent hazard terms for keyword matching.
|
||||
var synonymSets = [][]string{
|
||||
{"quetsch", "crush", "einklemm", "klemm"},
|
||||
@@ -66,3 +68,52 @@ var categoryMap = map[string][]string{
|
||||
"ergonomische gefaehrdungen": {"ergonomic"},
|
||||
"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},
|
||||
}
|
||||
|
||||
// actionTerms lists the lowercase, ASCII-transliterated German substrings
// (e.g. "ae" for "ä") that differentiate otherwise similar-looking hazards.
// Entries are matched as plain substrings, so overlapping entries such as
// "druck" and "druckluft" can both be reported for the same text.
var actionTerms = []string{
	"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
	"herabfall", "herunterfal", "faellt",
	"durchschlaegt", "durchbrech", "durchschlag",
	"springt ab", "abspring", "bersten", "platzen",
	"weggeschleudert", "schleuder",
	"getroffen", "treff",
	"greift", "eingreif", "durchgreif", "uebergreif",
	"beruehrt", "beruehr", "kontakt",
	"einzug", "erfass", "aufwickel",
	"stolper", "rutsch", "ausrutsch", "gleiten",
	"verbren", "heiss",
	"spritzer", "augenver",
	"kurzschluss", "ueberstrom", "ueberlast",
	"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
	"potentialausgleich", "potentialunter", "bezugspotential", "potential",
	"emv", "stoereinfluss", "elektromagnet", "stoerung",
	"leckage", "austret", "undicht",
	"schutzzaun", "einhausung", "schutztuer",
	"wiederanlauf", "anlauf", "startet",
	"teach", "einricht", "programmier",
	"spannvorricht", "spannfutter", "greiferbacken",
	"druckluft", "pneumatik", "restdruck",
	"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
	"ergonom", "einlege", "bedienelement",
	"tragfaehig", "boden", "einbrech",
	// Additional terms for remaining GT gaps
	"schlauch", "druck", "kuehlschmierstoff",
	"bettspuel", "pumpe", "niederdruck",
	"luft-", "dimensionie",
	"anlagenteile", "energieversorgung",
	"greifer", "werkzeug",
}

// extractActionWords pulls out verbs and descriptors that define the hazard
// event. It returns every entry of actionTerms that occurs as a substring of
// text, in table order, or nil when none occurs.
//
// Matching is case-sensitive against the lowercase table, so text should
// already be normalized (scenarioSimilarity passes normalizeDE output).
func extractActionWords(text string) []string {
	var found []string
	for _, term := range actionTerms {
		// Each table entry is unique and visited once, so no dedup is needed.
		if strings.Contains(text, term) {
			found = append(found, term)
		}
	}
	return found
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user