fix(iace): synonym-cross-matching + expanded action words
scenarioSimilarity now uses synonym-set cross-matching: if GT says "durchschlaegt" and Engine says "schleuder", the synonym set recognizes them as related. Added significantWordOverlap fallback when no action words found. Extended action terms: schlauch/druck/kuehlschmierstoff, pumpe/bettspuel, potential/bezugspotential, stoerung/emv. Moved extractActionWords to benchmark_synonyms.go (458+119 lines). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -207,72 +207,80 @@ func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// scenarioSimilarity compares the GT cause description with the engine scenario.
|
// scenarioSimilarity compares the GT cause description with the engine scenario.
|
||||||
// It extracts "action words" (verbs/descriptors that define WHAT happens) and
|
// Uses action words + synonym-set cross-matching for robust comparison.
|
||||||
// checks overlap. This differentiates "eingeklemmt" from "herabfallend" from "durchschlägt".
|
|
||||||
func scenarioSimilarity(gtCause, engScenario, engName string) float64 {
|
func scenarioSimilarity(gtCause, engScenario, engName string) float64 {
|
||||||
gtText := normalizeDE(gtCause)
|
gtText := normalizeDE(gtCause)
|
||||||
engText := normalizeDE(engScenario + " " + engName)
|
engText := normalizeDE(engScenario + " " + engName)
|
||||||
|
|
||||||
// Extract action/event words that describe the specific scenario
|
|
||||||
gtActions := extractActionWords(gtText)
|
gtActions := extractActionWords(gtText)
|
||||||
engActions := extractActionWords(engText)
|
engActions := extractActionWords(engText)
|
||||||
|
|
||||||
if len(gtActions) == 0 {
|
if len(gtActions) == 0 {
|
||||||
return 0
|
// Fallback: use significant word overlap
|
||||||
|
return significantWordOverlap(gtText, engText)
|
||||||
}
|
}
|
||||||
|
|
||||||
matched := 0
|
matched := 0
|
||||||
for _, ga := range gtActions {
|
for _, ga := range gtActions {
|
||||||
|
// Direct match
|
||||||
|
directFound := false
|
||||||
for _, ea := range engActions {
|
for _, ea := range engActions {
|
||||||
if ga == ea || strings.HasPrefix(ea, ga) || strings.HasPrefix(ga, ea) {
|
if ga == ea || strings.HasPrefix(ea, ga) || strings.HasPrefix(ga, ea) {
|
||||||
matched++
|
directFound = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if directFound {
|
||||||
|
matched++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Synonym-set match: if GT action and any engine action are in the same synonym set
|
||||||
|
for _, synSet := range synonymSets {
|
||||||
|
gaInSet := false
|
||||||
|
for _, syn := range synSet {
|
||||||
|
if strings.Contains(ga, syn) || strings.Contains(syn, ga) {
|
||||||
|
gaInSet = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !gaInSet {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Check if any engine action is in this same set
|
||||||
|
for _, ea := range engActions {
|
||||||
|
for _, syn := range synSet {
|
||||||
|
if strings.Contains(ea, syn) || strings.Contains(syn, ea) {
|
||||||
|
matched++
|
||||||
|
goto nextAction
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Also check full engine text for synonym hit
|
||||||
|
for _, syn := range synSet {
|
||||||
|
if strings.Contains(engText, syn) {
|
||||||
|
matched++
|
||||||
|
goto nextAction
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nextAction:
|
||||||
}
|
}
|
||||||
return float64(matched) / float64(len(gtActions))
|
return float64(matched) / float64(len(gtActions))
|
||||||
}
|
}
|
||||||
|
|
||||||
// extractActionWords pulls out verbs and descriptors that define the hazard event.
|
// significantWordOverlap is a fallback when no action words are found.
|
||||||
func extractActionWords(text string) []string {
|
func significantWordOverlap(gtText, engText string) float64 {
|
||||||
// These are the differentiating words between similar-looking hazards
|
gtWords := extractSignificantWords(gtText)
|
||||||
actionTerms := []string{
|
if len(gtWords) == 0 {
|
||||||
"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
|
return 0
|
||||||
"herabfall", "herunterfal", "faellt",
|
|
||||||
"durchschlaegt", "durchbrech", "durchschlag",
|
|
||||||
"springt ab", "abspring", "bersten", "platzen",
|
|
||||||
"weggeschleudert", "schleuder",
|
|
||||||
"getroffen", "treff",
|
|
||||||
"greift", "eingreif", "durchgreif", "uebergreif",
|
|
||||||
"beruehrt", "beruehr", "kontakt",
|
|
||||||
"einzug", "erfass", "aufwickel",
|
|
||||||
"stolper", "rutsch", "ausrutsch", "gleiten",
|
|
||||||
"verbren", "heiss",
|
|
||||||
"spritzer", "augenver",
|
|
||||||
"kurzschluss", "ueberstrom", "ueberlast",
|
|
||||||
"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
|
|
||||||
"potentialausgleich", "potentialunter",
|
|
||||||
"emv", "stoereinfluss", "elektromagnet",
|
|
||||||
"leckage", "austret", "undicht",
|
|
||||||
"schutzzaun", "einhausung", "schutztuer",
|
|
||||||
"wiederanlauf", "anlauf", "startet",
|
|
||||||
"teach", "einricht", "programmier",
|
|
||||||
"spannvorricht", "spannfutter", "greiferbacken",
|
|
||||||
"druckluft", "pneumatik", "restdruck",
|
|
||||||
"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
|
|
||||||
"ergonom", "einlege", "bedienelement",
|
|
||||||
"tragfaehig", "boden", "einbrech",
|
|
||||||
}
|
}
|
||||||
|
matched := 0
|
||||||
var found []string
|
for _, w := range gtWords {
|
||||||
seen := make(map[string]bool)
|
if strings.Contains(engText, w) {
|
||||||
for _, term := range actionTerms {
|
matched++
|
||||||
if strings.Contains(text, term) && !seen[term] {
|
|
||||||
seen[term] = true
|
|
||||||
found = append(found, term)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return found
|
return float64(matched) / float64(len(gtWords))
|
||||||
}
|
}
|
||||||
|
|
||||||
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
|
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
package iace
|
package iace
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
// synonymSets groups equivalent hazard terms for keyword matching.
|
// synonymSets groups equivalent hazard terms for keyword matching.
|
||||||
var synonymSets = [][]string{
|
var synonymSets = [][]string{
|
||||||
{"quetsch", "crush", "einklemm", "klemm"},
|
{"quetsch", "crush", "einklemm", "klemm"},
|
||||||
@@ -66,3 +68,52 @@ var categoryMap = map[string][]string{
|
|||||||
"ergonomische gefaehrdungen": {"ergonomic"},
|
"ergonomische gefaehrdungen": {"ergonomic"},
|
||||||
"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},
|
"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// extractActionWords returns the action terms (verb stems and descriptors that
// characterise WHAT happens in a hazard scenario) that occur in text.
//
// Matching is a plain substring test, so text is expected to already be in the
// same lower-case, umlaut-free spelling as the terms below (callers in this
// package pass normalized text). Terms are reported in the order of the list,
// not in order of appearance in text. Overlapping terms are all reported: a
// text containing "druckluft" yields both "druckluft" and "druck".
//
// The result is nil when no term occurs in text.
func extractActionWords(text string) []string {
	// These are the differentiating words between similar-looking hazards.
	// Every entry is unique, so no de-duplication is needed while scanning.
	actionTerms := []string{
		"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
		"herabfall", "herunterfal", "faellt",
		"durchschlaegt", "durchbrech", "durchschlag",
		"springt ab", "abspring", "bersten", "platzen",
		"weggeschleudert", "schleuder",
		"getroffen", "treff",
		"greift", "eingreif", "durchgreif", "uebergreif",
		"beruehrt", "beruehr", "kontakt",
		"einzug", "erfass", "aufwickel",
		"stolper", "rutsch", "ausrutsch", "gleiten",
		"verbren", "heiss",
		"spritzer", "augenver",
		"kurzschluss", "ueberstrom", "ueberlast",
		"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
		"potentialausgleich", "potentialunter", "bezugspotential", "potential",
		"emv", "stoereinfluss", "elektromagnet", "stoerung",
		"leckage", "austret", "undicht",
		"schutzzaun", "einhausung", "schutztuer",
		"wiederanlauf", "anlauf", "startet",
		"teach", "einricht", "programmier",
		"spannvorricht", "spannfutter", "greiferbacken",
		"druckluft", "pneumatik", "restdruck",
		"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
		"ergonom", "einlege", "bedienelement",
		"tragfaehig", "boden", "einbrech",
		// Additional terms for remaining GT gaps
		"schlauch", "druck", "kuehlschmierstoff",
		"bettspuel", "pumpe", "niederdruck",
		"luft-", "dimensionie",
		"anlagenteile", "energieversorgung",
		"greifer", "werkzeug",
	}

	var found []string
	for _, term := range actionTerms {
		if strings.Contains(text, term) {
			found = append(found, term)
		}
	}
	return found
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user