feat(iace): scenario-based matching + split benchmark_synonyms.go
4-signal matcher: category (0.2), keywords (0.2), zone (0.3), scenario similarity (0.3). Scenario signal extracts action words (eingeklemmt vs herabfallend vs durchschlaegt) to differentiate similar-looking hazards at the same component. Split benchmark_synonyms.go (70 lines) from benchmark_matcher.go (516→450 lines) to stay under 500-line cap. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,60 +11,7 @@ import (
|
|||||||
|
|
||||||
const matchThreshold = 0.20
|
const matchThreshold = 0.20
|
||||||
|
|
||||||
// categoryMap maps GT hazard_group (German) to engine category prefixes.
|
// categoryMap, synonymSets, wrongMachineTerms → benchmark_synonyms.go
|
||||||
var categoryMap = map[string][]string{
|
|
||||||
"mechanische gefaehrdungen": {"mechanical"},
|
|
||||||
"elektrische gefaehrdungen": {"electrical"},
|
|
||||||
"thermische gefaehrdungen": {"thermal"},
|
|
||||||
"gefaehrdungen durch laerm": {"noise", "ergonomic"},
|
|
||||||
"gefaehrdungen durch vibration": {"noise", "vibration"},
|
|
||||||
"gefaehrdungen durch strahlung": {"radiation", "emc"},
|
|
||||||
"gefaehrdungen durch materialien und substanzen": {"material", "environmental"},
|
|
||||||
"ergonomische gefaehrdungen": {"ergonomic"},
|
|
||||||
"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},
|
|
||||||
}
|
|
||||||
|
|
||||||
// synonymSets groups equivalent hazard terms for keyword matching.
|
|
||||||
var synonymSets = [][]string{
|
|
||||||
{"quetsch", "crush", "einklemm", "klemm"},
|
|
||||||
{"scher", "shear", "absch"},
|
|
||||||
{"schneid", "cut", "schnitt"},
|
|
||||||
{"stoss", "schlag", "impact", "treff", "aufprall"},
|
|
||||||
{"einzug", "fang", "erfass", "entangle", "wickel"},
|
|
||||||
{"elektrisch", "stromschlag", "electric", "beruehr", "spannungsfuehr", "koerperdurchstroemung"},
|
|
||||||
{"brand", "feuer", "fire", "kabelbrand", "kurzschluss", "ueberlast", "ueberstrom"},
|
|
||||||
{"verbrenn", "burn", "heiss", "thermisch", "lichtbogen"},
|
|
||||||
{"laerm", "noise", "gehoer", "schall", "dezibel"},
|
|
||||||
{"vibration", "schwing"},
|
|
||||||
{"ergonom", "haltung", "handhabung", "bedien", "bewegungsapparat"},
|
|
||||||
{"kuehlschmierstoff", "kss", "aerosol", "coolant"},
|
|
||||||
{"pneumat", "druckluft", "compressed"},
|
|
||||||
{"hydraul", "druck", "pressure"},
|
|
||||||
{"roboter", "robot", "roboterarm"},
|
|
||||||
{"greifer", "gripper", "schunk"},
|
|
||||||
{"foerderband", "transport", "conveyor"},
|
|
||||||
{"schutzzaun", "schutzgitter", "fence", "guard"},
|
|
||||||
{"werkzeugmaschine", "robodrill", "bearbeitungszentrum", "wzm"},
|
|
||||||
{"stolper", "rutsch", "slip", "trip"},
|
|
||||||
{"leckage", "austreten", "leak"},
|
|
||||||
{"einstich", "puncture", "spritz"},
|
|
||||||
{"isolat", "kriechstrom", "schutzleiter", "erdung", "indirekt"},
|
|
||||||
{"luft", "kriechstreck", "beruehrer", "oberflaeche", "leitfaehig"},
|
|
||||||
{"emv", "strahlung", "radiation", "elektromagnet", "stoereinfluss"},
|
|
||||||
{"eingeschlossen", "eingesperrt", "wiederanlauf", "quittier"},
|
|
||||||
{"zentriergreifer", "zentriereinheit", "zentrieren"},
|
|
||||||
{"beladetuer", "schutztuer", "zugangstuer", "tuerposition"},
|
|
||||||
{"werkstueck", "rohteil", "rohling"},
|
|
||||||
{"ergonom", "einlege", "bedienelemente", "arbeitshoehe", "haltung"},
|
|
||||||
{"boden", "tragfaehig", "einbrech", "fundamentierr"},
|
|
||||||
{"spritzer", "auge", "augenverletz"},
|
|
||||||
{"bersten", "platzen", "abspring"},
|
|
||||||
{"durchschlag", "durchbrech", "begrenz", "bewegungsbereich"},
|
|
||||||
{"potentialausgleich", "potentialunter", "bezugspotential"},
|
|
||||||
{"kriechstreck", "luft-", "kriechst", "dimensionie"},
|
|
||||||
{"kuehlschmierstoff", "kss", "bettspuel", "kuehlung"},
|
|
||||||
{"rutsch", "ausrutsch", "stolper", "gleiten", "nassrutsch"},
|
|
||||||
}
|
|
||||||
|
|
||||||
// CompareBenchmark runs the full comparison between Ground Truth and engine output.
|
// CompareBenchmark runs the full comparison between Ground Truth and engine output.
|
||||||
func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigation) *BenchmarkResult {
|
func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigation) *BenchmarkResult {
|
||||||
@@ -211,59 +158,121 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
|
|||||||
}
|
}
|
||||||
|
|
||||||
// fuzzyMatchScore computes a 0-1 similarity between a GT entry and an engine hazard.
|
// fuzzyMatchScore computes a 0-1 similarity between a GT entry and an engine hazard.
|
||||||
|
// 4 signals: category (0.2), keywords (0.2), zone (0.3), scenario similarity (0.3).
|
||||||
func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
|
func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
|
||||||
var score float64
|
var score float64
|
||||||
var reasons []string
|
var reasons []string
|
||||||
|
|
||||||
// 1. Category match (weight 0.3)
|
// 1. Category match (weight 0.2)
|
||||||
catScore := categoryMatchScore(gt.HazardGroup, h.Category)
|
catScore := categoryMatchScore(gt.HazardGroup, h.Category)
|
||||||
score += 0.3 * catScore
|
score += 0.2 * catScore
|
||||||
if catScore > 0 {
|
if catScore > 0 {
|
||||||
reasons = append(reasons, "Kategorie")
|
reasons = append(reasons, "Kategorie")
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. Keyword/synonym match on hazard TYPE (weight 0.3)
|
// 2. Keyword/synonym match on hazard TYPE (weight 0.2)
|
||||||
kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario)
|
kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario)
|
||||||
score += 0.3 * kwScore
|
score += 0.2 * kwScore
|
||||||
if kwScore > 0 {
|
if kwScore > 0 {
|
||||||
reasons = append(reasons, "Keywords")
|
reasons = append(reasons, "Keywords")
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. Component/zone match (weight 0.4 — most important for specificity)
|
// 3. Component/zone match (weight 0.3)
|
||||||
zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule)
|
zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule)
|
||||||
score += 0.4 * zoneScore
|
score += 0.3 * zoneScore
|
||||||
if zoneScore > 0 {
|
if zoneScore > 0 {
|
||||||
reasons = append(reasons, "Zone")
|
reasons = append(reasons, "Zone")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Penalty: if engine hazard mentions a machine-specific term not in the GT context,
|
// 4. Scenario similarity (weight 0.3) — compares the actual event description
|
||||||
// it's likely a wrong-machine match (e.g. "Spielplatz" for a robot cell GT entry)
|
scenScore := scenarioSimilarity(gt.HazardCause, h.Scenario, h.Name)
|
||||||
|
score += 0.3 * scenScore
|
||||||
|
if scenScore > 0 {
|
||||||
|
reasons = append(reasons, "Szenario")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Penalty: wrong machine term
|
||||||
if hasWrongMachineTerm(h.Name, h.Scenario, gt.HazardCause, gt.ComponentZone) {
|
if hasWrongMachineTerm(h.Name, h.Scenario, gt.HazardCause, gt.ComponentZone) {
|
||||||
score *= 0.3
|
score *= 0.3
|
||||||
reasons = append(reasons, "Strafabzug:FremdMaschine")
|
reasons = append(reasons, "Strafabzug:FremdMaschine")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Minimum keyword overlap required: if GT and Engine share no hazard-type
|
// Penalty: no keyword AND no scenario overlap → unreliable
|
||||||
// keywords at all, the match is unreliable regardless of category/zone score
|
if kwScore == 0 && scenScore == 0 && zoneScore < 0.5 {
|
||||||
if kwScore == 0 && zoneScore < 0.5 {
|
score *= 0.4
|
||||||
score *= 0.5
|
reasons = append(reasons, "Strafabzug:KeinInhalt")
|
||||||
reasons = append(reasons, "Strafabzug:KeineKeywords")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return score, strings.Join(reasons, "+")
|
return score, strings.Join(reasons, "+")
|
||||||
}
|
}
|
||||||
|
|
||||||
// wrongMachineTerms are words in an engine hazard that indicate it's about
|
// scenarioSimilarity compares the GT cause description with the engine scenario.
|
||||||
// a completely different machine type. If the GT entry doesn't mention these,
|
// It extracts "action words" (verbs/descriptors that define WHAT happens) and
|
||||||
// the match is penalized.
|
// checks overlap. This differentiates "eingeklemmt" from "herabfallend" from "durchschlägt".
|
||||||
var wrongMachineTerms = []string{
|
func scenarioSimilarity(gtCause, engScenario, engName string) float64 {
|
||||||
"spielplatz", "fahrtreppe", "trommelwaschmaschine", "umreifungsband",
|
gtText := normalizeDE(gtCause)
|
||||||
"drehteller", "rundtaktanlage", "exzentrisch", "webstuhl",
|
engText := normalizeDE(engScenario + " " + engName)
|
||||||
"aufzug", "rolltreppe", "bagger", "kettensaege", "kreissaege",
|
|
||||||
"druckmaschine", "zentrifuge", "autoklav", "hobel",
|
// Extract action/event words that describe the specific scenario
|
||||||
"naehmaschine", "strickmaschine", "schleifmaschine",
|
gtActions := extractActionWords(gtText)
|
||||||
"gabelstapler", "flurfoerder", "erntemaschine",
|
engActions := extractActionWords(engText)
|
||||||
"kollision zweier roboter",
|
|
||||||
|
if len(gtActions) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
matched := 0
|
||||||
|
for _, ga := range gtActions {
|
||||||
|
for _, ea := range engActions {
|
||||||
|
if ga == ea || strings.HasPrefix(ea, ga) || strings.HasPrefix(ga, ea) {
|
||||||
|
matched++
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return float64(matched) / float64(len(gtActions))
|
||||||
|
}
|
||||||
|
|
||||||
|
// scenarioActionTerms lists the word stems that extractActionWords searches
// for. They are the words that tell similar-looking hazards apart. All entries
// are lowercase with umlauts transliterated (ae/oe/ue) and are unique. Results
// are reported in this order.
var scenarioActionTerms = []string{
	"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
	"herabfall", "herunterfal", "faellt",
	"durchschlaegt", "durchbrech", "durchschlag",
	"springt ab", "abspring", "bersten", "platzen",
	"weggeschleudert", "schleuder",
	"getroffen", "treff",
	"greift", "eingreif", "durchgreif", "uebergreif",
	"beruehrt", "beruehr", "kontakt",
	"einzug", "erfass", "aufwickel",
	"stolper", "rutsch", "ausrutsch", "gleiten",
	"verbren", "heiss",
	"spritzer", "augenver",
	"kurzschluss", "ueberstrom", "ueberlast",
	"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
	"potentialausgleich", "potentialunter",
	"emv", "stoereinfluss", "elektromagnet",
	"leckage", "austret", "undicht",
	"schutzzaun", "einhausung", "schutztuer",
	"wiederanlauf", "anlauf", "startet",
	"teach", "einricht", "programmier",
	"spannvorricht", "spannfutter", "greiferbacken",
	"druckluft", "pneumatik", "restdruck",
	"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
	"ergonom", "einlege", "bedienelement",
	"tragfaehig", "boden", "einbrech",
}

// extractActionWords returns every entry of scenarioActionTerms that occurs as
// a substring of text, in list order. It returns nil when none occurs.
//
// Matching is a plain case-sensitive substring test, so text must already be
// lowercased and transliterated the same way as the term list. Overlapping
// stems are all reported: text containing "ausrutsch" also yields "rutsch".
//
// The term list lives at package level so it is not rebuilt on every call, and
// because its entries are unique no de-duplication is needed.
func extractActionWords(text string) []string {
	var found []string
	for _, term := range scenarioActionTerms {
		if strings.Contains(text, term) {
			found = append(found, term)
		}
	}
	return found
}
|
||||||
|
|
||||||
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
|
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
|
||||||
|
|||||||
@@ -0,0 +1,68 @@
|
|||||||
|
package iace
|
||||||
|
|
||||||
|
// synonymSets groups equivalent hazard terms for keyword matching.
//
// Each inner slice is one group of terms treated as interchangeable. Entries
// are lowercase word stems, German and English mixed, with umlauts
// transliterated (ae/oe/ue).
//
// NOTE(review): several stems appear in more than one group (e.g. "ergonom",
// "haltung", "kuehlschmierstoff", "kss", "rutsch", "stolper", "kriechstreck"),
// and short stems such as "luft" or "druck" are contained in longer terms like
// "druckluft". How the matcher treats this overlap is not visible here —
// confirm against keywordMatchScore before pruning.
var synonymSets = [][]string{
	{"quetsch", "crush", "einklemm", "klemm"},
	{"scher", "shear", "absch"},
	{"schneid", "cut", "schnitt"},
	{"stoss", "schlag", "impact", "treff", "aufprall"},
	{"einzug", "fang", "erfass", "entangle", "wickel"},
	{"elektrisch", "stromschlag", "electric", "beruehr", "spannungsfuehr", "koerperdurchstroemung"},
	{"brand", "feuer", "fire", "kabelbrand", "kurzschluss", "ueberlast", "ueberstrom"},
	{"verbrenn", "burn", "heiss", "thermisch", "lichtbogen"},
	{"laerm", "noise", "gehoer", "schall", "dezibel"},
	{"vibration", "schwing"},
	{"ergonom", "haltung", "handhabung", "bedien", "bewegungsapparat"},
	{"kuehlschmierstoff", "kss", "aerosol", "coolant"},
	{"pneumat", "druckluft", "compressed"},
	{"hydraul", "druck", "pressure"},
	{"roboter", "robot", "roboterarm"},
	{"greifer", "gripper", "schunk"},
	{"foerderband", "transport", "conveyor"},
	{"schutzzaun", "schutzgitter", "fence", "guard"},
	{"werkzeugmaschine", "robodrill", "bearbeitungszentrum", "wzm"},
	{"stolper", "rutsch", "slip", "trip"},
	{"leckage", "austreten", "leak"},
	{"einstich", "puncture", "spritz"},
	{"isolat", "kriechstrom", "schutzleiter", "erdung", "indirekt"},
	{"luft", "kriechstreck", "beruehrer", "oberflaeche", "leitfaehig"},
	{"emv", "strahlung", "radiation", "elektromagnet", "stoereinfluss"},
	{"eingeschlossen", "eingesperrt", "wiederanlauf", "quittier"},
	{"zentriergreifer", "zentriereinheit", "zentrieren"},
	{"beladetuer", "schutztuer", "zugangstuer", "tuerposition"},
	{"werkstueck", "rohteil", "rohling"},
	{"ergonom", "einlege", "bedienelemente", "arbeitshoehe", "haltung"},
	// "fundamentier" is the stem of "Fundamentierung"; the previous spelling
	// with a doubled "r" could never occur in real text.
	{"boden", "tragfaehig", "einbrech", "fundamentier"},
	{"spritzer", "auge", "augenverletz"},
	{"bersten", "platzen", "abspring"},
	{"durchschlag", "durchbrech", "begrenz", "bewegungsbereich"},
	{"potentialausgleich", "potentialunter", "bezugspotential"},
	{"kriechstreck", "luft-", "kriechst", "dimensionie"},
	{"kuehlschmierstoff", "kss", "bettspuel", "kuehlung"},
	{"rutsch", "ausrutsch", "stolper", "gleiten", "nassrutsch"},
}
|
||||||
|
|
||||||
|
// wrongMachineTerms lists lowercase, umlaut-transliterated terms whose presence
// in an engine hazard signals that the hazard belongs to a completely
// different machine type.
var wrongMachineTerms = []string{
	"spielplatz",
	"fahrtreppe",
	"trommelwaschmaschine",
	"umreifungsband",
	"drehteller",
	"rundtaktanlage",
	"exzentrisch",
	"webstuhl",
	"aufzug",
	"rolltreppe",
	"bagger",
	"kettensaege",
	"kreissaege",
	"druckmaschine",
	"zentrifuge",
	"autoklav",
	"hobel",
	"naehmaschine",
	"strickmaschine",
	"schleifmaschine",
	"gabelstapler",
	"flurfoerder",
	"erntemaschine",
	"kollision zweier roboter",
}
|
||||||
|
|
||||||
|
// categoryMap translates a Ground Truth hazard_group (German, lowercase,
// umlauts transliterated) into the engine category prefixes that count as a
// match for that group. A group may map to more than one prefix.
var categoryMap = map[string][]string{
	// Groups with exactly one engine category.
	"mechanische gefaehrdungen":  {"mechanical"},
	"elektrische gefaehrdungen":  {"electrical"},
	"thermische gefaehrdungen":   {"thermal"},
	"ergonomische gefaehrdungen": {"ergonomic"},
	"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},

	// Groups that accept two engine categories.
	"gefaehrdungen durch laerm":                      {"noise", "ergonomic"},
	"gefaehrdungen durch vibration":                  {"noise", "vibration"},
	"gefaehrdungen durch strahlung":                  {"radiation", "emc"},
	"gefaehrdungen durch materialien und substanzen": {"material", "environmental"},
}
|
||||||
Reference in New Issue
Block a user