feat(iace): scenario-based matching + split benchmark_synonyms.go
4-signal matcher: category (0.2), keywords (0.2), zone (0.3), scenario similarity (0.3). Scenario signal extracts action words (eingeklemmt vs herabfallend vs durchschlaegt) to differentiate similar-looking hazards at the same component. Split benchmark_synonyms.go (70 lines) from benchmark_matcher.go (516→450 lines) to stay under 500-line cap. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,60 +11,7 @@ import (
|
||||
|
||||
const matchThreshold = 0.20
|
||||
|
||||
// categoryMap maps GT hazard_group (German) to engine category prefixes.
|
||||
var categoryMap = map[string][]string{
|
||||
"mechanische gefaehrdungen": {"mechanical"},
|
||||
"elektrische gefaehrdungen": {"electrical"},
|
||||
"thermische gefaehrdungen": {"thermal"},
|
||||
"gefaehrdungen durch laerm": {"noise", "ergonomic"},
|
||||
"gefaehrdungen durch vibration": {"noise", "vibration"},
|
||||
"gefaehrdungen durch strahlung": {"radiation", "emc"},
|
||||
"gefaehrdungen durch materialien und substanzen": {"material", "environmental"},
|
||||
"ergonomische gefaehrdungen": {"ergonomic"},
|
||||
"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},
|
||||
}
|
||||
|
||||
// synonymSets groups equivalent hazard terms for keyword matching.
|
||||
var synonymSets = [][]string{
|
||||
{"quetsch", "crush", "einklemm", "klemm"},
|
||||
{"scher", "shear", "absch"},
|
||||
{"schneid", "cut", "schnitt"},
|
||||
{"stoss", "schlag", "impact", "treff", "aufprall"},
|
||||
{"einzug", "fang", "erfass", "entangle", "wickel"},
|
||||
{"elektrisch", "stromschlag", "electric", "beruehr", "spannungsfuehr", "koerperdurchstroemung"},
|
||||
{"brand", "feuer", "fire", "kabelbrand", "kurzschluss", "ueberlast", "ueberstrom"},
|
||||
{"verbrenn", "burn", "heiss", "thermisch", "lichtbogen"},
|
||||
{"laerm", "noise", "gehoer", "schall", "dezibel"},
|
||||
{"vibration", "schwing"},
|
||||
{"ergonom", "haltung", "handhabung", "bedien", "bewegungsapparat"},
|
||||
{"kuehlschmierstoff", "kss", "aerosol", "coolant"},
|
||||
{"pneumat", "druckluft", "compressed"},
|
||||
{"hydraul", "druck", "pressure"},
|
||||
{"roboter", "robot", "roboterarm"},
|
||||
{"greifer", "gripper", "schunk"},
|
||||
{"foerderband", "transport", "conveyor"},
|
||||
{"schutzzaun", "schutzgitter", "fence", "guard"},
|
||||
{"werkzeugmaschine", "robodrill", "bearbeitungszentrum", "wzm"},
|
||||
{"stolper", "rutsch", "slip", "trip"},
|
||||
{"leckage", "austreten", "leak"},
|
||||
{"einstich", "puncture", "spritz"},
|
||||
{"isolat", "kriechstrom", "schutzleiter", "erdung", "indirekt"},
|
||||
{"luft", "kriechstreck", "beruehrer", "oberflaeche", "leitfaehig"},
|
||||
{"emv", "strahlung", "radiation", "elektromagnet", "stoereinfluss"},
|
||||
{"eingeschlossen", "eingesperrt", "wiederanlauf", "quittier"},
|
||||
{"zentriergreifer", "zentriereinheit", "zentrieren"},
|
||||
{"beladetuer", "schutztuer", "zugangstuer", "tuerposition"},
|
||||
{"werkstueck", "rohteil", "rohling"},
|
||||
{"ergonom", "einlege", "bedienelemente", "arbeitshoehe", "haltung"},
|
||||
{"boden", "tragfaehig", "einbrech", "fundamentierr"},
|
||||
{"spritzer", "auge", "augenverletz"},
|
||||
{"bersten", "platzen", "abspring"},
|
||||
{"durchschlag", "durchbrech", "begrenz", "bewegungsbereich"},
|
||||
{"potentialausgleich", "potentialunter", "bezugspotential"},
|
||||
{"kriechstreck", "luft-", "kriechst", "dimensionie"},
|
||||
{"kuehlschmierstoff", "kss", "bettspuel", "kuehlung"},
|
||||
{"rutsch", "ausrutsch", "stolper", "gleiten", "nassrutsch"},
|
||||
}
|
||||
// categoryMap, synonymSets, wrongMachineTerms → benchmark_synonyms.go
|
||||
|
||||
// CompareBenchmark runs the full comparison between Ground Truth and engine output.
|
||||
func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigation) *BenchmarkResult {
|
||||
@@ -211,59 +158,121 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
|
||||
}
|
||||
|
||||
// fuzzyMatchScore computes a 0-1 similarity between a GT entry and an engine hazard.
|
||||
// 4 signals: category (0.2), keywords (0.2), zone (0.3), scenario similarity (0.3).
|
||||
func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
|
||||
var score float64
|
||||
var reasons []string
|
||||
|
||||
// 1. Category match (weight 0.3)
|
||||
// 1. Category match (weight 0.2)
|
||||
catScore := categoryMatchScore(gt.HazardGroup, h.Category)
|
||||
score += 0.3 * catScore
|
||||
score += 0.2 * catScore
|
||||
if catScore > 0 {
|
||||
reasons = append(reasons, "Kategorie")
|
||||
}
|
||||
|
||||
// 2. Keyword/synonym match on hazard TYPE (weight 0.3)
|
||||
// 2. Keyword/synonym match on hazard TYPE (weight 0.2)
|
||||
kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario)
|
||||
score += 0.3 * kwScore
|
||||
score += 0.2 * kwScore
|
||||
if kwScore > 0 {
|
||||
reasons = append(reasons, "Keywords")
|
||||
}
|
||||
|
||||
// 3. Component/zone match (weight 0.4 — most important for specificity)
|
||||
// 3. Component/zone match (weight 0.3)
|
||||
zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule)
|
||||
score += 0.4 * zoneScore
|
||||
score += 0.3 * zoneScore
|
||||
if zoneScore > 0 {
|
||||
reasons = append(reasons, "Zone")
|
||||
}
|
||||
|
||||
// Penalty: if engine hazard mentions a machine-specific term not in the GT context,
|
||||
// it's likely a wrong-machine match (e.g. "Spielplatz" for a robot cell GT entry)
|
||||
// 4. Scenario similarity (weight 0.3) — compares the actual event description
|
||||
scenScore := scenarioSimilarity(gt.HazardCause, h.Scenario, h.Name)
|
||||
score += 0.3 * scenScore
|
||||
if scenScore > 0 {
|
||||
reasons = append(reasons, "Szenario")
|
||||
}
|
||||
|
||||
// Penalty: wrong machine term
|
||||
if hasWrongMachineTerm(h.Name, h.Scenario, gt.HazardCause, gt.ComponentZone) {
|
||||
score *= 0.3
|
||||
reasons = append(reasons, "Strafabzug:FremdMaschine")
|
||||
}
|
||||
|
||||
// Minimum keyword overlap required: if GT and Engine share no hazard-type
|
||||
// keywords at all, the match is unreliable regardless of category/zone score
|
||||
if kwScore == 0 && zoneScore < 0.5 {
|
||||
score *= 0.5
|
||||
reasons = append(reasons, "Strafabzug:KeineKeywords")
|
||||
// Penalty: no keyword AND no scenario overlap → unreliable
|
||||
if kwScore == 0 && scenScore == 0 && zoneScore < 0.5 {
|
||||
score *= 0.4
|
||||
reasons = append(reasons, "Strafabzug:KeinInhalt")
|
||||
}
|
||||
|
||||
return score, strings.Join(reasons, "+")
|
||||
}
|
||||
|
||||
// wrongMachineTerms are words in an engine hazard that indicate it's about
|
||||
// a completely different machine type. If the GT entry doesn't mention these,
|
||||
// the match is penalized.
|
||||
var wrongMachineTerms = []string{
|
||||
"spielplatz", "fahrtreppe", "trommelwaschmaschine", "umreifungsband",
|
||||
"drehteller", "rundtaktanlage", "exzentrisch", "webstuhl",
|
||||
"aufzug", "rolltreppe", "bagger", "kettensaege", "kreissaege",
|
||||
"druckmaschine", "zentrifuge", "autoklav", "hobel",
|
||||
"naehmaschine", "strickmaschine", "schleifmaschine",
|
||||
"gabelstapler", "flurfoerder", "erntemaschine",
|
||||
"kollision zweier roboter",
|
||||
// scenarioSimilarity compares the GT cause description with the engine scenario.
|
||||
// It extracts "action words" (verbs/descriptors that define WHAT happens) and
|
||||
// checks overlap. This differentiates "eingeklemmt" from "herabfallend" from "durchschlägt".
|
||||
func scenarioSimilarity(gtCause, engScenario, engName string) float64 {
|
||||
gtText := normalizeDE(gtCause)
|
||||
engText := normalizeDE(engScenario + " " + engName)
|
||||
|
||||
// Extract action/event words that describe the specific scenario
|
||||
gtActions := extractActionWords(gtText)
|
||||
engActions := extractActionWords(engText)
|
||||
|
||||
if len(gtActions) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
matched := 0
|
||||
for _, ga := range gtActions {
|
||||
for _, ea := range engActions {
|
||||
if ga == ea || strings.HasPrefix(ea, ga) || strings.HasPrefix(ga, ea) {
|
||||
matched++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return float64(matched) / float64(len(gtActions))
|
||||
}
|
||||
|
||||
// extractActionWords returns the entries of a fixed vocabulary of hazard
// "action" terms (verbs and descriptors that say WHAT happens) that occur in
// text.
//
// Matching is a plain substring test, so text is expected to be in the same
// normalized form as the vocabulary (lower case, umlauts transliterated);
// the caller scenarioSimilarity passes normalizeDE output.
//
// The result lists each matching term once, in vocabulary order rather than
// in order of appearance in text. It is nil when nothing matches.
// Overlapping vocabulary entries are reported independently: a text
// containing "wiederanlauf" yields both "wiederanlauf" and "anlauf".
func extractActionWords(text string) []string {
	// These are the differentiating words between similar-looking hazards.
	// Every entry is unique, so a single pass cannot report a term twice and
	// no extra de-duplication is required.
	actionTerms := []string{
		"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
		"herabfall", "herunterfal", "faellt",
		"durchschlaegt", "durchbrech", "durchschlag",
		"springt ab", "abspring", "bersten", "platzen",
		"weggeschleudert", "schleuder",
		"getroffen", "treff",
		"greift", "eingreif", "durchgreif", "uebergreif",
		"beruehrt", "beruehr", "kontakt",
		"einzug", "erfass", "aufwickel",
		"stolper", "rutsch", "ausrutsch", "gleiten",
		"verbren", "heiss",
		"spritzer", "augenver",
		"kurzschluss", "ueberstrom", "ueberlast",
		"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
		"potentialausgleich", "potentialunter",
		"emv", "stoereinfluss", "elektromagnet",
		"leckage", "austret", "undicht",
		"schutzzaun", "einhausung", "schutztuer",
		"wiederanlauf", "anlauf", "startet",
		"teach", "einricht", "programmier",
		"spannvorricht", "spannfutter", "greiferbacken",
		"druckluft", "pneumatik", "restdruck",
		"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
		"ergonom", "einlege", "bedienelement",
		"tragfaehig", "boden", "einbrech",
	}

	var found []string
	for _, term := range actionTerms {
		if strings.Contains(text, term) {
			found = append(found, term)
		}
	}
	return found
}
|
||||
|
||||
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
package iace
|
||||
|
||||
// synonymSets groups equivalent hazard terms for keyword matching.
//
// Each inner slice is one equivalence class of lower-case, ASCII-transliterated
// word stems (German and English). Several stems deliberately or historically
// appear in more than one set (e.g. "ergonom", "kss", "rutsch",
// "kriechstreck"); the sets are kept as they are so matching results do not
// shift.
//
// NOTE(review): the stems look like they are meant for substring matching
// against normalized text — confirm against keywordMatchScore. Under that
// reading the former entry "fundamentierr" (double r) could never match a
// word such as "fundamentierung"; it is corrected to "fundamentier" here.
var synonymSets = [][]string{
	{"quetsch", "crush", "einklemm", "klemm"},
	{"scher", "shear", "absch"},
	{"schneid", "cut", "schnitt"},
	{"stoss", "schlag", "impact", "treff", "aufprall"},
	{"einzug", "fang", "erfass", "entangle", "wickel"},
	{"elektrisch", "stromschlag", "electric", "beruehr", "spannungsfuehr", "koerperdurchstroemung"},
	{"brand", "feuer", "fire", "kabelbrand", "kurzschluss", "ueberlast", "ueberstrom"},
	{"verbrenn", "burn", "heiss", "thermisch", "lichtbogen"},
	{"laerm", "noise", "gehoer", "schall", "dezibel"},
	{"vibration", "schwing"},
	{"ergonom", "haltung", "handhabung", "bedien", "bewegungsapparat"},
	{"kuehlschmierstoff", "kss", "aerosol", "coolant"},
	{"pneumat", "druckluft", "compressed"},
	{"hydraul", "druck", "pressure"},
	{"roboter", "robot", "roboterarm"},
	{"greifer", "gripper", "schunk"},
	{"foerderband", "transport", "conveyor"},
	{"schutzzaun", "schutzgitter", "fence", "guard"},
	{"werkzeugmaschine", "robodrill", "bearbeitungszentrum", "wzm"},
	{"stolper", "rutsch", "slip", "trip"},
	{"leckage", "austreten", "leak"},
	{"einstich", "puncture", "spritz"},
	{"isolat", "kriechstrom", "schutzleiter", "erdung", "indirekt"},
	{"luft", "kriechstreck", "beruehrer", "oberflaeche", "leitfaehig"},
	{"emv", "strahlung", "radiation", "elektromagnet", "stoereinfluss"},
	{"eingeschlossen", "eingesperrt", "wiederanlauf", "quittier"},
	{"zentriergreifer", "zentriereinheit", "zentrieren"},
	{"beladetuer", "schutztuer", "zugangstuer", "tuerposition"},
	{"werkstueck", "rohteil", "rohling"},
	{"ergonom", "einlege", "bedienelemente", "arbeitshoehe", "haltung"},
	{"boden", "tragfaehig", "einbrech", "fundamentier"},
	{"spritzer", "auge", "augenverletz"},
	{"bersten", "platzen", "abspring"},
	{"durchschlag", "durchbrech", "begrenz", "bewegungsbereich"},
	{"potentialausgleich", "potentialunter", "bezugspotential"},
	{"kriechstreck", "luft-", "kriechst", "dimensionie"},
	{"kuehlschmierstoff", "kss", "bettspuel", "kuehlung"},
	{"rutsch", "ausrutsch", "stolper", "gleiten", "nassrutsch"},
}
|
||||
|
||||
// wrongMachineTerms lists lower-case words and phrases that, when found in an
// engine hazard, indicate the hazard belongs to a completely different type
// of machine than the one being benchmarked.
var wrongMachineTerms = []string{
	"spielplatz",
	"fahrtreppe",
	"trommelwaschmaschine",
	"umreifungsband",
	"drehteller",
	"rundtaktanlage",
	"exzentrisch",
	"webstuhl",
	"aufzug",
	"rolltreppe",
	"bagger",
	"kettensaege",
	"kreissaege",
	"druckmaschine",
	"zentrifuge",
	"autoklav",
	"hobel",
	"naehmaschine",
	"strickmaschine",
	"schleifmaschine",
	"gabelstapler",
	"flurfoerder",
	"erntemaschine",
	"kollision zweier roboter",
}
|
||||
|
||||
// categoryMap translates a ground-truth hazard_group (German, lower case,
// umlauts transliterated) into the engine category prefixes that count as a
// match for that group. A group may map to more than one prefix.
var categoryMap = map[string][]string{
	"elektrische gefaehrdungen":                             {"electrical"},
	"ergonomische gefaehrdungen":                            {"ergonomic"},
	"gefaehrdungen durch laerm":                             {"noise", "ergonomic"},
	"gefaehrdungen durch materialien und substanzen":        {"material", "environmental"},
	"gefaehrdungen durch strahlung":                         {"radiation", "emc"},
	"gefaehrdungen durch vibration":                         {"noise", "vibration"},
	"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},
	"mechanische gefaehrdungen":                             {"mechanical"},
	"thermische gefaehrdungen":                              {"thermal"},
}
|
||||
Reference in New Issue
Block a user