feat(iace): scenario-based matching + split benchmark_synonyms.go

4-signal matcher: category (0.2), keywords (0.2), zone (0.3),
scenario similarity (0.3). Scenario signal extracts action words
(eingeklemmt vs herabfallend vs durchschlaegt) to differentiate
similar-looking hazards at the same component.

Split benchmark_synonyms.go (70 lines) from benchmark_matcher.go
(516→450 lines) to stay under 500-line cap.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-15 09:58:12 +02:00
parent c060ac222a
commit b82853a95b
2 changed files with 155 additions and 78 deletions
@@ -11,60 +11,7 @@ import (
const matchThreshold = 0.20 const matchThreshold = 0.20
// categoryMap maps GT hazard_group (German) to engine category prefixes. // categoryMap, synonymSets, wrongMachineTerms → benchmark_synonyms.go
var categoryMap = map[string][]string{
"mechanische gefaehrdungen": {"mechanical"},
"elektrische gefaehrdungen": {"electrical"},
"thermische gefaehrdungen": {"thermal"},
"gefaehrdungen durch laerm": {"noise", "ergonomic"},
"gefaehrdungen durch vibration": {"noise", "vibration"},
"gefaehrdungen durch strahlung": {"radiation", "emc"},
"gefaehrdungen durch materialien und substanzen": {"material", "environmental"},
"ergonomische gefaehrdungen": {"ergonomic"},
"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},
}
// synonymSets groups equivalent hazard terms for keyword matching.
var synonymSets = [][]string{
{"quetsch", "crush", "einklemm", "klemm"},
{"scher", "shear", "absch"},
{"schneid", "cut", "schnitt"},
{"stoss", "schlag", "impact", "treff", "aufprall"},
{"einzug", "fang", "erfass", "entangle", "wickel"},
{"elektrisch", "stromschlag", "electric", "beruehr", "spannungsfuehr", "koerperdurchstroemung"},
{"brand", "feuer", "fire", "kabelbrand", "kurzschluss", "ueberlast", "ueberstrom"},
{"verbrenn", "burn", "heiss", "thermisch", "lichtbogen"},
{"laerm", "noise", "gehoer", "schall", "dezibel"},
{"vibration", "schwing"},
{"ergonom", "haltung", "handhabung", "bedien", "bewegungsapparat"},
{"kuehlschmierstoff", "kss", "aerosol", "coolant"},
{"pneumat", "druckluft", "compressed"},
{"hydraul", "druck", "pressure"},
{"roboter", "robot", "roboterarm"},
{"greifer", "gripper", "schunk"},
{"foerderband", "transport", "conveyor"},
{"schutzzaun", "schutzgitter", "fence", "guard"},
{"werkzeugmaschine", "robodrill", "bearbeitungszentrum", "wzm"},
{"stolper", "rutsch", "slip", "trip"},
{"leckage", "austreten", "leak"},
{"einstich", "puncture", "spritz"},
{"isolat", "kriechstrom", "schutzleiter", "erdung", "indirekt"},
{"luft", "kriechstreck", "beruehrer", "oberflaeche", "leitfaehig"},
{"emv", "strahlung", "radiation", "elektromagnet", "stoereinfluss"},
{"eingeschlossen", "eingesperrt", "wiederanlauf", "quittier"},
{"zentriergreifer", "zentriereinheit", "zentrieren"},
{"beladetuer", "schutztuer", "zugangstuer", "tuerposition"},
{"werkstueck", "rohteil", "rohling"},
{"ergonom", "einlege", "bedienelemente", "arbeitshoehe", "haltung"},
{"boden", "tragfaehig", "einbrech", "fundamentierr"},
{"spritzer", "auge", "augenverletz"},
{"bersten", "platzen", "abspring"},
{"durchschlag", "durchbrech", "begrenz", "bewegungsbereich"},
{"potentialausgleich", "potentialunter", "bezugspotential"},
{"kriechstreck", "luft-", "kriechst", "dimensionie"},
{"kuehlschmierstoff", "kss", "bettspuel", "kuehlung"},
{"rutsch", "ausrutsch", "stolper", "gleiten", "nassrutsch"},
}
// CompareBenchmark runs the full comparison between Ground Truth and engine output. // CompareBenchmark runs the full comparison between Ground Truth and engine output.
func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigation) *BenchmarkResult { func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigation) *BenchmarkResult {
@@ -211,59 +158,121 @@ func CompareBenchmark(gt *GroundTruth, hazards []Hazard, mitigations []Mitigatio
} }
// fuzzyMatchScore computes a 0-1 similarity between a GT entry and an engine hazard. // fuzzyMatchScore computes a 0-1 similarity between a GT entry and an engine hazard.
// 4 signals: category (0.2), keywords (0.2), zone (0.3), scenario similarity (0.3).
func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) { func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
var score float64 var score float64
var reasons []string var reasons []string
// 1. Category match (weight 0.3) // 1. Category match (weight 0.2)
catScore := categoryMatchScore(gt.HazardGroup, h.Category) catScore := categoryMatchScore(gt.HazardGroup, h.Category)
score += 0.3 * catScore score += 0.2 * catScore
if catScore > 0 { if catScore > 0 {
reasons = append(reasons, "Kategorie") reasons = append(reasons, "Kategorie")
} }
// 2. Keyword/synonym match on hazard TYPE (weight 0.3) // 2. Keyword/synonym match on hazard TYPE (weight 0.2)
kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario) kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario)
score += 0.3 * kwScore score += 0.2 * kwScore
if kwScore > 0 { if kwScore > 0 {
reasons = append(reasons, "Keywords") reasons = append(reasons, "Keywords")
} }
// 3. Component/zone match (weight 0.4 — most important for specificity) // 3. Component/zone match (weight 0.3)
zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule) zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule)
score += 0.4 * zoneScore score += 0.3 * zoneScore
if zoneScore > 0 { if zoneScore > 0 {
reasons = append(reasons, "Zone") reasons = append(reasons, "Zone")
} }
// Penalty: if engine hazard mentions a machine-specific term not in the GT context, // 4. Scenario similarity (weight 0.3) — compares the actual event description
// it's likely a wrong-machine match (e.g. "Spielplatz" for a robot cell GT entry) scenScore := scenarioSimilarity(gt.HazardCause, h.Scenario, h.Name)
score += 0.3 * scenScore
if scenScore > 0 {
reasons = append(reasons, "Szenario")
}
// Penalty: wrong machine term
if hasWrongMachineTerm(h.Name, h.Scenario, gt.HazardCause, gt.ComponentZone) { if hasWrongMachineTerm(h.Name, h.Scenario, gt.HazardCause, gt.ComponentZone) {
score *= 0.3 score *= 0.3
reasons = append(reasons, "Strafabzug:FremdMaschine") reasons = append(reasons, "Strafabzug:FremdMaschine")
} }
// Minimum keyword overlap required: if GT and Engine share no hazard-type // Penalty: no keyword AND no scenario overlap → unreliable
// keywords at all, the match is unreliable regardless of category/zone score if kwScore == 0 && scenScore == 0 && zoneScore < 0.5 {
if kwScore == 0 && zoneScore < 0.5 { score *= 0.4
score *= 0.5 reasons = append(reasons, "Strafabzug:KeinInhalt")
reasons = append(reasons, "Strafabzug:KeineKeywords")
} }
return score, strings.Join(reasons, "+") return score, strings.Join(reasons, "+")
} }
// wrongMachineTerms are words in an engine hazard that indicate it's about // scenarioSimilarity compares the GT cause description with the engine scenario.
// a completely different machine type. If the GT entry doesn't mention these, // It extracts "action words" (verbs/descriptors that define WHAT happens) and
// the match is penalized. // checks overlap. This differentiates "eingeklemmt" from "herabfallend" from "durchschlägt".
var wrongMachineTerms = []string{ func scenarioSimilarity(gtCause, engScenario, engName string) float64 {
"spielplatz", "fahrtreppe", "trommelwaschmaschine", "umreifungsband", gtText := normalizeDE(gtCause)
"drehteller", "rundtaktanlage", "exzentrisch", "webstuhl", engText := normalizeDE(engScenario + " " + engName)
"aufzug", "rolltreppe", "bagger", "kettensaege", "kreissaege",
"druckmaschine", "zentrifuge", "autoklav", "hobel", // Extract action/event words that describe the specific scenario
"naehmaschine", "strickmaschine", "schleifmaschine", gtActions := extractActionWords(gtText)
"gabelstapler", "flurfoerder", "erntemaschine", engActions := extractActionWords(engText)
"kollision zweier roboter",
if len(gtActions) == 0 {
return 0
}
matched := 0
for _, ga := range gtActions {
for _, ea := range engActions {
if ga == ea || strings.HasPrefix(ea, ga) || strings.HasPrefix(ga, ea) {
matched++
break
}
}
}
return float64(matched) / float64(len(gtActions))
}
// scenarioActionTerms is the fixed vocabulary that extractActionWords searches
// for. Entries are lowercase stems with German umlauts transliterated
// (ae/oe/ue), so the text being searched must be in the same form.
// Some stems overlap on purpose-built variants (e.g. "rutsch"/"ausrutsch",
// "anlauf"/"wiederanlauf"); a text containing the longer one yields both.
var scenarioActionTerms = []string{
	"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
	"herabfall", "herunterfal", "faellt",
	"durchschlaegt", "durchbrech", "durchschlag",
	"springt ab", "abspring", "bersten", "platzen",
	"weggeschleudert", "schleuder",
	"getroffen", "treff",
	"greift", "eingreif", "durchgreif", "uebergreif",
	"beruehrt", "beruehr", "kontakt",
	"einzug", "erfass", "aufwickel",
	"stolper", "rutsch", "ausrutsch", "gleiten",
	"verbren", "heiss",
	"spritzer", "augenver",
	"kurzschluss", "ueberstrom", "ueberlast",
	"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
	"potentialausgleich", "potentialunter",
	"emv", "stoereinfluss", "elektromagnet",
	"leckage", "austret", "undicht",
	"schutzzaun", "einhausung", "schutztuer",
	"wiederanlauf", "anlauf", "startet",
	"teach", "einricht", "programmier",
	"spannvorricht", "spannfutter", "greiferbacken",
	"druckluft", "pneumatik", "restdruck",
	"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
	"ergonom", "einlege", "bedienelement",
	"tragfaehig", "boden", "einbrech",
}

// extractActionWords returns every entry of scenarioActionTerms that occurs
// as a substring of text.
//
// The result follows the order of the term table, not the order in which the
// terms appear in text, and each term is reported at most once no matter how
// often it occurs. The result is nil when nothing matches (including for an
// empty text).
func extractActionWords(text string) []string {
	var found []string
	// Each table entry is visited exactly once and the table holds no
	// duplicates, so no extra de-duplication bookkeeping is required.
	for _, term := range scenarioActionTerms {
		if strings.Contains(text, term) {
			found = append(found, term)
		}
	}
	return found
}
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool { func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
@@ -0,0 +1,68 @@
package iace
// synonymSets groups hazard terms that are treated as equivalent for keyword
// matching. Each inner slice is one equivalence group; entries are lowercase
// stems (German with umlauts transliterated as ae/oe/ue/ss, plus English
// counterparts).
//
// NOTE(review): several stems appear in more than one group (e.g. "ergonom",
// "kss", "rutsch", "kriechstreck"), and some short stems are substrings of
// others ("luft" in "druckluft", "druck" in "druckluft"). Presumably this is
// intentional; confirm against the keyword matcher before merging groups.
var synonymSets = [][]string{
	{"quetsch", "crush", "einklemm", "klemm"},
	{"scher", "shear", "absch"},
	{"schneid", "cut", "schnitt"},
	{"stoss", "schlag", "impact", "treff", "aufprall"},
	{"einzug", "fang", "erfass", "entangle", "wickel"},
	{"elektrisch", "stromschlag", "electric", "beruehr", "spannungsfuehr", "koerperdurchstroemung"},
	{"brand", "feuer", "fire", "kabelbrand", "kurzschluss", "ueberlast", "ueberstrom"},
	{"verbrenn", "burn", "heiss", "thermisch", "lichtbogen"},
	{"laerm", "noise", "gehoer", "schall", "dezibel"},
	{"vibration", "schwing"},
	{"ergonom", "haltung", "handhabung", "bedien", "bewegungsapparat"},
	{"kuehlschmierstoff", "kss", "aerosol", "coolant"},
	{"pneumat", "druckluft", "compressed"},
	{"hydraul", "druck", "pressure"},
	{"roboter", "robot", "roboterarm"},
	{"greifer", "gripper", "schunk"},
	{"foerderband", "transport", "conveyor"},
	{"schutzzaun", "schutzgitter", "fence", "guard"},
	{"werkzeugmaschine", "robodrill", "bearbeitungszentrum", "wzm"},
	{"stolper", "rutsch", "slip", "trip"},
	{"leckage", "austreten", "leak"},
	{"einstich", "puncture", "spritz"},
	{"isolat", "kriechstrom", "schutzleiter", "erdung", "indirekt"},
	{"luft", "kriechstreck", "beruehrer", "oberflaeche", "leitfaehig"},
	{"emv", "strahlung", "radiation", "elektromagnet", "stoereinfluss"},
	{"eingeschlossen", "eingesperrt", "wiederanlauf", "quittier"},
	{"zentriergreifer", "zentriereinheit", "zentrieren"},
	{"beladetuer", "schutztuer", "zugangstuer", "tuerposition"},
	{"werkstueck", "rohteil", "rohling"},
	{"ergonom", "einlege", "bedienelemente", "arbeitshoehe", "haltung"},
	// "fundamentier" is the shared stem of "Fundamentierung"/"fundamentiert";
	// the previous spelling with a doubled "r" could never match either.
	{"boden", "tragfaehig", "einbrech", "fundamentier"},
	{"spritzer", "auge", "augenverletz"},
	{"bersten", "platzen", "abspring"},
	{"durchschlag", "durchbrech", "begrenz", "bewegungsbereich"},
	{"potentialausgleich", "potentialunter", "bezugspotential"},
	{"kriechstreck", "luft-", "kriechst", "dimensionie"},
	{"kuehlschmierstoff", "kss", "bettspuel", "kuehlung"},
	{"rutsch", "ausrutsch", "stolper", "gleiten", "nassrutsch"},
}
// wrongMachineTerms lists lowercase German terms whose presence in an engine
// hazard marks that hazard as describing an entirely different kind of
// machine.
//
// NOTE(review): the final entry is a multi-word phrase rather than a single
// stem; presumably it is matched as a literal substring — confirm in
// hasWrongMachineTerm.
var wrongMachineTerms = []string{
	"spielplatz",
	"fahrtreppe",
	"trommelwaschmaschine",
	"umreifungsband",
	"drehteller",
	"rundtaktanlage",
	"exzentrisch",
	"webstuhl",
	"aufzug",
	"rolltreppe",
	"bagger",
	"kettensaege",
	"kreissaege",
	"druckmaschine",
	"zentrifuge",
	"autoklav",
	"hobel",
	"naehmaschine",
	"strickmaschine",
	"schleifmaschine",
	"gabelstapler",
	"flurfoerder",
	"erntemaschine",
	"kollision zweier roboter",
}
// categoryMap translates a Ground Truth hazard group into the engine category
// prefixes that count as a match for it. Keys are the German group names in
// lowercase with umlauts transliterated (ae/oe/ue); a group may accept more
// than one engine prefix (e.g. the noise group also accepts "ergonomic").
var categoryMap = map[string][]string{
	"mechanische gefaehrdungen":                             {"mechanical"},
	"elektrische gefaehrdungen":                             {"electrical"},
	"thermische gefaehrdungen":                              {"thermal"},
	"gefaehrdungen durch laerm":                             {"noise", "ergonomic"},
	"gefaehrdungen durch vibration":                         {"noise", "vibration"},
	"gefaehrdungen durch strahlung":                         {"radiation", "emc"},
	"gefaehrdungen durch materialien und substanzen":        {"material", "environmental"},
	"ergonomische gefaehrdungen":                            {"ergonomic"},
	"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},
}