Files
breakpilot-compliance/ai-compliance-sdk/internal/iace/benchmark_synonyms.go
T
Benjamin Admin 003eafa75d fix(iace): synonym-cross-matching + expanded action words
scenarioSimilarity now uses synonym-set cross-matching: if GT says
"durchschlaegt" and Engine says "schleuder", the synonym set recognizes
them as related. Added significantWordOverlap fallback when no action
words found. Extended action terms: schlauch/druck/kuehlschmierstoff,
pumpe/bettspuel, potential/bezugspotential, stoerung/emv.

Moved extractActionWords to benchmark_synonyms.go (458+119 lines).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-15 10:03:23 +02:00

120 lines
5.1 KiB
Go

package iace
import "strings"
// synonymSets lists groups of hazard terms that are treated as equivalent for
// keyword matching. Each inner slice is one group; the entries are lowercase
// word stems or fragments (German with ASCII-transliterated umlauts, plus
// some English terms) rather than complete words.
//
// A term may occur in more than one group (for example "ergonom", "kss",
// "rutsch", "stolper" and "kriechstreck"), so a single term does not identify
// a unique group.
var synonymSets = [][]string{
	// Mechanical hazard events.
	{"quetsch", "crush", "einklemm", "klemm"},
	{"scher", "shear", "absch"},
	{"schneid", "cut", "schnitt"},
	{"stoss", "schlag", "impact", "treff", "aufprall"},
	{"einzug", "fang", "erfass", "entangle", "wickel"},

	// Electrical, fire and thermal hazards.
	{"elektrisch", "stromschlag", "electric", "beruehr", "spannungsfuehr", "koerperdurchstroemung"},
	{"brand", "feuer", "fire", "kabelbrand", "kurzschluss", "ueberlast", "ueberstrom"},
	{"verbrenn", "burn", "heiss", "thermisch", "lichtbogen"},

	// Noise, vibration and ergonomics.
	{"laerm", "noise", "gehoer", "schall", "dezibel"},
	{"vibration", "schwing"},
	{"ergonom", "haltung", "handhabung", "bedien", "bewegungsapparat"},

	// Fluids and pressure media.
	{"kuehlschmierstoff", "kss", "aerosol", "coolant"},
	{"pneumat", "druckluft", "compressed"},
	{"hydraul", "druck", "pressure"},

	// Machine components and guarding.
	{"roboter", "robot", "roboterarm"},
	{"greifer", "gripper", "schunk"},
	{"foerderband", "transport", "conveyor"},
	{"schutzzaun", "schutzgitter", "fence", "guard"},
	{"werkzeugmaschine", "robodrill", "bearbeitungszentrum", "wzm"},

	// Slipping, leakage and injection.
	{"stolper", "rutsch", "slip", "trip"},
	{"leckage", "austreten", "leak"},
	{"einstich", "puncture", "spritz"},

	// Insulation, clearances and electromagnetic effects.
	{"isolat", "kriechstrom", "schutzleiter", "erdung", "indirekt"},
	{"luft", "kriechstreck", "beruehrer", "oberflaeche", "leitfaehig"},
	{"emv", "strahlung", "radiation", "elektromagnet", "stoereinfluss"},

	// Cell access, workpiece handling and operator workplace.
	{"eingeschlossen", "eingesperrt", "wiederanlauf", "quittier"},
	{"zentriergreifer", "zentriereinheit", "zentrieren"},
	{"beladetuer", "schutztuer", "zugangstuer", "tuerposition"},
	{"werkstueck", "rohteil", "rohling"},
	{"ergonom", "einlege", "bedienelemente", "arbeitshoehe", "haltung"},
	// NOTE(review): "fundamentierr" (double r) looks like a typo for
	// "fundamentier"; confirm before changing, since the string is matched
	// at runtime.
	{"boden", "tragfaehig", "einbrech", "fundamentierr"},

	// Ejection, bursting and breakthrough.
	{"spritzer", "auge", "augenverletz"},
	{"bersten", "platzen", "abspring"},
	{"durchschlag", "durchbrech", "begrenz", "bewegungsbereich"},

	// Additional groups that partly overlap with groups further up.
	{"potentialausgleich", "potentialunter", "bezugspotential"},
	{"kriechstreck", "luft-", "kriechst", "dimensionie"},
	{"kuehlschmierstoff", "kss", "bettspuel", "kuehlung"},
	{"rutsch", "ausrutsch", "stolper", "gleiten", "nassrutsch"},
}
// wrongMachineTerms lists lowercase German terms whose presence in an engine
// hazard indicates that the hazard describes a completely different type of
// machine. Most entries are single words; the last one is a multi-word
// phrase.
var wrongMachineTerms = []string{
	"spielplatz", "fahrtreppe", "trommelwaschmaschine",
	"umreifungsband", "drehteller", "rundtaktanlage",
	"exzentrisch", "webstuhl", "aufzug",
	"rolltreppe", "bagger", "kettensaege",
	"kreissaege", "druckmaschine", "zentrifuge",
	"autoklav", "hobel", "naehmaschine",
	"strickmaschine", "schleifmaschine", "gabelstapler",
	"flurfoerder", "erntemaschine", "kollision zweier roboter",
}
// categoryMap translates a GT hazard_group name (lowercase German with
// transliterated umlauts) into the engine category prefixes accepted for that
// group. A group may map to more than one prefix.
//
// NOTE(review): the noise group also lists "ergonomic" and the vibration
// group also lists "noise"; presumably deliberate tolerance in the matching,
// but confirm against the engine's category names.
var categoryMap = map[string][]string{
	"mechanische gefaehrdungen":                             {"mechanical"},
	"elektrische gefaehrdungen":                             {"electrical"},
	"thermische gefaehrdungen":                              {"thermal"},
	"gefaehrdungen durch laerm":                             {"noise", "ergonomic"},
	"gefaehrdungen durch vibration":                         {"noise", "vibration"},
	"gefaehrdungen durch strahlung":                         {"radiation", "emc"},
	"gefaehrdungen durch materialien und substanzen":        {"material", "environmental"},
	"ergonomische gefaehrdungen":                            {"ergonomic"},
	"gefaehrdungen im zusammenhang mit der einsatzumgebung": {"environmental"},
}
// extractActionWords returns the action terms (verbs and descriptors that
// characterise a hazard event) that occur in text.
//
// Matching is a plain, case-sensitive substring test, and every term is
// lowercase, so text is presumably expected to be lowercased by the caller
// (verify at the call sites). The result follows the order of the term list
// below, not the order of appearance in text. Overlapping terms are reported
// independently: a text containing "wiederanlauf" yields both "wiederanlauf"
// and "anlauf". Each term is reported at most once. When nothing matches the
// result is a nil slice.
func extractActionWords(text string) []string {
	// The vocabulary holds the words that tell otherwise similar-looking
	// hazards apart. Its order determines the order of the result.
	vocabulary := []string{
		"eingeklemmt", "einklemm", "eingeschlossen", "eingesperrt",
		"herabfall", "herunterfal", "faellt",
		"durchschlaegt", "durchbrech", "durchschlag",
		"springt ab", "abspring", "bersten", "platzen",
		"weggeschleudert", "schleuder",
		"getroffen", "treff",
		"greift", "eingreif", "durchgreif", "uebergreif",
		"beruehrt", "beruehr", "kontakt",
		"einzug", "erfass", "aufwickel",
		"stolper", "rutsch", "ausrutsch", "gleiten",
		"verbren", "heiss",
		"spritzer", "augenver",
		"kurzschluss", "ueberstrom", "ueberlast",
		"isolat", "schutzleiter", "kriechstrom", "kriechstreck",
		"potentialausgleich", "potentialunter", "bezugspotential", "potential",
		"emv", "stoereinfluss", "elektromagnet", "stoerung",
		"leckage", "austret", "undicht",
		"schutzzaun", "einhausung", "schutztuer",
		"wiederanlauf", "anlauf", "startet",
		"teach", "einricht", "programmier",
		"spannvorricht", "spannfutter", "greiferbacken",
		"druckluft", "pneumatik", "restdruck",
		"beladetuer", "werkzeugmaschine", "bearbeitungszelle",
		"ergonom", "einlege", "bedienelement",
		"tragfaehig", "boden", "einbrech",
		// Terms added to close remaining GT gaps.
		"schlauch", "druck", "kuehlschmierstoff",
		"bettspuel", "pumpe", "niederdruck",
		"luft-", "dimensionie",
		"anlagenteile", "energieversorgung",
		"greifer", "werkzeug",
	}

	var matches []string
	// emitted guards against reporting a term twice should the vocabulary
	// ever contain a duplicate entry.
	emitted := make(map[string]struct{})
	for i := range vocabulary {
		stem := vocabulary[i]
		if _, dup := emitted[stem]; dup {
			continue
		}
		if !strings.Contains(text, stem) {
			continue
		}
		emitted[stem] = struct{}{}
		matches = append(matches, stem)
	}
	return matches
}