diff --git a/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go b/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go index f32fa7b..e469d9d 100644 --- a/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go +++ b/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go @@ -217,6 +217,13 @@ var genericSafetyTerms = map[string]bool{ "leitfaehig": true, "elektrisch": true, "mechanisch": true, "bedienfeld": true, "display": true, "anzeige": true, "energie": true, "druck": true, "temperatur": true, + // Common structural terms that don't indicate a specific machine + "gesamter": true, "gesamtes": true, "bereichs": true, "stelle": true, + "innen": true, "aussen": true, "transport": true, "seite": true, + "front": true, "rueck": true, "ober": true, "unter": true, + "fuehrung": true, "lager": true, "verschleiss": true, "welle": true, + "getriebe": true, "kette": true, "riemen": true, "feder": true, + "spindel": true, "werkzeug": true, "werkstueck": true, "flucht": true, } // isPatternRelevant checks whether a pattern match is relevant to the actual @@ -224,7 +231,7 @@ var genericSafetyTerms = map[string]bool{ // if the pattern's zone/scenario contains machine-specific words (not generic // safety terms) and NONE of them appear in the narrative → irrelevant. func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool { - patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE) + patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName) narrativeNorm := iace.NormalizeDEPublic(narrative) // Extract machine-specific words from pattern (not generic safety terms) diff --git a/ai-compliance-sdk/internal/iace/benchmark_matcher.go b/ai-compliance-sdk/internal/iace/benchmark_matcher.go index 663ea69..358bb8e 100644 --- a/ai-compliance-sdk/internal/iace/benchmark_matcher.go +++ b/ai-compliance-sdk/internal/iace/benchmark_matcher.go @@ -191,30 +191,61 @@ func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) { var score float64 var reasons []string - // 1. Category match (weight 0.4) + // 1. Category match (weight 0.3) catScore := categoryMatchScore(gt.HazardGroup, h.Category) - score += 0.4 * catScore + score += 0.3 * catScore if catScore > 0 { reasons = append(reasons, "Kategorie") } - // 2. Keyword/synonym match (weight 0.3) + // 2. Keyword/synonym match on hazard TYPE (weight 0.3) kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario) score += 0.3 * kwScore if kwScore > 0 { reasons = append(reasons, "Keywords") } - // 3. Component/zone match (weight 0.3) + // 3. Component/zone match (weight 0.4 — most important for specificity) zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule) - score += 0.3 * zoneScore + score += 0.4 * zoneScore if zoneScore > 0 { reasons = append(reasons, "Zone") } + // Penalty: if engine hazard mentions a machine-specific term not in the GT context, + // it's likely a wrong-machine match (e.g. "Spielplatz" for a robot cell GT entry) + if hasWrongMachineTerm(h.Name, h.Scenario, gt.HazardCause, gt.ComponentZone) { + score *= 0.3 // Heavy penalty + reasons = append(reasons, "Strafabzug:FremdMaschine") + } + return score, strings.Join(reasons, "+") } +// wrongMachineTerms are words in an engine hazard that indicate it's about +// a completely different machine type. If the GT entry doesn't mention these, +// the match is penalized. +var wrongMachineTerms = []string{ + "spielplatz", "fahrtreppe", "trommelwaschmaschine", "umreifungsband", + "drehteller", "rundtaktanlage", "exzentrisch", "webstuhl", + "aufzug", "rolltreppe", "bagger", "kettensaege", "kreissaege", + "druckmaschine", "zentrifuge", "autoklav", "hobel", + "naehmaschine", "strickmaschine", "schleifmaschine", + "gabelstapler", "flurfoerder", "erntemaschine", + "kollision zweier roboter", +} + +func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool { + engText := normalizeDE(engName + " " + engScenario) + gtText := normalizeDE(gtCause + " " + gtZone) + for _, term := range wrongMachineTerms { + if strings.Contains(engText, term) && !strings.Contains(gtText, term) { + return true + } + } + return false +} + func categoryMatchScore(gtGroup, engCategory string) float64 { normalized := normalizeDE(gtGroup) prefixes, ok := categoryMap[normalized]