diff --git a/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go b/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go index 530b982..0f35569 100644 --- a/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go +++ b/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go @@ -199,53 +199,65 @@ func containsSubstring(haystack, needle string) bool { ) } -// machineSpecificTerms are words in a pattern's zone/scenario that indicate -// the pattern is specific to a particular machine type. If a pattern contains -// such a term but the machine narrative does NOT, the pattern is irrelevant. -var machineSpecificTerms = []string{ - "extruder", "spinnmaschine", "spielplatz", "aufzug", "elevator", - "kran", "crane", "bagger", "excavator", "traktor", "tractor", - "harvester", "druckmaschine", "printing", "webstuhl", "weaving", - "ofen", "furnace", "kessel", "boiler", "walzwerk", "rolling", - "zentrifuge", "centrifuge", "autoklav", "autoclave", "saege", - "kreissaege", "circular_saw", "hobel", "fraese", "drehmaschine", - "lathe", "schleifmaschine", "grinder", "stanze", "stanzpresse", - "infusion", "beatmung", "ventilator", "patient", - "lebensmittel", "food", "pharma", "verpackung", "packaging", - "seilnetz", "kletterseil", "schaukel", "rutsche", - "gabelstapler", "forklift", "flurfoerder", +// genericSafetyTerms are words that appear in almost all risk assessments +// and should NOT be used to determine machine-specificity. 
// genericSafetyTerms is the set of normalized words that show up in almost
// every risk assessment. isPatternRelevant skips these words when it looks
// for machine-specific vocabulary, so they never decide on their own whether
// a pattern belongs to a particular machine. Lookup is by exact word.
var genericSafetyTerms = func() map[string]bool {
	words := []string{
		// Machine, work area and danger zone.
		"maschine", "anlage", "bereich", "gesamte",
		"arbeitsplatz", "gefahrbereich", "gefahrstelle", "gefahrenstelle",
		// People.
		"person", "werker", "bediener",
		// Control and safety functions.
		"steuerung", "schutzeinrichtung", "sicherheit",
		// Operating phases.
		"betrieb", "wartung", "instandhaltung", "reinigung",
		// Movement, parts and access.
		"bewegung", "beweglich", "feststehend", "teil", "teile", "oeffnung", "zugang",
		// Hazards and injuries.
		"gefahr", "verletzung", "quetsch", "scher", "schneid",
		"stoss", "schlag", "einzug", "brand",
		// Drives.
		"motor", "antrieb", "achse", "achsen",
		// Electrical equipment.
		"kabel", "leitung", "schaltschrank", "spannung",
		// Guards, housings and surfaces.
		"schutz", "gehaeuse", "oberflaeche", "boden",
		// Generic properties.
		"leitfaehig", "elektrisch", "mechanisch",
		// Operator interface.
		"bedienfeld", "display", "anzeige",
		// Physical quantities.
		"energie", "druck", "temperatur",
	}

	set := make(map[string]bool, len(words))
	for _, word := range words {
		set[word] = true
	}
	return set
}()

// isPatternRelevant checks whether a pattern match is relevant to the actual
// machine described in the narrative. It uses vocabulary overlap: if the
// pattern's zone/scenario text contains machine-specific words (that is,
// words that are not generic safety terms) and none of them appear in the
// narrative or the component names, the pattern is considered irrelevant.
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool { - patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName) + patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE) narrativeNorm := iace.NormalizeDEPublic(narrative) - // Check if pattern mentions machine-specific terms absent from narrative - for _, term := range machineSpecificTerms { - if !strings.Contains(patternText, term) { + // Extract machine-specific words from pattern (not generic safety terms) + patternWords := strings.Fields(patternText) + var specificWords []string + for _, w := range patternWords { + // Clean punctuation + w = strings.Trim(w, ".,;:!?()/-") + if len(w) < 5 || genericSafetyTerms[w] { continue } - // Pattern mentions this machine-specific term — check if machine has it - if strings.Contains(narrativeNorm, term) { - continue // Machine has this term, pattern is relevant + specificWords = append(specificWords, w) + } + + // If pattern has no specific words, it's generic → always relevant + if len(specificWords) == 0 { + return true + } + + // Check if at least one specific word appears in the narrative or components + for _, sw := range specificWords { + if strings.Contains(narrativeNorm, sw) { + return true } - // Also check component names - found := false for _, cn := range compNames { - if strings.Contains(cn, term) { - found = true - break + if strings.Contains(cn, sw) { + return true } } - if !found { - return false // Pattern mentions a machine type we don't have - } } - return true + + // No specific word found in narrative → pattern is for a different machine + return false } // normalizeZoneKey reduces a zone string to its core components for better dedup.