diff --git a/ai-compliance-sdk/internal/api/handlers/iace_handler_init.go b/ai-compliance-sdk/internal/api/handlers/iace_handler_init.go index 4f22b49..e255034 100644 --- a/ai-compliance-sdk/internal/api/handlers/iace_handler_init.go +++ b/ai-compliance-sdk/internal/api/handlers/iace_handler_init.go @@ -151,12 +151,24 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) { } } + // Build component name set for relevance filtering + compNames := make([]string, 0, len(comps)) + for name := range compByName { + compNames = append(compNames, name) + } + created := 0 seenCatZone := make(map[string]bool) for _, mp := range matchOutput.MatchedPatterns { + // Narrative relevance filter: skip patterns whose zone/scenario + // mentions machine-specific terms that don't appear in our components + if !isPatternRelevant(mp, narrativeText, compNames) { + continue + } + for _, cat := range mp.HazardCats { - // Dedup by category + zone (allows multiple hazards per category at different zones) - zoneKey := mp.ZoneDE + // Dedup by category + normalized zone + zoneKey := normalizeZoneKey(mp.ZoneDE) if zoneKey == "" { zoneKey = mp.PatternID } @@ -170,12 +182,10 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) { if name == "" { name = cat } - // Append zone to name for specificity if mp.ZoneDE != "" && !containsSubstring(name, mp.ZoneDE) { name = name + " (" + mp.ZoneDE + ")" } - // Find matching component by zone name compID := defaultCompID if mp.ZoneDE != "" { zoneNorm := iace.NormalizeDEPublic(mp.ZoneDE) diff --git a/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go b/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go index c8bdcd1..530b982 100644 --- a/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go +++ b/ai-compliance-sdk/internal/api/handlers/iace_handler_init_helpers.go @@ -199,6 +199,96 @@ func containsSubstring(haystack, needle string) bool { ) } +// machineSpecificTerms are words in a pattern's 
// zone/scenario that indicate the pattern is specific to a particular machine
// type. If a pattern contains such a term but neither the machine narrative
// nor any component name does, isPatternRelevant treats the pattern as
// irrelevant.
//
// Terms are matched with strings.Contains, i.e. as plain substrings, against
// the pattern text and narrative after iace.NormalizeDEPublic and against the
// component names as passed in. Entries are therefore spelled lowercase with
// umlauts transliterated (e.g. "saege", "fraese") — presumably the form the
// normalizer produces; verify against iace.NormalizeDEPublic.
var machineSpecificTerms = []string{
	"extruder", "spinnmaschine", "spielplatz", "aufzug", "elevator",
	"kran", "crane", "bagger", "excavator", "traktor", "tractor",
	"harvester", "druckmaschine", "printing", "webstuhl", "weaving",
	"ofen", "furnace", "kessel", "boiler", "walzwerk", "rolling",
	"zentrifuge", "centrifuge", "autoklav", "autoclave", "saege",
	"kreissaege", "circular_saw", "hobel", "fraese", "drehmaschine",
	"lathe", "schleifmaschine", "grinder", "stanze", "stanzpresse",
	"infusion", "beatmung", "ventilator", "patient",
	"lebensmittel", "food", "pharma", "verpackung", "packaging",
	"seilnetz", "kletterseil", "schaukel", "rutsche",
	"gabelstapler", "forklift", "flurfoerder",
}

// isPatternRelevant checks whether a pattern match is relevant to the actual
// machine described in the narrative. A pattern is considered irrelevant if its
// zone, scenario or pattern name contains machine-specific terms that appear
// neither in the narrative nor in any component name.
+func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool { + patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName) + narrativeNorm := iace.NormalizeDEPublic(narrative) + + // Check if pattern mentions machine-specific terms absent from narrative + for _, term := range machineSpecificTerms { + if !strings.Contains(patternText, term) { + continue + } + // Pattern mentions this machine-specific term — check if machine has it + if strings.Contains(narrativeNorm, term) { + continue // Machine has this term, pattern is relevant + } + // Also check component names + found := false + for _, cn := range compNames { + if strings.Contains(cn, term) { + found = true + break + } + } + if !found { + return false // Pattern mentions a machine type we don't have + } + } + return true +} + +// normalizeZoneKey reduces a zone string to its core components for better dedup. +// E.g. "Schaltschrank, Sammelschiene" and "Schaltschrank-Innenraum, Sammelschienen" +// should dedup to the same key. 
+func normalizeZoneKey(zone string) string { + if zone == "" { + return "" + } + norm := iace.NormalizeDEPublic(zone) + // Remove filler words and punctuation + for _, r := range []string{",", "/", "(", ")", "-", ".", ":", ";"} { + norm = strings.ReplaceAll(norm, r, " ") + } + // Extract significant words (>3 chars), sort for stable key + words := strings.Fields(norm) + var sig []string + seen := make(map[string]bool) + stopWords := map[string]bool{ + "der": true, "die": true, "das": true, "und": true, "oder": true, + "von": true, "des": true, "den": true, "dem": true, "ein": true, + "eine": true, "fuer": true, "bei": true, "mit": true, "nach": true, + "alle": true, "aller": true, "allem": true, "sowie": true, + "insbesondere": true, "bereich": true, "gesamte": true, "gesamter": true, + "innerhalb": true, "ausserhalb": true, "umgebung": true, + } + for _, w := range words { + if len(w) < 4 || stopWords[w] || seen[w] { + continue + } + seen[w] = true + sig = append(sig, w) + } + if len(sig) == 0 { + return norm + } + // Take first 3 significant words as key (enough for dedup) + if len(sig) > 3 { + sig = sig[:3] + } + return strings.Join(sig, "_") +} + // findHazardForMeasureByCategory finds a matching hazard for a measure. func findHazardForMeasureByCategory(measureCat string, hazardsByCategory map[string]uuid.UUID) uuid.UUID { if id, ok := hazardsByCategory[measureCat]; ok {