feat(iace): narrative relevance filter + zone normalization for precision
Build + Deploy / build-backend-compliance (push) Successful in 3m14s
Build + Deploy / build-ai-sdk (push) Successful in 1m18s
Build + Deploy / build-developer-portal (push) Successful in 1m8s
CI / loc-budget (push) Failing after 19s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
Build + Deploy / build-admin-compliance (push) Successful in 1m56s
Build + Deploy / build-tts (push) Successful in 1m35s
Build + Deploy / build-document-crawler (push) Successful in 47s
Build + Deploy / build-dsms-gateway (push) Successful in 35s
Build + Deploy / build-dsms-node (push) Successful in 19s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / test-go (push) Successful in 44s
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Successful in 26s
CI / nodejs-build (push) Successful in 2m28s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-dsms-gateway (push) Successful in 21s
CI / validate-canonical-controls (push) Successful in 13s
Build + Deploy / trigger-orca (push) Successful in 2m54s
Build + Deploy / build-backend-compliance (push) Successful in 3m14s
Build + Deploy / build-ai-sdk (push) Successful in 1m18s
Build + Deploy / build-developer-portal (push) Successful in 1m8s
CI / loc-budget (push) Failing after 19s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
Build + Deploy / build-admin-compliance (push) Successful in 1m56s
Build + Deploy / build-tts (push) Successful in 1m35s
Build + Deploy / build-document-crawler (push) Successful in 47s
Build + Deploy / build-dsms-gateway (push) Successful in 35s
Build + Deploy / build-dsms-node (push) Successful in 19s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / test-go (push) Successful in 44s
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Successful in 26s
CI / nodejs-build (push) Successful in 2m28s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-dsms-gateway (push) Successful in 21s
CI / validate-canonical-controls (push) Successful in 13s
Build + Deploy / trigger-orca (push) Successful in 2m54s
- isPatternRelevant() filters out patterns whose zone/scenario mentions machine-specific terms (extruder, stanzpresse, spielplatz, etc.) that are absent from the actual machine narrative
- normalizeZoneKey() clusters similar zones for smarter dedup (e.g. "Schaltschrank, Sammelschiene" = "Schaltschrank-Innenraum")
- machineSpecificTerms list with 40+ terms for generic filtering

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -151,12 +151,24 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// Build component name set for relevance filtering
|
||||
compNames := make([]string, 0, len(comps))
|
||||
for name := range compByName {
|
||||
compNames = append(compNames, name)
|
||||
}
|
||||
|
||||
created := 0
|
||||
seenCatZone := make(map[string]bool)
|
||||
for _, mp := range matchOutput.MatchedPatterns {
|
||||
// Narrative relevance filter: skip patterns whose zone/scenario
|
||||
// mentions machine-specific terms that don't appear in our components
|
||||
if !isPatternRelevant(mp, narrativeText, compNames) {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, cat := range mp.HazardCats {
|
||||
// Dedup by category + zone (allows multiple hazards per category at different zones)
|
||||
zoneKey := mp.ZoneDE
|
||||
// Dedup by category + normalized zone
|
||||
zoneKey := normalizeZoneKey(mp.ZoneDE)
|
||||
if zoneKey == "" {
|
||||
zoneKey = mp.PatternID
|
||||
}
|
||||
@@ -170,12 +182,10 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) {
|
||||
if name == "" {
|
||||
name = cat
|
||||
}
|
||||
// Append zone to name for specificity
|
||||
if mp.ZoneDE != "" && !containsSubstring(name, mp.ZoneDE) {
|
||||
name = name + " (" + mp.ZoneDE + ")"
|
||||
}
|
||||
|
||||
// Find matching component by zone name
|
||||
compID := defaultCompID
|
||||
if mp.ZoneDE != "" {
|
||||
zoneNorm := iace.NormalizeDEPublic(mp.ZoneDE)
|
||||
|
||||
@@ -199,6 +199,96 @@ func containsSubstring(haystack, needle string) bool {
|
||||
)
|
||||
}
|
||||
|
||||
// machineSpecificTerms lists lowercase keywords that tie a hazard pattern to
// one particular kind of machine or application domain. isPatternRelevant
// rejects a pattern whose zone/scenario/name text contains one of these
// keywords when the keyword appears neither in the machine narrative nor in
// any component name.
//
// German entries are spelled with ASCII transliterations ("saege", "fraese",
// "flurfoerder") because they are compared against normalized text.
var machineSpecificTerms = []string{
	"extruder", "spinnmaschine", "spielplatz",
	"aufzug", "elevator",
	"kran", "crane",
	"bagger", "excavator",
	"traktor", "tractor",
	"harvester",
	"druckmaschine", "printing",
	"webstuhl", "weaving",
	"ofen", "furnace",
	"kessel", "boiler",
	"walzwerk", "rolling",
	"zentrifuge", "centrifuge",
	"autoklav", "autoclave",
	"saege", "kreissaege", "circular_saw",
	"hobel", "fraese",
	"drehmaschine", "lathe",
	"schleifmaschine", "grinder",
	"stanze", "stanzpresse",
	"infusion", "beatmung", "ventilator", "patient",
	"lebensmittel", "food", "pharma",
	"verpackung", "packaging",
	"seilnetz", "kletterseil", "schaukel", "rutsche",
	"gabelstapler", "forklift", "flurfoerder",
}
|
||||
|
||||
// isPatternRelevant checks whether a pattern match is relevant to the actual
|
||||
// machine described in the narrative. A pattern is considered irrelevant if its
|
||||
// zone or scenario contains machine-specific terms that don't appear in the
|
||||
// narrative or component list.
|
||||
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
|
||||
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
|
||||
narrativeNorm := iace.NormalizeDEPublic(narrative)
|
||||
|
||||
// Check if pattern mentions machine-specific terms absent from narrative
|
||||
for _, term := range machineSpecificTerms {
|
||||
if !strings.Contains(patternText, term) {
|
||||
continue
|
||||
}
|
||||
// Pattern mentions this machine-specific term — check if machine has it
|
||||
if strings.Contains(narrativeNorm, term) {
|
||||
continue // Machine has this term, pattern is relevant
|
||||
}
|
||||
// Also check component names
|
||||
found := false
|
||||
for _, cn := range compNames {
|
||||
if strings.Contains(cn, term) {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
return false // Pattern mentions a machine type we don't have
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// normalizeZoneKey reduces a zone string to its core components for better dedup.
|
||||
// E.g. "Schaltschrank, Sammelschiene" and "Schaltschrank-Innenraum, Sammelschienen"
|
||||
// should dedup to the same key.
|
||||
func normalizeZoneKey(zone string) string {
|
||||
if zone == "" {
|
||||
return ""
|
||||
}
|
||||
norm := iace.NormalizeDEPublic(zone)
|
||||
// Remove filler words and punctuation
|
||||
for _, r := range []string{",", "/", "(", ")", "-", ".", ":", ";"} {
|
||||
norm = strings.ReplaceAll(norm, r, " ")
|
||||
}
|
||||
// Extract significant words (>3 chars), sort for stable key
|
||||
words := strings.Fields(norm)
|
||||
var sig []string
|
||||
seen := make(map[string]bool)
|
||||
stopWords := map[string]bool{
|
||||
"der": true, "die": true, "das": true, "und": true, "oder": true,
|
||||
"von": true, "des": true, "den": true, "dem": true, "ein": true,
|
||||
"eine": true, "fuer": true, "bei": true, "mit": true, "nach": true,
|
||||
"alle": true, "aller": true, "allem": true, "sowie": true,
|
||||
"insbesondere": true, "bereich": true, "gesamte": true, "gesamter": true,
|
||||
"innerhalb": true, "ausserhalb": true, "umgebung": true,
|
||||
}
|
||||
for _, w := range words {
|
||||
if len(w) < 4 || stopWords[w] || seen[w] {
|
||||
continue
|
||||
}
|
||||
seen[w] = true
|
||||
sig = append(sig, w)
|
||||
}
|
||||
if len(sig) == 0 {
|
||||
return norm
|
||||
}
|
||||
// Take first 3 significant words as key (enough for dedup)
|
||||
if len(sig) > 3 {
|
||||
sig = sig[:3]
|
||||
}
|
||||
return strings.Join(sig, "_")
|
||||
}
|
||||
|
||||
// findHazardForMeasureByCategory finds a matching hazard for a measure.
|
||||
func findHazardForMeasureByCategory(measureCat string, hazardsByCategory map[string]uuid.UUID) uuid.UUID {
|
||||
if id, ok := hazardsByCategory[measureCat]; ok {
|
||||
|
||||
Reference in New Issue
Block a user