feat(iace): narrative relevance filter + zone normalization for precision
Build + Deploy / build-admin-compliance (push) Successful in 1m56s
Build + Deploy / build-backend-compliance (push) Successful in 3m14s
Build + Deploy / build-ai-sdk (push) Successful in 1m18s
Build + Deploy / build-developer-portal (push) Successful in 1m8s
Build + Deploy / build-tts (push) Successful in 1m35s
Build + Deploy / build-document-crawler (push) Successful in 47s
Build + Deploy / build-dsms-gateway (push) Successful in 35s
Build + Deploy / build-dsms-node (push) Successful in 19s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 19s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m28s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 44s
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 21s
CI / validate-canonical-controls (push) Successful in 13s
Build + Deploy / trigger-orca (push) Successful in 2m54s
Build + Deploy / build-admin-compliance (push) Successful in 1m56s
Build + Deploy / build-backend-compliance (push) Successful in 3m14s
Build + Deploy / build-ai-sdk (push) Successful in 1m18s
Build + Deploy / build-developer-portal (push) Successful in 1m8s
Build + Deploy / build-tts (push) Successful in 1m35s
Build + Deploy / build-document-crawler (push) Successful in 47s
Build + Deploy / build-dsms-gateway (push) Successful in 35s
Build + Deploy / build-dsms-node (push) Successful in 19s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 19s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m28s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 44s
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Successful in 26s
CI / test-python-dsms-gateway (push) Successful in 21s
CI / validate-canonical-controls (push) Successful in 13s
Build + Deploy / trigger-orca (push) Successful in 2m54s
- isPatternRelevant() filters out patterns whose zone/scenario mentions machine-specific terms (extruder, stanzpresse, spielplatz, etc.) that are absent from the actual machine narrative
- normalizeZoneKey() clusters similar zones for smarter dedup (e.g. "Schaltschrank, Sammelschiene" = "Schaltschrank-Innenraum")
- machineSpecificTerms list with 40+ terms for generic filtering

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -151,12 +151,24 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build component name set for relevance filtering
|
||||||
|
compNames := make([]string, 0, len(comps))
|
||||||
|
for name := range compByName {
|
||||||
|
compNames = append(compNames, name)
|
||||||
|
}
|
||||||
|
|
||||||
created := 0
|
created := 0
|
||||||
seenCatZone := make(map[string]bool)
|
seenCatZone := make(map[string]bool)
|
||||||
for _, mp := range matchOutput.MatchedPatterns {
|
for _, mp := range matchOutput.MatchedPatterns {
|
||||||
|
// Narrative relevance filter: skip patterns whose zone/scenario
|
||||||
|
// mentions machine-specific terms that don't appear in our components
|
||||||
|
if !isPatternRelevant(mp, narrativeText, compNames) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
for _, cat := range mp.HazardCats {
|
for _, cat := range mp.HazardCats {
|
||||||
// Dedup by category + zone (allows multiple hazards per category at different zones)
|
// Dedup by category + normalized zone
|
||||||
zoneKey := mp.ZoneDE
|
zoneKey := normalizeZoneKey(mp.ZoneDE)
|
||||||
if zoneKey == "" {
|
if zoneKey == "" {
|
||||||
zoneKey = mp.PatternID
|
zoneKey = mp.PatternID
|
||||||
}
|
}
|
||||||
@@ -170,12 +182,10 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) {
|
|||||||
if name == "" {
|
if name == "" {
|
||||||
name = cat
|
name = cat
|
||||||
}
|
}
|
||||||
// Append zone to name for specificity
|
|
||||||
if mp.ZoneDE != "" && !containsSubstring(name, mp.ZoneDE) {
|
if mp.ZoneDE != "" && !containsSubstring(name, mp.ZoneDE) {
|
||||||
name = name + " (" + mp.ZoneDE + ")"
|
name = name + " (" + mp.ZoneDE + ")"
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find matching component by zone name
|
|
||||||
compID := defaultCompID
|
compID := defaultCompID
|
||||||
if mp.ZoneDE != "" {
|
if mp.ZoneDE != "" {
|
||||||
zoneNorm := iace.NormalizeDEPublic(mp.ZoneDE)
|
zoneNorm := iace.NormalizeDEPublic(mp.ZoneDE)
|
||||||
|
|||||||
@@ -199,6 +199,96 @@ func containsSubstring(haystack, needle string) bool {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// machineSpecificTerms lists keywords that tie a hazard pattern to one
// particular kind of machine or application domain. isPatternRelevant drops a
// pattern when its text contains one of these terms while neither the machine
// narrative nor any component name does.
//
// Entries are lowercase and use ASCII transliterations ("saege", "fraese",
// "flurfoerder") because they are matched as plain substrings against text
// that has passed through iace.NormalizeDEPublic.
var machineSpecificTerms = []string{
	"extruder",
	"spinnmaschine",
	"spielplatz",
	"aufzug",
	"elevator",
	"kran",
	"crane",
	"bagger",
	"excavator",
	"traktor",
	"tractor",
	"harvester",
	"druckmaschine",
	"printing",
	"webstuhl",
	"weaving",
	"ofen",
	"furnace",
	"kessel",
	"boiler",
	"walzwerk",
	"rolling",
	"zentrifuge",
	"centrifuge",
	"autoklav",
	"autoclave",
	"saege",
	"kreissaege",
	"circular_saw",
	"hobel",
	"fraese",
	"drehmaschine",
	"lathe",
	"schleifmaschine",
	"grinder",
	"stanze",
	"stanzpresse",
	"infusion",
	"beatmung",
	"ventilator",
	"patient",
	"lebensmittel",
	"food",
	"pharma",
	"verpackung",
	"packaging",
	"seilnetz",
	"kletterseil",
	"schaukel",
	"rutsche",
	"gabelstapler",
	"forklift",
	"flurfoerder",
}
|
||||||
|
|
||||||
|
// isPatternRelevant checks whether a pattern match is relevant to the actual
|
||||||
|
// machine described in the narrative. A pattern is considered irrelevant if its
|
||||||
|
// zone or scenario contains machine-specific terms that don't appear in the
|
||||||
|
// narrative or component list.
|
||||||
|
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
|
||||||
|
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
|
||||||
|
narrativeNorm := iace.NormalizeDEPublic(narrative)
|
||||||
|
|
||||||
|
// Check if pattern mentions machine-specific terms absent from narrative
|
||||||
|
for _, term := range machineSpecificTerms {
|
||||||
|
if !strings.Contains(patternText, term) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Pattern mentions this machine-specific term — check if machine has it
|
||||||
|
if strings.Contains(narrativeNorm, term) {
|
||||||
|
continue // Machine has this term, pattern is relevant
|
||||||
|
}
|
||||||
|
// Also check component names
|
||||||
|
found := false
|
||||||
|
for _, cn := range compNames {
|
||||||
|
if strings.Contains(cn, term) {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
return false // Pattern mentions a machine type we don't have
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeZoneKey reduces a zone string to its core components for better dedup.
|
||||||
|
// E.g. "Schaltschrank, Sammelschiene" and "Schaltschrank-Innenraum, Sammelschienen"
|
||||||
|
// should dedup to the same key.
|
||||||
|
func normalizeZoneKey(zone string) string {
|
||||||
|
if zone == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
norm := iace.NormalizeDEPublic(zone)
|
||||||
|
// Remove filler words and punctuation
|
||||||
|
for _, r := range []string{",", "/", "(", ")", "-", ".", ":", ";"} {
|
||||||
|
norm = strings.ReplaceAll(norm, r, " ")
|
||||||
|
}
|
||||||
|
// Extract significant words (>3 chars), sort for stable key
|
||||||
|
words := strings.Fields(norm)
|
||||||
|
var sig []string
|
||||||
|
seen := make(map[string]bool)
|
||||||
|
stopWords := map[string]bool{
|
||||||
|
"der": true, "die": true, "das": true, "und": true, "oder": true,
|
||||||
|
"von": true, "des": true, "den": true, "dem": true, "ein": true,
|
||||||
|
"eine": true, "fuer": true, "bei": true, "mit": true, "nach": true,
|
||||||
|
"alle": true, "aller": true, "allem": true, "sowie": true,
|
||||||
|
"insbesondere": true, "bereich": true, "gesamte": true, "gesamter": true,
|
||||||
|
"innerhalb": true, "ausserhalb": true, "umgebung": true,
|
||||||
|
}
|
||||||
|
for _, w := range words {
|
||||||
|
if len(w) < 4 || stopWords[w] || seen[w] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[w] = true
|
||||||
|
sig = append(sig, w)
|
||||||
|
}
|
||||||
|
if len(sig) == 0 {
|
||||||
|
return norm
|
||||||
|
}
|
||||||
|
// Take first 3 significant words as key (enough for dedup)
|
||||||
|
if len(sig) > 3 {
|
||||||
|
sig = sig[:3]
|
||||||
|
}
|
||||||
|
return strings.Join(sig, "_")
|
||||||
|
}
|
||||||
|
|
||||||
// findHazardForMeasureByCategory finds a matching hazard for a measure.
|
// findHazardForMeasureByCategory finds a matching hazard for a measure.
|
||||||
func findHazardForMeasureByCategory(measureCat string, hazardsByCategory map[string]uuid.UUID) uuid.UUID {
|
func findHazardForMeasureByCategory(measureCat string, hazardsByCategory map[string]uuid.UUID) uuid.UUID {
|
||||||
if id, ok := hazardsByCategory[measureCat]; ok {
|
if id, ok := hazardsByCategory[measureCat]; ok {
|
||||||
|
|||||||
Reference in New Issue
Block a user