feat(iace): narrative relevance filter + zone normalization for precision
Build + Deploy / build-backend-compliance (push) Successful in 3m14s
Build + Deploy / build-ai-sdk (push) Successful in 1m18s
Build + Deploy / build-developer-portal (push) Successful in 1m8s
CI / loc-budget (push) Failing after 19s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
Build + Deploy / build-admin-compliance (push) Successful in 1m56s
Build + Deploy / build-tts (push) Successful in 1m35s
Build + Deploy / build-document-crawler (push) Successful in 47s
Build + Deploy / build-dsms-gateway (push) Successful in 35s
Build + Deploy / build-dsms-node (push) Successful in 19s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / test-go (push) Successful in 44s
CI / test-python-backend (push) Successful in 38s
CI / test-python-document-crawler (push) Successful in 26s
CI / nodejs-build (push) Successful in 2m28s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-python-dsms-gateway (push) Successful in 21s
CI / validate-canonical-controls (push) Successful in 13s
Build + Deploy / trigger-orca (push) Successful in 2m54s

- isPatternRelevant() filters patterns whose zone/scenario mentions
  machine-specific terms (extruder, stanzpresse, spielplatz, etc.)
  absent from the actual machine narrative
- normalizeZoneKey() clusters similar zones for smarter dedup
  (e.g. "Schaltschrank, Sammelschiene" = "Schaltschrank-Innenraum")
- machineSpecificTerms list with 40+ terms for generic filtering

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-13 09:51:00 +02:00
parent c702260ec1
commit 1bd892afbf
2 changed files with 104 additions and 4 deletions
@@ -151,12 +151,24 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) {
}
}
// Build component name set for relevance filtering
compNames := make([]string, 0, len(comps))
for name := range compByName {
compNames = append(compNames, name)
}
created := 0
seenCatZone := make(map[string]bool)
for _, mp := range matchOutput.MatchedPatterns {
// Narrative relevance filter: skip patterns whose zone/scenario
// mentions machine-specific terms that don't appear in our components
if !isPatternRelevant(mp, narrativeText, compNames) {
continue
}
for _, cat := range mp.HazardCats {
// Dedup by category + zone (allows multiple hazards per category at different zones)
zoneKey := mp.ZoneDE
// Dedup by category + normalized zone
zoneKey := normalizeZoneKey(mp.ZoneDE)
if zoneKey == "" {
zoneKey = mp.PatternID
}
@@ -170,12 +182,10 @@ func (h *IACEHandler) InitializeProject(c *gin.Context) {
if name == "" {
name = cat
}
// Append zone to name for specificity
if mp.ZoneDE != "" && !containsSubstring(name, mp.ZoneDE) {
name = name + " (" + mp.ZoneDE + ")"
}
// Find matching component by zone name
compID := defaultCompID
if mp.ZoneDE != "" {
zoneNorm := iace.NormalizeDEPublic(mp.ZoneDE)
@@ -199,6 +199,96 @@ func containsSubstring(haystack, needle string) bool {
)
}
// machineSpecificTerms lists lowercase keywords, in German and English, that
// tie a hazard pattern to one particular kind of machine or application.
// isPatternRelevant rejects a pattern whose text contains one of these
// keywords when neither the machine narrative nor a component name does.
//
// NOTE(review): matching is done with strings.Contains, so short entries such
// as "kran", "ofen" or "rolling" also hit inside longer words — confirm that
// this is acceptable for the pattern library in use.
var machineSpecificTerms = []string{
	// Machine types, mostly as German/English pairs.
	"extruder", "spinnmaschine", "spielplatz",
	"aufzug", "elevator",
	"kran", "crane",
	"bagger", "excavator",
	"traktor", "tractor", "harvester",
	"druckmaschine", "printing",
	"webstuhl", "weaving",
	"ofen", "furnace",
	"kessel", "boiler",
	"walzwerk", "rolling",
	"zentrifuge", "centrifuge",
	"autoklav", "autoclave",
	"saege", "kreissaege", "circular_saw",
	"hobel", "fraese",
	"drehmaschine", "lathe",
	"schleifmaschine", "grinder",
	"stanze", "stanzpresse",

	// Patient-care equipment.
	"infusion", "beatmung", "ventilator", "patient",

	// Food, pharma and packaging.
	"lebensmittel", "food", "pharma", "verpackung", "packaging",

	// Playground equipment.
	"seilnetz", "kletterseil", "schaukel", "rutsche",

	// Industrial trucks.
	"gabelstapler", "forklift", "flurfoerder",
}
// isPatternRelevant reports whether a matched pattern applies to the machine
// described by narrative and compNames.
//
// The pattern's zone, scenario and name are joined, normalized with
// iace.NormalizeDEPublic and scanned for every entry of machineSpecificTerms.
// Each term found in the pattern must also occur in the normalized narrative
// or in at least one component name. The first term that occurs in neither
// makes the function return false. A pattern that mentions no
// machine-specific term is always relevant.
//
// Component names are checked both as given and in normalized form, so a
// component called "Kran" or "Kreissäge" satisfies the terms "kran" and
// "kreissaege" the same way the narrative would.
// NOTE(review): this assumes iace.NormalizeDEPublic yields the lowercase,
// transliterated spelling the term list is written in — confirm.
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
	patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
	narrativeNorm := iace.NormalizeDEPublic(narrative)

	// Normalized component names are only needed when a term is present in
	// the pattern but missing from the narrative, so build them lazily.
	var compNorm []string

	for _, term := range machineSpecificTerms {
		if !strings.Contains(patternText, term) {
			continue // pattern does not mention this machine type
		}
		if strings.Contains(narrativeNorm, term) {
			continue // the narrative mentions it too
		}

		if compNorm == nil {
			compNorm = make([]string, len(compNames))
			for i, cn := range compNames {
				compNorm[i] = iace.NormalizeDEPublic(cn)
			}
		}

		found := false
		for i, cn := range compNames {
			// The raw comparison is kept so that every name that matched
			// before still matches; the normalized one adds the case- and
			// umlaut-insensitive match.
			if strings.Contains(cn, term) || strings.Contains(compNorm[i], term) {
				found = true
				break
			}
		}
		if !found {
			return false // pattern targets a machine type we do not have
		}
	}
	return true
}
// zoneKeySeparators turns the punctuation that commonly joins zone fragments
// into spaces so the fragments can be split into words.
var zoneKeySeparators = strings.NewReplacer(
	",", " ", "/", " ", "(", " ", ")", " ",
	"-", " ", ".", " ", ":", " ", ";", " ",
)

// zoneKeyStopWords are normalized German filler words that carry no
// information about which zone is meant. Entries shorter than four bytes are
// already excluded by the length filter in normalizeZoneKey; they are kept so
// the list stays complete if that threshold ever changes.
var zoneKeyStopWords = map[string]bool{
	"der": true, "die": true, "das": true, "und": true, "oder": true,
	"von": true, "des": true, "den": true, "dem": true, "ein": true,
	"eine": true, "fuer": true, "bei": true, "mit": true, "nach": true,
	"alle": true, "aller": true, "allem": true, "sowie": true,
	"insbesondere": true, "bereich": true, "gesamte": true, "gesamter": true,
	"innerhalb": true, "ausserhalb": true, "umgebung": true,
}

// normalizeZoneKey reduces a zone description to a short key used for
// deduplication.
//
// The zone is normalized with iace.NormalizeDEPublic, punctuation is replaced
// by spaces, and the first three distinct words that are at least four bytes
// long and not stop words are joined with "_" in their order of appearance.
// For example, "Schaltschrank (Sammelschiene)" and
// "Schaltschrank / Sammelschiene" both map to "schaltschrank_sammelschiene".
// Words are compared literally: inflected or compound variants such as
// "Sammelschienen" or "Schaltschrank-Innenraum" produce a different key, and
// so does a different word order.
//
// If no word qualifies, the remaining words are joined with single spaces.
// An empty zone, or one made only of punctuation and whitespace, returns ""
// so the caller can substitute its own fallback key.
func normalizeZoneKey(zone string) string {
	if zone == "" {
		return ""
	}

	norm := iace.NormalizeDEPublic(zone)
	words := strings.Fields(zoneKeySeparators.Replace(norm))

	const maxWords = 3 // enough to tell zones apart without over-fitting
	sig := make([]string, 0, maxWords)
	seen := make(map[string]bool, maxWords)
	for _, w := range words {
		if len(w) < 4 || zoneKeyStopWords[w] || seen[w] {
			continue
		}
		seen[w] = true
		sig = append(sig, w)
		if len(sig) == maxWords {
			break
		}
	}

	if len(sig) == 0 {
		// Only short or filler words: fall back to the cleaned-up text.
		// Joining the fields, rather than returning norm, yields "" when
		// nothing is left, instead of a whitespace-only key.
		return strings.Join(words, " ")
	}
	return strings.Join(sig, "_")
}
// findHazardForMeasureByCategory finds a matching hazard for a measure.
func findHazardForMeasureByCategory(measureCat string, hazardsByCategory map[string]uuid.UUID) uuid.UUID {
if id, ok := hazardsByCategory[measureCat]; ok {