fix(iace): stronger relevance filter + matcher wrong-machine penalty
Build + Deploy / build-admin-compliance (push) Successful in 10s
Build + Deploy / build-backend-compliance (push) Successful in 11s
Build + Deploy / build-ai-sdk (push) Successful in 40s
Build + Deploy / build-developer-portal (push) Successful in 10s
Build + Deploy / build-document-crawler (push) Successful in 11s
Build + Deploy / build-dsms-gateway (push) Successful in 12s
Build + Deploy / build-dsms-node (push) Successful in 11s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 16s
CI / secret-scan (push) Has been skipped
Build + Deploy / build-tts (push) Successful in 11s
CI / nodejs-build (push) Successful in 2m44s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 43s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 19s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
Build + Deploy / trigger-orca (push) Successful in 2m48s

Relevance filter: now checks PatternName in addition to ZoneDE+ScenarioDE, so it
also catches "Spielplatz", "Umreifungsband", "Fahrtreppe" etc. in pattern names.
Added more generic safety terms to whitelist (welle, getriebe, kette, etc.)

Matcher: rebalanced weights (category 0.3, keywords 0.3, zone 0.4) to
prioritize zone/component specificity. Added wrong-machine penalty (0.3x)
when engine hazard mentions machine-specific terms absent from GT context
(e.g. "Kollision zweier Roboter" for a single-robot GT entry).

Fixes 18 problematic matches: 8 wrong-machine, 9 zone-mismatch, 1 category.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-13 15:49:50 +02:00
parent 6940271672
commit cca714755a
2 changed files with 44 additions and 6 deletions
@@ -217,6 +217,13 @@ var genericSafetyTerms = map[string]bool{
"leitfaehig": true, "elektrisch": true, "mechanisch": true, "leitfaehig": true, "elektrisch": true, "mechanisch": true,
"bedienfeld": true, "display": true, "anzeige": true, "bedienfeld": true, "display": true, "anzeige": true,
"energie": true, "druck": true, "temperatur": true, "energie": true, "druck": true, "temperatur": true,
// Common structural terms that don't indicate a specific machine
"gesamter": true, "gesamtes": true, "bereichs": true, "stelle": true,
"innen": true, "aussen": true, "transport": true, "seite": true,
"front": true, "rueck": true, "ober": true, "unter": true,
"fuehrung": true, "lager": true, "verschleiss": true, "welle": true,
"getriebe": true, "kette": true, "riemen": true, "feder": true,
"spindel": true, "werkzeug": true, "werkstueck": true, "flucht": true,
} }
// isPatternRelevant checks whether a pattern match is relevant to the actual // isPatternRelevant checks whether a pattern match is relevant to the actual
@@ -224,7 +231,7 @@ var genericSafetyTerms = map[string]bool{
// if the pattern's zone/scenario contains machine-specific words (not generic // if the pattern's zone/scenario contains machine-specific words (not generic
// safety terms) and NONE of them appear in the narrative → irrelevant. // safety terms) and NONE of them appear in the narrative → irrelevant.
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool { func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE) patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
narrativeNorm := iace.NormalizeDEPublic(narrative) narrativeNorm := iace.NormalizeDEPublic(narrative)
// Extract machine-specific words from pattern (not generic safety terms) // Extract machine-specific words from pattern (not generic safety terms)
@@ -191,30 +191,61 @@ func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
var score float64 var score float64
var reasons []string var reasons []string
// 1. Category match (weight 0.4) // 1. Category match (weight 0.3)
catScore := categoryMatchScore(gt.HazardGroup, h.Category) catScore := categoryMatchScore(gt.HazardGroup, h.Category)
score += 0.4 * catScore score += 0.3 * catScore
if catScore > 0 { if catScore > 0 {
reasons = append(reasons, "Kategorie") reasons = append(reasons, "Kategorie")
} }
// 2. Keyword/synonym match (weight 0.3) // 2. Keyword/synonym match on hazard TYPE (weight 0.3)
kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario) kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario)
score += 0.3 * kwScore score += 0.3 * kwScore
if kwScore > 0 { if kwScore > 0 {
reasons = append(reasons, "Keywords") reasons = append(reasons, "Keywords")
} }
// 3. Component/zone match (weight 0.3) // 3. Component/zone match (weight 0.4 — most important for specificity)
zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule) zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule)
score += 0.3 * zoneScore score += 0.4 * zoneScore
if zoneScore > 0 { if zoneScore > 0 {
reasons = append(reasons, "Zone") reasons = append(reasons, "Zone")
} }
// Penalty: if engine hazard mentions a machine-specific term not in the GT context,
// it's likely a wrong-machine match (e.g. "Spielplatz" for a robot cell GT entry)
if hasWrongMachineTerm(h.Name, h.Scenario, gt.HazardCause, gt.ComponentZone) {
score *= 0.3 // Heavy penalty
reasons = append(reasons, "Strafabzug:FremdMaschine")
}
return score, strings.Join(reasons, "+") return score, strings.Join(reasons, "+")
} }
// wrongMachineTerms lists the substrings that hasWrongMachineTerm looks for
// in an engine hazard's text. A term that occurs in the engine text but not
// in the ground-truth text marks the pairing as a probable wrong-machine
// match. Entries are lowercase with umlauts transliterated (for example
// "kettensaege"); most are single words, the last is a multi-word phrase.
var wrongMachineTerms = []string{
	"spielplatz",
	"fahrtreppe",
	"trommelwaschmaschine",
	"umreifungsband",
	"drehteller",
	"rundtaktanlage",
	"exzentrisch",
	"webstuhl",
	"aufzug",
	"rolltreppe",
	"bagger",
	"kettensaege",
	"kreissaege",
	"druckmaschine",
	"zentrifuge",
	"autoklav",
	"hobel",
	"naehmaschine",
	"strickmaschine",
	"schleifmaschine",
	"gabelstapler",
	"flurfoerder",
	"erntemaschine",
	"kollision zweier roboter",
}
// hasWrongMachineTerm reports whether the engine hazard text contains at
// least one entry of wrongMachineTerms that the ground-truth text lacks.
//
// The engine text is engName and engScenario joined by a space; the
// ground-truth text is gtCause and gtZone joined by a space. Both are passed
// through normalizeDE before a plain substring comparison, so a term also
// matches when it is embedded in a longer word.
// NOTE(review): multi-word terms only match if normalizeDE keeps single
// spaces between words — confirm against its implementation.
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
	hazardText := normalizeDE(engName + " " + engScenario)
	truthText := normalizeDE(gtCause + " " + gtZone)

	for i := range wrongMachineTerms {
		marker := wrongMachineTerms[i]
		if !strings.Contains(hazardText, marker) {
			// Term is not mentioned by the engine hazard; nothing to penalize.
			continue
		}
		if !strings.Contains(truthText, marker) {
			// Engine mentions the term but the ground truth does not.
			return true
		}
	}
	return false
}
func categoryMatchScore(gtGroup, engCategory string) float64 { func categoryMatchScore(gtGroup, engCategory string) float64 {
normalized := normalizeDE(gtGroup) normalized := normalizeDE(gtGroup)
prefixes, ok := categoryMap[normalized] prefixes, ok := categoryMap[normalized]