fix(iace): stronger relevance filter + matcher wrong-machine penalty
Build + Deploy / build-admin-compliance (push) Successful in 10s
Build + Deploy / build-backend-compliance (push) Successful in 11s
Build + Deploy / build-ai-sdk (push) Successful in 40s
Build + Deploy / build-developer-portal (push) Successful in 10s
Build + Deploy / build-document-crawler (push) Successful in 11s
Build + Deploy / build-dsms-gateway (push) Successful in 12s
Build + Deploy / build-dsms-node (push) Successful in 11s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 16s
CI / secret-scan (push) Has been skipped
Build + Deploy / build-tts (push) Successful in 11s
CI / nodejs-build (push) Successful in 2m44s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 43s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 19s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
Build + Deploy / trigger-orca (push) Successful in 2m48s
Build + Deploy / build-admin-compliance (push) Successful in 10s
Build + Deploy / build-backend-compliance (push) Successful in 11s
Build + Deploy / build-ai-sdk (push) Successful in 40s
Build + Deploy / build-developer-portal (push) Successful in 10s
Build + Deploy / build-document-crawler (push) Successful in 11s
Build + Deploy / build-dsms-gateway (push) Successful in 12s
Build + Deploy / build-dsms-node (push) Successful in 11s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 16s
CI / secret-scan (push) Has been skipped
Build + Deploy / build-tts (push) Successful in 11s
CI / nodejs-build (push) Successful in 2m44s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 43s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 19s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
Build + Deploy / trigger-orca (push) Successful in 2m48s
Relevance filter: now checks PatternName in addition to ZoneDE+ScenarioDE, which catches "Spielplatz", "Umreifungsband", "Fahrtreppe", etc. in pattern names. Added more generic safety terms to the whitelist (welle, getriebe, kette, etc.). Matcher: rebalanced weights (category 0.3, keywords 0.3, zone 0.4) to prioritize zone/component specificity. Added a wrong-machine penalty (0.3x) when an engine hazard mentions machine-specific terms absent from the GT context (e.g. "Kollision zweier Roboter" for a single-robot GT entry). Fixes 18 problematic matches: 8 wrong-machine, 9 zone-mismatch, 1 category. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -217,6 +217,13 @@ var genericSafetyTerms = map[string]bool{
|
||||
"leitfaehig": true, "elektrisch": true, "mechanisch": true,
|
||||
"bedienfeld": true, "display": true, "anzeige": true,
|
||||
"energie": true, "druck": true, "temperatur": true,
|
||||
// Common structural terms that don't indicate a specific machine
|
||||
"gesamter": true, "gesamtes": true, "bereichs": true, "stelle": true,
|
||||
"innen": true, "aussen": true, "transport": true, "seite": true,
|
||||
"front": true, "rueck": true, "ober": true, "unter": true,
|
||||
"fuehrung": true, "lager": true, "verschleiss": true, "welle": true,
|
||||
"getriebe": true, "kette": true, "riemen": true, "feder": true,
|
||||
"spindel": true, "werkzeug": true, "werkstueck": true, "flucht": true,
|
||||
}
|
||||
|
||||
// isPatternRelevant checks whether a pattern match is relevant to the actual
|
||||
@@ -224,7 +231,7 @@ var genericSafetyTerms = map[string]bool{
|
||||
// if the pattern's zone/scenario contains machine-specific words (not generic
|
||||
// safety terms) and NONE of them appear in the narrative → irrelevant.
|
||||
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
|
||||
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE)
|
||||
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
|
||||
narrativeNorm := iace.NormalizeDEPublic(narrative)
|
||||
|
||||
// Extract machine-specific words from pattern (not generic safety terms)
|
||||
|
||||
@@ -191,30 +191,61 @@ func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
|
||||
var score float64
|
||||
var reasons []string
|
||||
|
||||
// 1. Category match (weight 0.4)
|
||||
// 1. Category match (weight 0.3)
|
||||
catScore := categoryMatchScore(gt.HazardGroup, h.Category)
|
||||
score += 0.4 * catScore
|
||||
score += 0.3 * catScore
|
||||
if catScore > 0 {
|
||||
reasons = append(reasons, "Kategorie")
|
||||
}
|
||||
|
||||
// 2. Keyword/synonym match (weight 0.3)
|
||||
// 2. Keyword/synonym match on hazard TYPE (weight 0.3)
|
||||
kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario)
|
||||
score += 0.3 * kwScore
|
||||
if kwScore > 0 {
|
||||
reasons = append(reasons, "Keywords")
|
||||
}
|
||||
|
||||
// 3. Component/zone match (weight 0.3)
|
||||
// 3. Component/zone match (weight 0.4 — most important for specificity)
|
||||
zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule)
|
||||
score += 0.3 * zoneScore
|
||||
score += 0.4 * zoneScore
|
||||
if zoneScore > 0 {
|
||||
reasons = append(reasons, "Zone")
|
||||
}
|
||||
|
||||
// Penalty: if engine hazard mentions a machine-specific term not in the GT context,
|
||||
// it's likely a wrong-machine match (e.g. "Spielplatz" for a robot cell GT entry)
|
||||
if hasWrongMachineTerm(h.Name, h.Scenario, gt.HazardCause, gt.ComponentZone) {
|
||||
score *= 0.3 // Heavy penalty
|
||||
reasons = append(reasons, "Strafabzug:FremdMaschine")
|
||||
}
|
||||
|
||||
return score, strings.Join(reasons, "+")
|
||||
}
|
||||
|
||||
// wrongMachineTerms lists substrings that mark an engine hazard as describing
// a different kind of machine. hasWrongMachineTerm reports a hit when one of
// these occurs in the engine hazard text but not in the ground-truth text.
//
// Entries are lowercase with umlauts spelled out (ae/oe) — presumably the form
// normalizeDE produces; confirm before adding new terms.
var wrongMachineTerms = []string{
	"spielplatz",
	"fahrtreppe",
	"trommelwaschmaschine",
	"umreifungsband",
	"drehteller",
	"rundtaktanlage",
	"exzentrisch",
	"webstuhl",
	"aufzug",
	"rolltreppe",
	"bagger",
	"kettensaege",
	"kreissaege",
	"druckmaschine",
	"zentrifuge",
	"autoklav",
	"hobel",
	"naehmaschine",
	"strickmaschine",
	"schleifmaschine",
	"gabelstapler",
	"flurfoerder",
	"erntemaschine",
	"kollision zweier roboter",
}
|
||||
|
||||
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
|
||||
engText := normalizeDE(engName + " " + engScenario)
|
||||
gtText := normalizeDE(gtCause + " " + gtZone)
|
||||
for _, term := range wrongMachineTerms {
|
||||
if strings.Contains(engText, term) && !strings.Contains(gtText, term) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func categoryMatchScore(gtGroup, engCategory string) float64 {
|
||||
normalized := normalizeDE(gtGroup)
|
||||
prefixes, ok := categoryMap[normalized]
|
||||
|
||||
Reference in New Issue
Block a user