fix(iace): stronger relevance filter + matcher wrong-machine penalty
Build + Deploy / build-admin-compliance (push) Successful in 10s
Build + Deploy / build-backend-compliance (push) Successful in 11s
Build + Deploy / build-ai-sdk (push) Successful in 40s
Build + Deploy / build-developer-portal (push) Successful in 10s
Build + Deploy / build-document-crawler (push) Successful in 11s
Build + Deploy / build-dsms-gateway (push) Successful in 12s
Build + Deploy / build-dsms-node (push) Successful in 11s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 16s
CI / secret-scan (push) Has been skipped
Build + Deploy / build-tts (push) Successful in 11s
CI / nodejs-build (push) Successful in 2m44s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 43s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 19s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
Build + Deploy / trigger-orca (push) Successful in 2m48s
Build + Deploy / build-admin-compliance (push) Successful in 10s
Build + Deploy / build-backend-compliance (push) Successful in 11s
Build + Deploy / build-ai-sdk (push) Successful in 40s
Build + Deploy / build-developer-portal (push) Successful in 10s
Build + Deploy / build-document-crawler (push) Successful in 11s
Build + Deploy / build-dsms-gateway (push) Successful in 12s
Build + Deploy / build-dsms-node (push) Successful in 11s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 16s
CI / secret-scan (push) Has been skipped
Build + Deploy / build-tts (push) Successful in 11s
CI / nodejs-build (push) Successful in 2m44s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 43s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 19s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
Build + Deploy / trigger-orca (push) Successful in 2m48s
Relevance filter: now checks PatternName in addition to ZoneDE+ScenarioDE, which catches "Spielplatz", "Umreifungsband", "Fahrtreppe", etc. in pattern names. Also added more generic safety terms to the whitelist (welle, getriebe, kette, etc.). Matcher: rebalanced weights (category 0.3, keywords 0.3, zone 0.4) to prioritize zone/component specificity. Added a wrong-machine penalty (score multiplied by 0.3) when an engine hazard mentions machine-specific terms that are absent from the GT context (e.g. "Kollision zweier Roboter" for a single-robot GT entry). Fixes 18 problematic matches: 8 wrong-machine, 9 zone-mismatch, 1 category. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -217,6 +217,13 @@ var genericSafetyTerms = map[string]bool{
|
|||||||
"leitfaehig": true, "elektrisch": true, "mechanisch": true,
|
"leitfaehig": true, "elektrisch": true, "mechanisch": true,
|
||||||
"bedienfeld": true, "display": true, "anzeige": true,
|
"bedienfeld": true, "display": true, "anzeige": true,
|
||||||
"energie": true, "druck": true, "temperatur": true,
|
"energie": true, "druck": true, "temperatur": true,
|
||||||
|
// Common structural terms that don't indicate a specific machine
|
||||||
|
"gesamter": true, "gesamtes": true, "bereichs": true, "stelle": true,
|
||||||
|
"innen": true, "aussen": true, "transport": true, "seite": true,
|
||||||
|
"front": true, "rueck": true, "ober": true, "unter": true,
|
||||||
|
"fuehrung": true, "lager": true, "verschleiss": true, "welle": true,
|
||||||
|
"getriebe": true, "kette": true, "riemen": true, "feder": true,
|
||||||
|
"spindel": true, "werkzeug": true, "werkstueck": true, "flucht": true,
|
||||||
}
|
}
|
||||||
|
|
||||||
// isPatternRelevant checks whether a pattern match is relevant to the actual
|
// isPatternRelevant checks whether a pattern match is relevant to the actual
|
||||||
@@ -224,7 +231,7 @@ var genericSafetyTerms = map[string]bool{
|
|||||||
// if the pattern's zone/scenario contains machine-specific words (not generic
|
// if the pattern's zone/scenario contains machine-specific words (not generic
|
||||||
// safety terms) and NONE of them appear in the narrative → irrelevant.
|
// safety terms) and NONE of them appear in the narrative → irrelevant.
|
||||||
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
|
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
|
||||||
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE)
|
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
|
||||||
narrativeNorm := iace.NormalizeDEPublic(narrative)
|
narrativeNorm := iace.NormalizeDEPublic(narrative)
|
||||||
|
|
||||||
// Extract machine-specific words from pattern (not generic safety terms)
|
// Extract machine-specific words from pattern (not generic safety terms)
|
||||||
|
|||||||
@@ -191,30 +191,61 @@ func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
|
|||||||
var score float64
|
var score float64
|
||||||
var reasons []string
|
var reasons []string
|
||||||
|
|
||||||
// 1. Category match (weight 0.4)
|
// 1. Category match (weight 0.3)
|
||||||
catScore := categoryMatchScore(gt.HazardGroup, h.Category)
|
catScore := categoryMatchScore(gt.HazardGroup, h.Category)
|
||||||
score += 0.4 * catScore
|
score += 0.3 * catScore
|
||||||
if catScore > 0 {
|
if catScore > 0 {
|
||||||
reasons = append(reasons, "Kategorie")
|
reasons = append(reasons, "Kategorie")
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. Keyword/synonym match (weight 0.3)
|
// 2. Keyword/synonym match on hazard TYPE (weight 0.3)
|
||||||
kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario)
|
kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario)
|
||||||
score += 0.3 * kwScore
|
score += 0.3 * kwScore
|
||||||
if kwScore > 0 {
|
if kwScore > 0 {
|
||||||
reasons = append(reasons, "Keywords")
|
reasons = append(reasons, "Keywords")
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. Component/zone match (weight 0.3)
|
// 3. Component/zone match (weight 0.4 — most important for specificity)
|
||||||
zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule)
|
zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule)
|
||||||
score += 0.3 * zoneScore
|
score += 0.4 * zoneScore
|
||||||
if zoneScore > 0 {
|
if zoneScore > 0 {
|
||||||
reasons = append(reasons, "Zone")
|
reasons = append(reasons, "Zone")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Penalty: if engine hazard mentions a machine-specific term not in the GT context,
|
||||||
|
// it's likely a wrong-machine match (e.g. "Spielplatz" for a robot cell GT entry)
|
||||||
|
if hasWrongMachineTerm(h.Name, h.Scenario, gt.HazardCause, gt.ComponentZone) {
|
||||||
|
score *= 0.3 // Heavy penalty
|
||||||
|
reasons = append(reasons, "Strafabzug:FremdMaschine")
|
||||||
|
}
|
||||||
|
|
||||||
return score, strings.Join(reasons, "+")
|
return score, strings.Join(reasons, "+")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// wrongMachineTerms are words in an engine hazard that indicate it's about
|
||||||
|
// a completely different machine type. If the GT entry doesn't mention these,
|
||||||
|
// the match is penalized.
|
||||||
|
var wrongMachineTerms = []string{
|
||||||
|
"spielplatz", "fahrtreppe", "trommelwaschmaschine", "umreifungsband",
|
||||||
|
"drehteller", "rundtaktanlage", "exzentrisch", "webstuhl",
|
||||||
|
"aufzug", "rolltreppe", "bagger", "kettensaege", "kreissaege",
|
||||||
|
"druckmaschine", "zentrifuge", "autoklav", "hobel",
|
||||||
|
"naehmaschine", "strickmaschine", "schleifmaschine",
|
||||||
|
"gabelstapler", "flurfoerder", "erntemaschine",
|
||||||
|
"kollision zweier roboter",
|
||||||
|
}
|
||||||
|
|
||||||
|
func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
|
||||||
|
engText := normalizeDE(engName + " " + engScenario)
|
||||||
|
gtText := normalizeDE(gtCause + " " + gtZone)
|
||||||
|
for _, term := range wrongMachineTerms {
|
||||||
|
if strings.Contains(engText, term) && !strings.Contains(gtText, term) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func categoryMatchScore(gtGroup, engCategory string) float64 {
|
func categoryMatchScore(gtGroup, engCategory string) float64 {
|
||||||
normalized := normalizeDE(gtGroup)
|
normalized := normalizeDE(gtGroup)
|
||||||
prefixes, ok := categoryMap[normalized]
|
prefixes, ok := categoryMap[normalized]
|
||||||
|
|||||||
Reference in New Issue
Block a user