fix(iace): stronger relevance filter + matcher wrong-machine penalty

Relevance filter: now checks PatternName in addition to ZoneDE + ScenarioDE, which catches "Spielplatz", "Umreifungsband", "Fahrtreppe" etc. in pattern names. Added more generic safety terms to the whitelist (welle, getriebe, kette, etc.).

Matcher: rebalanced weights (category 0.3, keywords 0.3, zone 0.4) to prioritize zone/component specificity. Added a wrong-machine penalty (0.3x) that applies when the engine hazard mentions machine-specific terms absent from the GT context (e.g. "Kollision zweier Roboter" for a single-robot GT entry).

Fixes 18 problematic matches: 8 wrong-machine, 9 zone-mismatch, 1 category.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-13 15:49:50 +02:00
parent 6940271672
commit cca714755a
2 changed files with 44 additions and 6 deletions
@@ -217,6 +217,13 @@ var genericSafetyTerms = map[string]bool{
 	"leitfaehig": true, "elektrisch": true, "mechanisch": true,
 	"bedienfeld": true, "display": true, "anzeige": true,
 	"energie": true, "druck": true, "temperatur": true,
+	// Common structural terms that don't indicate a specific machine
+	"gesamter": true, "gesamtes": true, "bereichs": true, "stelle": true,
+	"innen": true, "aussen": true, "transport": true, "seite": true,
+	"front": true, "rueck": true, "ober": true, "unter": true,
+	"fuehrung": true, "lager": true, "verschleiss": true, "welle": true,
+	"getriebe": true, "kette": true, "riemen": true, "feder": true,
+	"spindel": true, "werkzeug": true, "werkstueck": true, "flucht": true,
 }

 // isPatternRelevant checks whether a pattern match is relevant to the actual
@@ -224,7 +231,7 @@ var genericSafetyTerms = map[string]bool{
 // if the pattern's zone/scenario contains machine-specific words (not generic
 // safety terms) and NONE of them appear in the narrative → irrelevant.
 func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
-	patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE)
+	patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
 	narrativeNorm := iace.NormalizeDEPublic(narrative)

 	// Extract machine-specific words from pattern (not generic safety terms)
@@ -191,30 +191,61 @@ func fuzzyMatchScore(gt *GroundTruthEntry, h *Hazard) (float64, string) {
 	var score float64
 	var reasons []string

-	// 1. Category match (weight 0.4)
+	// 1. Category match (weight 0.3)
 	catScore := categoryMatchScore(gt.HazardGroup, h.Category)
-	score += 0.4 * catScore
+	score += 0.3 * catScore
 	if catScore > 0 {
 		reasons = append(reasons, "Kategorie")
 	}

-	// 2. Keyword/synonym match (weight 0.3)
+	// 2. Keyword/synonym match on hazard TYPE (weight 0.3)
 	kwScore := keywordMatchScore(gt.HazardType, gt.HazardCause, h.Name, h.Description, h.Scenario)
 	score += 0.3 * kwScore
 	if kwScore > 0 {
 		reasons = append(reasons, "Keywords")
 	}

-	// 3. Component/zone match (weight 0.3)
+	// 3. Component/zone match (weight 0.4 — most important for specificity)
 	zoneScore := zoneMatchScore(gt.ComponentZone, gt.HazardSubgroup, h.HazardousZone, h.MachineModule)
-	score += 0.3 * zoneScore
+	score += 0.4 * zoneScore
 	if zoneScore > 0 {
 		reasons = append(reasons, "Zone")
 	}

+	// Penalty: if engine hazard mentions a machine-specific term not in the GT context,
+	// it's likely a wrong-machine match (e.g. "Spielplatz" for a robot cell GT entry)
+	if hasWrongMachineTerm(h.Name, h.Scenario, gt.HazardCause, gt.ComponentZone) {
+		score *= 0.3 // Heavy penalty
+		reasons = append(reasons, "Strafabzug:FremdMaschine")
+	}
+
 	return score, strings.Join(reasons, "+")
 }

+// wrongMachineTerms are words in an engine hazard that indicate it's about
+// a completely different machine type. If the GT entry doesn't mention these,
+// the match is penalized.
+var wrongMachineTerms = []string{
+	"spielplatz", "fahrtreppe", "trommelwaschmaschine", "umreifungsband",
+	"drehteller", "rundtaktanlage", "exzentrisch", "webstuhl",
+	"aufzug", "rolltreppe", "bagger", "kettensaege", "kreissaege",
+	"druckmaschine", "zentrifuge", "autoklav", "hobel",
+	"naehmaschine", "strickmaschine", "schleifmaschine",
+	"gabelstapler", "flurfoerder", "erntemaschine",
+	"kollision zweier roboter",
+}
+
+func hasWrongMachineTerm(engName, engScenario, gtCause, gtZone string) bool {
+	engText := normalizeDE(engName + " " + engScenario)
+	gtText := normalizeDE(gtCause + " " + gtZone)
+	for _, term := range wrongMachineTerms {
+		if strings.Contains(engText, term) && !strings.Contains(gtText, term) {
+			return true
+		}
+	}
+	return false
+}
+
 func categoryMatchScore(gtGroup, engCategory string) float64 {
 	normalized := normalizeDE(gtGroup)
 	prefixes, ok := categoryMap[normalized]