feat(iace): narrative vocabulary overlap filter replaces blacklist
Replace the machine-specific term blacklist with a generic vocabulary-overlap check:

- Extract significant words (>= 5 chars, not generic safety terms) from the pattern's zone/scenario
- If the pattern has specific words but NONE appear in the narrative or the component names → filter it out
- genericSafetyTerms whitelist with ~50 terms that appear in all assessments (ignored when judging machine-specificity)
- Truly generic approach: works for any machine type without maintenance

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -199,53 +199,65 @@ func containsSubstring(haystack, needle string) bool {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// machineSpecificTerms are words in a pattern's zone/scenario that indicate
|
// genericSafetyTerms are words that appear in almost all risk assessments
|
||||||
// the pattern is specific to a particular machine type. If a pattern contains
|
// and should NOT be used to determine machine-specificity.
|
||||||
// such a term but the machine narrative does NOT, the pattern is irrelevant.
|
var genericSafetyTerms = map[string]bool{
|
||||||
var machineSpecificTerms = []string{
|
"maschine": true, "anlage": true, "bereich": true, "gesamte": true,
|
||||||
"extruder", "spinnmaschine", "spielplatz", "aufzug", "elevator",
|
"arbeitsplatz": true, "gefahrbereich": true, "gefahrstelle": true,
|
||||||
"kran", "crane", "bagger", "excavator", "traktor", "tractor",
|
"gefahrenstelle": true, "person": true, "werker": true, "bediener": true,
|
||||||
"harvester", "druckmaschine", "printing", "webstuhl", "weaving",
|
"steuerung": true, "schutzeinrichtung": true, "sicherheit": true,
|
||||||
"ofen", "furnace", "kessel", "boiler", "walzwerk", "rolling",
|
"betrieb": true, "wartung": true, "instandhaltung": true, "reinigung": true,
|
||||||
"zentrifuge", "centrifuge", "autoklav", "autoclave", "saege",
|
"bewegung": true, "beweglich": true, "feststehend": true, "teil": true,
|
||||||
"kreissaege", "circular_saw", "hobel", "fraese", "drehmaschine",
|
"teile": true, "oeffnung": true, "zugang": true, "gefahr": true,
|
||||||
"lathe", "schleifmaschine", "grinder", "stanze", "stanzpresse",
|
"verletzung": true, "quetsch": true, "scher": true, "schneid": true,
|
||||||
"infusion", "beatmung", "ventilator", "patient",
|
"stoss": true, "schlag": true, "einzug": true, "brand": true,
|
||||||
"lebensmittel", "food", "pharma", "verpackung", "packaging",
|
"motor": true, "antrieb": true, "achse": true, "achsen": true,
|
||||||
"seilnetz", "kletterseil", "schaukel", "rutsche",
|
"kabel": true, "leitung": true, "schaltschrank": true, "spannung": true,
|
||||||
"gabelstapler", "forklift", "flurfoerder",
|
"schutz": true, "gehaeuse": true, "oberflaeche": true, "boden": true,
|
||||||
|
"leitfaehig": true, "elektrisch": true, "mechanisch": true,
|
||||||
|
"bedienfeld": true, "display": true, "anzeige": true,
|
||||||
|
"energie": true, "druck": true, "temperatur": true,
|
||||||
}
|
}
|
||||||
|
|
||||||
// isPatternRelevant checks whether a pattern match is relevant to the actual
|
// isPatternRelevant checks whether a pattern match is relevant to the actual
|
||||||
// machine described in the narrative. A pattern is considered irrelevant if its
|
// machine described in the narrative. Uses narrative vocabulary overlap:
|
||||||
// zone or scenario contains machine-specific terms that don't appear in the
|
// if the pattern's zone/scenario contains machine-specific words (not generic
|
||||||
// narrative or component list.
|
// safety terms) and NONE of them appear in the narrative → irrelevant.
|
||||||
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
|
func isPatternRelevant(mp iace.PatternMatch, narrative string, compNames []string) bool {
|
||||||
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
|
patternText := iace.NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE)
|
||||||
narrativeNorm := iace.NormalizeDEPublic(narrative)
|
narrativeNorm := iace.NormalizeDEPublic(narrative)
|
||||||
|
|
||||||
// Check if pattern mentions machine-specific terms absent from narrative
|
// Extract machine-specific words from pattern (not generic safety terms)
|
||||||
for _, term := range machineSpecificTerms {
|
patternWords := strings.Fields(patternText)
|
||||||
if !strings.Contains(patternText, term) {
|
var specificWords []string
|
||||||
|
for _, w := range patternWords {
|
||||||
|
// Clean punctuation
|
||||||
|
w = strings.Trim(w, ".,;:!?()/-")
|
||||||
|
if len(w) < 5 || genericSafetyTerms[w] {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// Pattern mentions this machine-specific term — check if machine has it
|
specificWords = append(specificWords, w)
|
||||||
if strings.Contains(narrativeNorm, term) {
|
}
|
||||||
continue // Machine has this term, pattern is relevant
|
|
||||||
|
// If pattern has no specific words, it's generic → always relevant
|
||||||
|
if len(specificWords) == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if at least one specific word appears in the narrative or components
|
||||||
|
for _, sw := range specificWords {
|
||||||
|
if strings.Contains(narrativeNorm, sw) {
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
// Also check component names
|
|
||||||
found := false
|
|
||||||
for _, cn := range compNames {
|
for _, cn := range compNames {
|
||||||
if strings.Contains(cn, term) {
|
if strings.Contains(cn, sw) {
|
||||||
found = true
|
return true
|
||||||
break
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !found {
|
|
||||||
return false // Pattern mentions a machine type we don't have
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return true
|
|
||||||
|
// No specific word found in narrative → pattern is for a different machine
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// normalizeZoneKey reduces a zone string to its core components for better dedup.
|
// normalizeZoneKey reduces a zone string to its core components for better dedup.
|
||||||
|
|||||||
Reference in New Issue
Block a user