package iace import "strings" // Pattern relevance gating (engine-side backstop). // // The pattern engine fires any pattern whose required tags are present. Some // patterns are gated only on near-universal tags (e.g. "structural_part" — every // machine has a frame) or on strong-but-broad tags a machine genuinely has // (gravity_risk, moving_part, high_force). That lets context-specific patterns // from other environments leak in (tick bites, confined-space oxygen, palletizer // reach-in). IsPatternRelevant is the text backstop: a pattern made only of // GENERIC hazard vocabulary (quetschen, stromschlag, absturz, person ...) is a // universal machine hazard and stays; a pattern carrying a machine-, environment- // or organism-specific word (palettierer, klettergeraet, zeckenbiss) only applies // if that word actually appears in this machine's limits. // // This is a BACKSTOP, not the primary gate — the authoritative gate is the // engine's machine-type + required-tag matching (see patternMatches). Keeping the // relevance logic in this package lets the precision test exercise the exact // production path. // genericSafetyTerms are exact words that appear in almost all risk assessments // and must NOT be treated as machine-specific. 
var genericSafetyTerms = map[string]bool{
	// Generic machine / hazard vocabulary. Lookups are exact-match; inflected
	// forms are handled separately by the prefix stems.
	"maschine": true, "anlage": true, "bereich": true, "gesamte": true,
	"arbeitsplatz": true, "gefahrbereich": true, "gefahrstelle": true,
	"gefahrenstelle": true, "person": true, "werker": true, "bediener": true,
	"steuerung": true, "schutzeinrichtung": true, "sicherheit": true,
	"betrieb": true, "wartung": true, "instandhaltung": true, "reinigung": true,
	"bewegung": true, "beweglich": true, "feststehend": true, "teil": true,
	"teile": true, "oeffnung": true, "zugang": true, "gefahr": true,
	"verletzung": true, "quetsch": true, "scher": true, "schneid": true,
	"stoss": true, "schlag": true, "einzug": true, "brand": true,
	"motor": true, "antrieb": true, "achse": true, "achsen": true,
	"kabel": true, "leitung": true, "schaltschrank": true, "spannung": true,
	"schutz": true, "gehaeuse": true, "oberflaeche": true, "boden": true,
	"leitfaehig": true, "elektrisch": true, "mechanisch": true,
	"bedienfeld": true, "display": true, "anzeige": true, "energie": true,
	"druck": true, "temperatur": true,

	// Abbreviations and synonyms that should not trigger the relevance filter.
	"kss": true, "emv": true, "esd": true, "dcs": true, "plr": true,
	"sil": true, "hmi": true, "sps": true, "rcd": true, "loto": true,
	"psa": true,

	// Common action words.
	"bersten": true, "platzen": true, "abspringen": true, "spritzen": true,
	"einatmen": true, "ausrutschen": true, "herabfallen": true,
	"durchschlaegen": true, "wegschleudern": true,

	// Common structural terms that don't indicate a specific machine.
	"gesamter": true, "gesamtes": true, "bereichs": true, "stelle": true,
	"innen": true, "aussen": true, "transport": true, "seite": true,
	"front": true, "rueck": true, "ober": true, "unter": true,
	"fuehrung": true, "lager": true, "verschleiss": true, "welle": true,
	"getriebe": true, "kette": true, "riemen": true, "feder": true,
	"spindel": true, "werkzeug": true, "werkstueck": true, "flucht": true,

	// Generic anatomy (too short for safe prefix stems).
	"arm": true, "arme": true, "bein": true, "beine": true, "fuss": true,
	"fuesse": true, "kopf": true, "koepfe": true, "hand": true, "haende": true,

	// Common German function words (prepositions/conjunctions/determiners) that
	// are not machine-specific but survive the >=5-char specific-word cut.
	"zwischen": true, "durch": true, "gegen": true, "neben": true,
	"hinter": true, "waehrend": true, "sowie": true, "dabei": true,
	"dadurch": true, "wodurch": true, "beim": true, "etwa": true,
	"jeder": true, "jede": true, "jedes": true, "dieser": true, "diese": true,
	"dieses": true, "welche": true, "welcher": true, "deren": true,
	"dessen": true, "sodass": true, "damit": true,

	// Location prepositions — never machine-distinctive.
	"ueber": true, "oberhalb": true, "unterhalb": true, "innerhalb": true,
	"ausserhalb": true, "entlang": true, "angrenzend": true, "darunter": true,
	"umliegend": true, "benachbart": true,
}

// genericStems cover inflected generic words by prefix (German adds suffixes:
// person→personen/personal, arbeit→arbeiten/arbeitsraum, quetsch→quetschen).
// Only stems long/distinct enough that a prefix match cannot catch an unrelated
// specific compound are listed. This is the lemma half of the filter.
var genericStems = []string{ // actors / organisation "person", "arbeit", "taetig", "mitarbeit", "bedien", "werker", "nutzer", "betrieb", "wartung", "instandhalt", "reinig", "einricht", "transport", "qualifik", "unterweis", "schulung", // hazard phenomena / kinematics "quetsch", "scher", "schneid", "schnitt", "stich", "stoss", "schlag", "einzug", "einzieh", "erfass", "wickel", "absturz", "abstuerz", "sturz", "stuerz", "kollision", "anprall", "anstoss", "verbrenn", "verbrueh", "verletz", "gefaehrd", "klemm", // energy / electrical / thermal descriptors "stromschl", "spannung", "elektr", "thermi", "energie", "leitung", // anatomy (long enough for prefix) "finger", "koerper", "gliedmass", "extremit", // structure / location / generic qualifiers "bereich", "struktur", "gehaeuse", "oberflaech", "beweg", "feststehend", "schutz", "sicher", "maschine", "anlage", "betriebs", // common generic verbs / adjectives (contact, motion, causation, state) "beruehr", "greif", "treff", "fall", "faell", "loes", "oeffn", "schliess", "gelang", "erreich", "direkt", "schwer", "offen", "scharf", "teil", "moeglich", "fehlend", "unerwart", "ploetzl", "unkontroll", "versehentl", } func isGenericTerm(w string) bool { if genericSafetyTerms[w] { return true } for _, s := range genericStems { if strings.HasPrefix(w, s) { return true } } return false } // narrativeTokenSet builds the set of words the machine actually describes // (limits text + component names), normalised and de-duplicated. func narrativeTokenSet(narrative string, compNames []string) map[string]bool { set := make(map[string]bool) add := func(text string) { for _, t := range strings.Fields(NormalizeDEPublic(text)) { t = strings.Trim(t, ".,;:!?()/-\"") if len(t) >= 4 { set[t] = true } } } add(narrative) for _, cn := range compNames { add(cn) } return set } // specificWordInNarrative reports whether a machine-specific pattern word is // present in the machine's vocabulary. 
Matches on token boundaries (full token, // or either word a prefix of the other for ≥5 chars) so German inflection is // tolerated ("behaelter" ~ "behaeltern") without substring false positives // ("arbeiten" inside "bearbeiten"). func specificWordInNarrative(sw string, tokens map[string]bool) bool { if tokens[sw] { return true } if len(sw) < 5 { return false } for t := range tokens { // Only the inflection direction: a narrative token is the specific word // plus a German suffix ("behaelter" → "behaeltern"). The REVERSE // direction is dropped — it let a long pattern word anchor on a short // common narrative token (pattern "uebertragen" matching "ueber", // "zugangsbereich" matching "zugang"), the dominant false-positive class. if len(t) >= 5 && strings.HasPrefix(t, sw) { return true } } return false } // IsPatternRelevant checks whether a pattern applies to the machine in the // narrative. A pattern with no machine-specific word is generic → relevant. A // pattern with specific words is relevant only if at least one appears in the // machine's own vocabulary. func IsPatternRelevant(mp PatternMatch, narrative string, compNames []string) bool { patternText := NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName) var specificWords []string for _, w := range strings.Fields(patternText) { w = strings.Trim(w, ".,;:!?()/-\"") if len(w) < 5 || isGenericTerm(w) { continue } specificWords = append(specificWords, w) } if len(specificWords) == 0 { return true } tokens := narrativeTokenSet(narrative, compNames) for _, sw := range specificWords { if specificWordInNarrative(sw, tokens) { return true } } return false }