afb3f83f30
Engine precision (stop foreign-machine patterns leaking into a project):
- Wire project.MachineType into the engine machine-type gate (empty input no
longer fires every machine class — press/cnc/excavator/crane/medical...).
- Capability-domain gating extended by 7 domains (outdoor, ventilation,
machining, bulk, palletizer, playground, fitness) so domain-specific hazards
only fire when the narrative names that domain; emitted via keyword_dictionary.
- Relevance backstop moved into iace (single gating contract, testable), and its
dominant false-anchor class removed (a long pattern word no longer matches a
short common token; prepositions/leitung added to the generic stoplist).
- New guard tests: TestCrossDomainPrecision (full pipeline, 0 foreign per GT) and
TestPatternReachability now asserts 0 dead patterns. Both GTs keep coverage 1.0.
Reachability fix: the 51 dead patterns required electrical/pneumatic/hydraulic
tags nothing produced — renamed to the canonical electrical_energy/
pneumatic_pressure/hydraulic_pressure/hydraulic_part.
Component review (negation is best-effort + expert-correctable):
- Parser surfaces negated components (ComponentMatch.Negated) instead of dropping
them; negated components contribute no tags/energy → no phantom hazards.
- presence_status (vorhanden|nicht_vorhanden|geloescht) + ce_marked on components;
only `vorhanden` components feed matching. CE+safety-relevant flags the PL/SIL obligation.
- Force re-seed preserves the expert's component decisions instead of wiping them.
- Tag-based component→hazard assignment (was: all on the first component).
- Negation-aware narrative parsing ("keine Pneumatik" no longer extracts it).
Local-dev DB: ai-sdk sets search_path=compliance,core,public; reconcile migrations
152-156 bring the consolidated local iace tables to the current schema + add the
presence_status/ce_marked columns. Machine-type vocabulary endpoint for the form.
[migration-approved]
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
182 lines
8.0 KiB
Go
182 lines
8.0 KiB
Go
package iace
|
|
|
|
import "strings"
|
|
|
|
// Pattern relevance gating (engine-side backstop).
//
// The pattern engine fires any pattern whose required tags are present. Some
// patterns are gated only on near-universal tags (e.g. "structural_part" — every
// machine has a frame) or on strong-but-broad tags a machine genuinely has
// (gravity_risk, moving_part, high_force). That lets context-specific patterns
// from other environments leak in (tick bites, confined-space oxygen, palletizer
// reach-in). IsPatternRelevant is the text backstop: a pattern made only of
// GENERIC hazard vocabulary (quetschen, stromschlag, absturz, person ...) is a
// universal machine hazard and stays; a pattern carrying a machine-, environment-
// or organism-specific word (palettierer, klettergeraet, zeckenbiss) only applies
// if that word actually appears in this machine's limits.
//
// This is a BACKSTOP, not the primary gate — the authoritative gate is the
// engine's machine-type + required-tag matching (see patternMatches). Keeping the
// relevance logic in this package lets the precision test exercise the exact
// production path.
|
|
|
|
// genericSafetyTerms are exact words that appear in almost all risk assessments
// and must NOT be treated as machine-specific. Lookup is by exact, already
// normalised word (see isGenericTerm); inflected forms are handled separately
// by the prefix list genericStems.
var genericSafetyTerms = map[string]bool{
	"maschine": true, "anlage": true, "bereich": true, "gesamte": true,
	"arbeitsplatz": true, "gefahrbereich": true, "gefahrstelle": true,
	"gefahrenstelle": true, "person": true, "werker": true, "bediener": true,
	"steuerung": true, "schutzeinrichtung": true, "sicherheit": true,
	"betrieb": true, "wartung": true, "instandhaltung": true, "reinigung": true,
	"bewegung": true, "beweglich": true, "feststehend": true, "teil": true,
	"teile": true, "oeffnung": true, "zugang": true, "gefahr": true,
	"verletzung": true, "quetsch": true, "scher": true, "schneid": true,
	"stoss": true, "schlag": true, "einzug": true, "brand": true,
	"motor": true, "antrieb": true, "achse": true, "achsen": true,
	"kabel": true, "leitung": true, "schaltschrank": true, "spannung": true,
	"schutz": true, "gehaeuse": true, "oberflaeche": true, "boden": true,
	"leitfaehig": true, "elektrisch": true, "mechanisch": true,
	"bedienfeld": true, "display": true, "anzeige": true,
	"energie": true, "druck": true, "temperatur": true,
	// Abbreviations and synonyms that should not trigger the relevance filter.
	"kss": true, "emv": true, "esd": true, "dcs": true, "plr": true, "sil": true,
	"hmi": true, "sps": true, "rcd": true, "loto": true, "psa": true,
	// Common action words.
	// NOTE(review): "durchschlaegen" is the only non-infinitive in this group and
	// looks like a typo for "durchschlagen" — confirm against the pattern texts
	// before changing, since the stoplist affects the precision ground truth.
	"bersten": true, "platzen": true, "abspringen": true, "spritzen": true,
	"einatmen": true, "ausrutschen": true, "herabfallen": true,
	"durchschlaegen": true, "wegschleudern": true,
	// Common structural terms that don't indicate a specific machine.
	"gesamter": true, "gesamtes": true, "bereichs": true, "stelle": true,
	"innen": true, "aussen": true, "transport": true, "seite": true,
	"front": true, "rueck": true, "ober": true, "unter": true,
	"fuehrung": true, "lager": true, "verschleiss": true, "welle": true,
	"getriebe": true, "kette": true, "riemen": true, "feder": true,
	"spindel": true, "werkzeug": true, "werkstueck": true, "flucht": true,
	// Generic anatomy (too short for safe prefix stems).
	"arm": true, "arme": true, "bein": true, "beine": true, "fuss": true,
	"fuesse": true, "kopf": true, "koepfe": true, "hand": true, "haende": true,
	// Common German function words (prepositions/conjunctions/determiners) that
	// are not machine-specific. Those of five or more characters would otherwise
	// pass the specific-word length cut in IsPatternRelevant; the shorter ones
	// ("beim", "etwa", "jede") are already dropped by that cut before this map
	// is consulted there.
	"zwischen": true, "durch": true, "gegen": true, "neben": true,
	"hinter": true, "waehrend": true, "sowie": true, "dabei": true,
	"dadurch": true, "wodurch": true, "beim": true, "etwa": true,
	"jeder": true, "jede": true, "jedes": true, "dieser": true, "diese": true,
	"dieses": true, "welche": true, "welcher": true, "deren": true,
	"dessen": true, "sodass": true, "damit": true,
	// Location prepositions — never machine-distinctive.
	"ueber": true, "oberhalb": true, "unterhalb": true, "innerhalb": true,
	"ausserhalb": true, "entlang": true, "angrenzend": true, "darunter": true,
	"umliegend": true, "benachbart": true,
}
|
|
|
|
// genericStems cover inflected generic words by prefix (German adds suffixes:
// person→personen/personal, arbeit→arbeiten/arbeitsraum, quetsch→quetschen).
// The list is meant to hold only stems long/distinct enough that a prefix match
// does not catch an unrelated specific compound. This is the lemma half of the
// filter; isGenericTerm applies each entry with strings.HasPrefix.
var genericStems = []string{
	// actors / organisation
	"person", "arbeit", "taetig", "mitarbeit", "bedien", "werker", "nutzer",
	"betrieb", "wartung", "instandhalt", "reinig", "einricht", "transport",
	"qualifik", "unterweis", "schulung",
	// hazard phenomena / kinematics
	"quetsch", "scher", "schneid", "schnitt", "stich", "stoss", "schlag",
	"einzug", "einzieh", "erfass", "wickel", "absturz", "abstuerz", "sturz",
	"stuerz", "kollision", "anprall", "anstoss", "verbrenn", "verbrueh",
	"verletz", "gefaehrd", "klemm",
	// energy / electrical / thermal descriptors
	"stromschl", "spannung", "elektr", "thermi", "energie", "leitung",
	// anatomy (long enough for prefix)
	"finger", "koerper", "gliedmass", "extremit",
	// structure / location / generic qualifiers
	// ("betriebs" is already covered by the shorter stem "betrieb" above; it is
	// kept so the list contents stay unchanged.)
	"bereich", "struktur", "gehaeuse", "oberflaech", "beweg", "feststehend",
	"schutz", "sicher", "maschine", "anlage", "betriebs",
	// common generic verbs / adjectives (contact, motion, causation, state)
	"beruehr", "greif", "treff", "fall", "faell", "loes", "oeffn", "schliess",
	"gelang", "erreich", "direkt", "schwer", "offen", "scharf", "teil",
	"moeglich", "fehlend", "unerwart", "ploetzl", "unkontroll", "versehentl",
}
|
|
|
|
func isGenericTerm(w string) bool {
|
|
if genericSafetyTerms[w] {
|
|
return true
|
|
}
|
|
for _, s := range genericStems {
|
|
if strings.HasPrefix(w, s) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// narrativeTokenSet builds the set of words the machine actually describes
|
|
// (limits text + component names), normalised and de-duplicated.
|
|
func narrativeTokenSet(narrative string, compNames []string) map[string]bool {
|
|
set := make(map[string]bool)
|
|
add := func(text string) {
|
|
for _, t := range strings.Fields(NormalizeDEPublic(text)) {
|
|
t = strings.Trim(t, ".,;:!?()/-\"")
|
|
if len(t) >= 4 {
|
|
set[t] = true
|
|
}
|
|
}
|
|
}
|
|
add(narrative)
|
|
for _, cn := range compNames {
|
|
add(cn)
|
|
}
|
|
return set
|
|
}
|
|
|
|
// specificWordInNarrative reports whether a machine-specific pattern word is
// present in the machine's vocabulary.
//
// A word matches when it is
//   - exactly equal to a narrative token whose map value is true, or
//   - at least five bytes long and a prefix of some narrative token (any key of
//     tokens), which tolerates German inflection: "behaelter" ~ "behaeltern".
//
// Matching is anchored at the start of the token, so there are no substring
// false positives ("arbeiten" inside "bearbeiten"). Only this one direction is
// accepted: a narrative token being a prefix of the pattern word does NOT
// count. That reverse direction let a long pattern word anchor on a short
// common narrative token (pattern "uebertragen" matching "ueber",
// "zugangsbereich" matching "zugang"), the dominant false-positive class.
//
// Words shorter than five bytes only ever match exactly.
func specificWordInNarrative(sw string, tokens map[string]bool) bool {
	if tokens[sw] {
		return true
	}
	if len(sw) < 5 {
		return false
	}
	for t := range tokens {
		// No separate length check on t is needed: sw has at least five bytes
		// and a token can only have sw as a prefix if it is at least as long.
		if strings.HasPrefix(t, sw) {
			return true
		}
	}
	return false
}
|
|
|
|
// IsPatternRelevant checks whether a pattern applies to the machine in the
|
|
// narrative. A pattern with no machine-specific word is generic → relevant. A
|
|
// pattern with specific words is relevant only if at least one appears in the
|
|
// machine's own vocabulary.
|
|
func IsPatternRelevant(mp PatternMatch, narrative string, compNames []string) bool {
|
|
patternText := NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)
|
|
|
|
var specificWords []string
|
|
for _, w := range strings.Fields(patternText) {
|
|
w = strings.Trim(w, ".,;:!?()/-\"")
|
|
if len(w) < 5 || isGenericTerm(w) {
|
|
continue
|
|
}
|
|
specificWords = append(specificWords, w)
|
|
}
|
|
if len(specificWords) == 0 {
|
|
return true
|
|
}
|
|
|
|
tokens := narrativeTokenSet(narrative, compNames)
|
|
for _, sw := range specificWords {
|
|
if specificWordInNarrative(sw, tokens) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|