Files
breakpilot-compliance/ai-compliance-sdk/internal/iace/pattern_relevance.go
T
Benjamin Admin afb3f83f30 feat(iace): cross-domain precision overhaul + component review + schema reconcile
Engine precision (stop foreign-machine patterns leaking into a project):
- Wire project.MachineType into the engine machine-type gate (empty input no
  longer fires every machine class — press/cnc/excavator/crane/medical...).
- Capability-domain gating extended by 7 domains (outdoor, ventilation,
  machining, bulk, palletizer, playground, fitness) so domain-specific hazards
  only fire when the narrative names that domain; emitted via keyword_dictionary.
- Relevance backstop moved into iace (single gating contract, testable), and its
  dominant false-anchor class removed (a long pattern word no longer matches a
  short common token; prepositions/leitung added to the generic stoplist).
- New guard tests: TestCrossDomainPrecision (full pipeline, 0 foreign per GT) and
  TestPatternReachability now asserts 0 dead patterns. Both GTs keep coverage 1.0.

Reachability fix: the 51 dead patterns required electrical/pneumatic/hydraulic
tags nothing produced — renamed to the canonical electrical_energy/
pneumatic_pressure/hydraulic_pressure/hydraulic_part.

Component review (negation is best-effort + expert-correctable):
- Parser surfaces negated components (ComponentMatch.Negated) instead of dropping
  them; negated contribute no tags/energy → no phantom hazards.
- presence_status (vorhanden|nicht_vorhanden|geloescht) + ce_marked on components;
  only `vorhanden` feed matching. CE+safety-relevant flags the PL/SIL obligation.
- Force re-seed preserves the expert's component decisions instead of wiping them.
- Tag-based component→hazard assignment (was: all on the first component).
- Negation-aware narrative parsing ("keine Pneumatik" no longer extracts it).

Local-dev DB: ai-sdk sets search_path=compliance,core,public; reconcile migrations
152-156 bring the consolidated local iace tables to the current schema + add the
presence_status/ce_marked columns. Machine-type vocabulary endpoint for the form.

[migration-approved]

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-10 17:15:55 +02:00

182 lines
8.0 KiB
Go

package iace
import "strings"
// Pattern relevance gating (engine-side backstop).
//
// The pattern engine fires any pattern whose required tags are present. Some
// patterns are gated only on near-universal tags (e.g. "structural_part" — every
// machine has a frame) or on strong-but-broad tags a machine genuinely has
// (gravity_risk, moving_part, high_force). That lets context-specific patterns
// from other environments leak in (tick bites, confined-space oxygen, palletizer
// reach-in). IsPatternRelevant is the text backstop: a pattern made only of
// GENERIC hazard vocabulary (quetschen, stromschlag, absturz, person ...) is a
// universal machine hazard and stays; a pattern carrying a machine-, environment-
// or organism-specific word (palettierer, klettergeraet, zeckenbiss) only applies
// if that word actually appears in this machine's limits.
//
// This is a BACKSTOP, not the primary gate — the authoritative gate is the
// engine's machine-type + required-tag matching (see patternMatches). Keeping the
// relevance logic in this package lets the precision test exercise the exact
// production path.
// genericSafetyTerms is the exact-match half of the generic-vocabulary filter:
// words that appear in almost all risk assessments and must NOT be treated as
// machine-specific. isGenericTerm looks a word up here verbatim (no stemming),
// so inflected forms need their own entry ("gesamte", "gesamter", "gesamtes").
//
// Keys are lowercase with umlauts and sharp s spelled out (ae/oe/ue/ss) —
// presumably the form NormalizeDEPublic emits; confirm against that helper.
// Every value is true: the map is used purely as a set.
//
// Several entries (e.g. "maschine", "person", "bereich") are also covered by a
// prefix in genericStems; the overlap is harmless because either hit is enough.
var genericSafetyTerms = map[string]bool{
// Generic machine, workplace, actor and hazard vocabulary.
"maschine": true, "anlage": true, "bereich": true, "gesamte": true,
"arbeitsplatz": true, "gefahrbereich": true, "gefahrstelle": true,
"gefahrenstelle": true, "person": true, "werker": true, "bediener": true,
"steuerung": true, "schutzeinrichtung": true, "sicherheit": true,
"betrieb": true, "wartung": true, "instandhaltung": true, "reinigung": true,
"bewegung": true, "beweglich": true, "feststehend": true, "teil": true,
"teile": true, "oeffnung": true, "zugang": true, "gefahr": true,
"verletzung": true, "quetsch": true, "scher": true, "schneid": true,
"stoss": true, "schlag": true, "einzug": true, "brand": true,
"motor": true, "antrieb": true, "achse": true, "achsen": true,
"kabel": true, "leitung": true, "schaltschrank": true, "spannung": true,
"schutz": true, "gehaeuse": true, "oberflaeche": true, "boden": true,
"leitfaehig": true, "elektrisch": true, "mechanisch": true,
"bedienfeld": true, "display": true, "anzeige": true,
"energie": true, "druck": true, "temperatur": true,
// Abbreviations and synonyms that should not trigger the relevance filter.
// All are shorter than 5 bytes, so IsPatternRelevant's length check already
// drops them before this table is consulted on that path.
"kss": true, "emv": true, "esd": true, "dcs": true, "plr": true, "sil": true,
"hmi": true, "sps": true, "rcd": true, "loto": true, "psa": true,
// Common action words (what the hazard does, not which machine it is).
"bersten": true, "platzen": true, "abspringen": true, "spritzen": true,
"einatmen": true, "ausrutschen": true, "herabfallen": true,
"durchschlaegen": true, "wegschleudern": true,
// Common structural terms that don't indicate a specific machine.
"gesamter": true, "gesamtes": true, "bereichs": true, "stelle": true,
"innen": true, "aussen": true, "transport": true, "seite": true,
"front": true, "rueck": true, "ober": true, "unter": true,
"fuehrung": true, "lager": true, "verschleiss": true, "welle": true,
"getriebe": true, "kette": true, "riemen": true, "feder": true,
"spindel": true, "werkzeug": true, "werkstueck": true, "flucht": true,
// Generic anatomy. Listed as exact words because they are too short to be
// safe as prefix stems (a prefix like "arm" would swallow unrelated words).
"arm": true, "arme": true, "bein": true, "beine": true, "fuss": true,
"fuesse": true, "kopf": true, "koepfe": true, "hand": true, "haende": true,
// Common German function words (prepositions/conjunctions/determiners) that
// are not machine-specific but survive the >=5-char specific-word cut.
// The 4-byte entries here ("beim", "etwa", "jede") do not survive that cut,
// so they have no effect when reached through IsPatternRelevant.
"zwischen": true, "durch": true, "gegen": true, "neben": true,
"hinter": true, "waehrend": true, "sowie": true, "dabei": true,
"dadurch": true, "wodurch": true, "beim": true, "etwa": true,
"jeder": true, "jede": true, "jedes": true, "dieser": true, "diese": true,
"dieses": true, "welche": true, "welcher": true, "deren": true,
"dessen": true, "sodass": true, "damit": true,
// Location prepositions and adjectives — never machine-distinctive.
"ueber": true, "oberhalb": true, "unterhalb": true, "innerhalb": true,
"ausserhalb": true, "entlang": true, "angrenzend": true, "darunter": true,
"umliegend": true, "benachbart": true,
}
// genericStems cover inflected generic words by prefix (German adds suffixes:
// person→personen/personal, arbeit→arbeiten/arbeitsraum, quetsch→quetschen).
// Only stems long/distinct enough that a prefix match cannot catch an unrelated
// specific compound are listed. This is the lemma half of the filter; the
// exact-word half is genericSafetyTerms.
//
// isGenericTerm tests each entry with strings.HasPrefix, so a stem matches at
// the START of a word only (never in the middle), and the order of entries does
// not affect the result.
//
// NOTE(review): a few stems are only 4 bytes ("fall", "loes", "teil") and will
// also match any longer compound beginning with them — confirm that is intended.
var genericStems = []string{
// actors / organisation
"person", "arbeit", "taetig", "mitarbeit", "bedien", "werker", "nutzer",
"betrieb", "wartung", "instandhalt", "reinig", "einricht", "transport",
"qualifik", "unterweis", "schulung",
// hazard phenomena / kinematics
"quetsch", "scher", "schneid", "schnitt", "stich", "stoss", "schlag",
"einzug", "einzieh", "erfass", "wickel", "absturz", "abstuerz", "sturz",
"stuerz", "kollision", "anprall", "anstoss", "verbrenn", "verbrueh",
"verletz", "gefaehrd", "klemm",
// energy / electrical / thermal descriptors
"stromschl", "spannung", "elektr", "thermi", "energie", "leitung",
// anatomy (long enough for prefix)
"finger", "koerper", "gliedmass", "extremit",
// structure / location / generic qualifiers
// ("betriebs" is already subsumed by the shorter stem "betrieb" above: every
// word with prefix "betriebs" also has prefix "betrieb".)
"bereich", "struktur", "gehaeuse", "oberflaech", "beweg", "feststehend",
"schutz", "sicher", "maschine", "anlage", "betriebs",
// common generic verbs / adjectives (contact, motion, causation, state)
"beruehr", "greif", "treff", "fall", "faell", "loes", "oeffn", "schliess",
"gelang", "erreich", "direkt", "schwer", "offen", "scharf", "teil",
"moeglich", "fehlend", "unerwart", "ploetzl", "unkontroll", "versehentl",
}
// isGenericTerm reports whether w belongs to the generic hazard vocabulary.
// A word is generic when it is listed verbatim in genericSafetyTerms or when
// it begins with any prefix from genericStems. w is compared as given; the
// caller is responsible for normalising it first.
func isGenericTerm(w string) bool {
	// Exact-word table first; the stem scan only runs when that misses.
	generic := genericSafetyTerms[w]
	for i := 0; !generic && i < len(genericStems); i++ {
		generic = strings.HasPrefix(w, genericStems[i])
	}
	return generic
}
// narrativeTokenSet collects the vocabulary the machine itself uses: every
// word of the limits narrative followed by every word of each component name.
// Each text is passed through NormalizeDEPublic, split on whitespace, and
// stripped of leading/trailing punctuation; tokens shorter than 4 bytes are
// discarded. The result is a set (all values true), so duplicates collapse.
func narrativeTokenSet(narrative string, compNames []string) map[string]bool {
	const edgePunct = ".,;:!?()/-\""

	// Narrative first, then the component names, in their given order.
	sources := make([]string, 0, len(compNames)+1)
	sources = append(sources, narrative)
	sources = append(sources, compNames...)

	vocab := make(map[string]bool)
	for _, src := range sources {
		for _, field := range strings.Fields(NormalizeDEPublic(src)) {
			tok := strings.Trim(field, edgePunct)
			if len(tok) < 4 {
				continue
			}
			vocab[tok] = true
		}
	}
	return vocab
}
// specificWordInNarrative reports whether the machine-specific pattern word sw
// is present in the machine's vocabulary tokens.
//
// A word counts as present when
//   - it is an exact token, or
//   - sw is at least 5 bytes long and some token STARTS WITH sw, i.e. the
//     narrative token is the pattern word plus a German suffix
//     ("behaelter" → "behaeltern").
//
// Matching is anchored at the start of a token, so there are no substring
// false positives ("arbeiten" inside "bearbeiten"). The reverse direction — a
// token being a prefix of sw — is deliberately NOT accepted: it let a long
// pattern word anchor on a short common narrative token (pattern
// "uebertragen" matching "ueber", "zugangsbereich" matching "zugang"), the
// dominant false-positive class.
func specificWordInNarrative(sw string, tokens map[string]bool) bool {
	if tokens[sw] {
		return true
	}
	// Too short for a safe prefix match: exact hits only.
	if len(sw) < 5 {
		return false
	}
	for t := range tokens {
		// No separate length check on t is needed: a token that has sw as a
		// prefix is at least len(sw) >= 5 bytes long.
		if strings.HasPrefix(t, sw) {
			return true
		}
	}
	return false
}
// IsPatternRelevant decides whether the matched pattern mp applies to the
// machine described by narrative and compNames.
//
// The pattern's zone, scenario and name are normalised and split into words.
// Words shorter than 5 bytes and words recognised by isGenericTerm are ignored;
// whatever remains is the pattern's machine-specific vocabulary. A pattern with
// no such words is a universal hazard and is always relevant. Otherwise it is
// relevant only if at least one specific word is found in the machine's own
// vocabulary (narrative plus component names).
func IsPatternRelevant(mp PatternMatch, narrative string, compNames []string) bool {
	normalized := NormalizeDEPublic(mp.ZoneDE + " " + mp.ScenarioDE + " " + mp.PatternName)

	// Keep only the words that could tie the pattern to a particular machine.
	var specific []string
	for _, field := range strings.Fields(normalized) {
		word := strings.Trim(field, ".,;:!?()/-\"")
		if len(word) >= 5 && !isGenericTerm(word) {
			specific = append(specific, word)
		}
	}
	if len(specific) == 0 {
		return true
	}

	// The machine vocabulary is built only when there is something to look up.
	vocab := narrativeTokenSet(narrative, compNames)
	for i := range specific {
		if specificWordInNarrative(specific[i], vocab) {
			return true
		}
	}
	return false
}