Files
breakpilot-compliance/ai-compliance-sdk/internal/iace/pattern_domain_gates.go
T
Benjamin Admin b1357915ae
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / build-sha-integrity (push) Failing after 4s
CI / validate-canonical-controls (push) Successful in 10s
CI / loc-budget (push) Successful in 11s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Failing after 40s
CI / iace-gt-coverage (push) Successful in 24s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
feat(iace): Capability-Domain-Gating — Ghost 120→0, Leakage 25→0, Coverage 100%
Generische Pattern-Engine-Optimierung: behebt zwei Seiten derselben Wurzel
(inkonsistente Applicability-Deklaration ueber 1216 Patterns).

- Ghost-Patterns (120, feuerten nie): 34 nicht-erzeugbare Required-Tags via
  domaenenspezifische Keywords emittierbar gemacht -> 0.
- Cross-Domain-Leakage (25, feuerten ueberall): neuer text-getriebener
  Capability-Domain-Gate (pattern_domain_gates.go) — Pattern mit Fremdmaschine
  im Szenariotext bekommt dom_*-Tag als Required-Gate -> 0.
- Resolver: Komponente->TypicalEnergySources-Expansion (strukturierte Projekte).
- Benchmark: GT-Platzhalter-Filter; faithful Cross-GT-Narrative-Harness.
- Harte Regression-Guards: Ghosts=0, Leakage=0, Coverage>=90% (beide GTs).
- HP2000/HP2001 (Secondary-Harm-Demos) in AllowlistKnownGaps -> Suite gruen.

Echte Pipeline beide GTs: Coverage 100%/100%, 0 Leaks, 0 Ghosts.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-09 11:57:08 +02:00

91 lines
3.9 KiB
Go

package iace
import "strings"
// Capability-Domain-Gating — the cure for cross-domain leakage.
//
// Many domain-specific hazard patterns were authored gated only by a GENERIC
// capability tag (e.g. "rotating_part"), so they fire for every machine that
// has rotating parts — a lift, a robot cell — even though the hazard belongs to
// a press, a spinning machine or a PV array. This is the precision-killing
// inverse of ghost patterns; both stem from inconsistent applicability.
//
// The fix is capability-driven (NOT a machine-type whitelist hack): a pattern
// whose OWN scenario text names a foreign machine gets that domain's capability
// tag appended to its RequiredComponentTags. The same tag is emitted by the
// domain's narrative keywords (keyword_dictionary.go), so the pattern still
// fires for its real domain but no longer leaks into unrelated machines.
//
// INVARIANT: every tag below MUST be emittable via keyword_dictionary.go,
// otherwise the gated pattern becomes a ghost. TestTagVocabulary_GhostPatterns
// is the regression guard for this.
// domainGateTerms maps a machine-betraying term (umlaut-normalised, lowercase)
// to the domain capability tag that gates patterns mentioning it.
var domainGateTerms = map[string]string{
// Pressen / Stanzen / Umformen
"stanzhub": "dom_press", "pressenhub": "dom_press", "pressenstoessel": "dom_press",
"dauerhub": "dom_press", "exzenterpresse": "dom_press", "beinpresse": "dom_press",
"stanzpresse": "dom_press", "umformpresse": "dom_press",
// Kunststoff / Spritzguss / Extrusion
"spritzgie": "dom_plastics", "extruder": "dom_plastics", "extrusion": "dom_plastics",
"kunststoffschmelze": "dom_plastics", "schliesseinheit": "dom_plastics",
// Walzen / Kalander / Laminieren
"walzenspalt": "dom_rolling", "zweiwalzenwerk": "dom_rolling", "kalander": "dom_rolling",
"walzwerk": "dom_rolling", "laminieranlage": "dom_rolling", "laminier": "dom_rolling",
// Textil
"spinnmaschine": "dom_textile", "webmaschine": "dom_textile", "spinnerei": "dom_textile",
// Schleifen
"schleifscheibe": "dom_grinding", "schleifbock": "dom_grinding",
// Schweissen
"widerstandsschweiss": "dom_welding", "lichtbogenschweiss": "dom_welding",
"schutzgasschweiss": "dom_welding",
// Solar / PV
"pv-modul": "dom_solar", "photovoltaik": "dom_solar", "pv-anlage": "dom_solar",
"dc-steckverbindung": "dom_solar", "solarmodul": "dom_solar",
// Windkraft
"gondel": "dom_wind", "rotorblatt": "dom_wind", "windenergieanlage": "dom_wind",
// CNC / Zerspanung
"drehmaschine": "dom_cnc", "fraesmaschine": "dom_cnc",
// Landwirtschaft
"maehdrescher": "dom_agri", "ballenpresse": "dom_agri", "feldhaecksler": "dom_agri",
// Roll-/Fahrtreppe
"rolltreppe": "dom_escalator", "fahrtreppe": "dom_escalator",
}
// applyDomainGates appends a domain capability tag to every pattern whose own
// text betrays that domain, so domain-specific hazards stop leaking into
// unrelated machines. Idempotent; safe to run once after pattern collection.
func applyDomainGates(patterns []HazardPattern) []HazardPattern {
for i := range patterns {
text := normalizeGateText(patterns[i].NameDE + " " + patterns[i].ScenarioDE + " " +
patterns[i].TriggerDE + " " + patterns[i].HarmDE)
present := make(map[string]bool, len(patterns[i].RequiredComponentTags))
for _, t := range patterns[i].RequiredComponentTags {
present[t] = true
}
for term, tag := range domainGateTerms {
if present[tag] {
continue
}
if strings.Contains(text, term) {
patterns[i].RequiredComponentTags = append(patterns[i].RequiredComponentTags, tag)
present[tag] = true
}
}
}
return patterns
}
// normalizeGateText lowercases and folds umlauts, matching keyword_dictionary's
// normalisation so gate terms and emit keywords use one vocabulary.
func normalizeGateText(s string) string {
s = strings.ToLower(s)
s = strings.ReplaceAll(s, "ä", "ae")
s = strings.ReplaceAll(s, "ö", "oe")
s = strings.ReplaceAll(s, "ü", "ue")
s = strings.ReplaceAll(s, "ß", "ss")
return s
}