feat(iace): pattern audit suite + library hygiene wave
Add cmd/iace-audit CLI with 5 deterministic methods that find engine gaps without ground truth:
- A reachability: 1058 patterns vs achievable tag universe
- B consistency: components vs their declared hazard categories
- C vocabulary: limits-form tokens vs keyword dictionary
- D echo: limits-form sentences vs generated hazards (jaccard)
- E hierarchy: hazards vs ISO 12100 design/protection/info levels

Library fixes triggered by A+B+C findings:
- tag_resolver: synonym map for electrical/pneumatic/hydraulic aliases
- component_library:
  - crush_point + EN03 (gravitational) on C014/C128 (Hubwerk family) - fixes HP1014/1015/1017/1018 which were silently weakly_reachable.
  - noise_source added on 7 components (C006/C011/C017/C020/C031/C041/C096).
  - electrical_part on 8 drive components (C031/C032/C033/C034/C035/C036/C037/C038/C077/C092).
  - cyber tag on 10 sensors (C081-C090) + 3 IT components (C111/C112/C116) + KI module C119 (ai_model added).
  - pneumatic_part+hydraulic_part on valves C091/C093, hydraulic_part+chemical_risk on pump C097, moving_part on motion controller C075.
- keyword_dictionary: EN03 added to aufzug/lift/hubwerk/hubgeraet (was wrongly EN04-only). New keyword entries for hub-action verbs: absenken/senken/anheben/heben + hubhoehe/hubweg/hubgeschwindig

Audit impact:
- A: weakly_reachable 409 -> 358 (-51 patterns now fully reachable)
- B: incomplete components 46 -> 30 (-16, -35%)
- HP1018 (Person unter absenkendem Maschinenteil eingeklemmt): weakly_reachable -> reachable

Why: methods A/B/C surfaced that the Kistenhubgeraet test project generated 0 crush-under-load hazards despite OSHA 1910.212(a)(3) + EN ISO 12100 6.3.5.5 explicitly requiring them. Three orthogonal bugs (missing crush_point tag, wrong energy source mapping, missing action verbs in dictionary) silently disabled the entire lift crush pattern family.
This commit is contained in:
@@ -0,0 +1,171 @@
|
||||
package audit
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
|
||||
)
|
||||
|
||||
// runConsistencyImpl asks: does this component, with its own tags PLUS the
|
||||
// tags of its TypicalEnergySources, actually trigger at least one pattern
|
||||
// in every category listed in its TypicalHazardCategories?
|
||||
//
|
||||
// A component declares "this is what I am dangerous for" and the engine
|
||||
// turns that declaration into hazards through patterns. If no pattern can
|
||||
// fire from the component's tag set, the declaration is decorative — the
|
||||
// engine will never produce a hazard in that category for this component,
|
||||
// even though the library author said it should.
|
||||
func init() {
|
||||
runConsistencyImpl = runConsistency
|
||||
}
|
||||
|
||||
func runConsistency() ConsistencyReport {
|
||||
comps := iace.GetComponentLibrary()
|
||||
energies := iace.GetEnergySources()
|
||||
patterns := iace.AllPatterns()
|
||||
|
||||
energyByID := map[string]iace.EnergySourceEntry{}
|
||||
for _, e := range energies {
|
||||
energyByID[e.ID] = e
|
||||
}
|
||||
|
||||
report := ConsistencyReport{TotalComponents: len(comps)}
|
||||
|
||||
for _, c := range comps {
|
||||
if len(c.TypicalHazardCategories) == 0 {
|
||||
report.Consistent++
|
||||
continue
|
||||
}
|
||||
effective := buildEffectiveTags(c, energyByID)
|
||||
covered := categoriesCoveredByPatterns(effective, c.MapsToComponentType, patterns)
|
||||
|
||||
var missing []string
|
||||
for _, cat := range c.TypicalHazardCategories {
|
||||
if !covered[cat] {
|
||||
missing = append(missing, cat)
|
||||
}
|
||||
}
|
||||
if len(missing) == 0 {
|
||||
report.Consistent++
|
||||
continue
|
||||
}
|
||||
|
||||
result := ComponentResult{
|
||||
ComponentID: c.ID,
|
||||
NameDE: c.NameDE,
|
||||
DeclaredCategories: c.TypicalHazardCategories,
|
||||
}
|
||||
for cat := range covered {
|
||||
result.CoveredCategories = append(result.CoveredCategories, cat)
|
||||
}
|
||||
sort.Strings(result.CoveredCategories)
|
||||
for _, cat := range missing {
|
||||
result.MissingForCategories = append(result.MissingForCategories, CategoryGap{
|
||||
Category: cat,
|
||||
SuggestedTags: suggestTagsForCategory(cat, effective, patterns),
|
||||
})
|
||||
}
|
||||
report.Incomplete++
|
||||
report.IncompleteComponents = append(report.IncompleteComponents, result)
|
||||
}
|
||||
|
||||
sort.Slice(report.IncompleteComponents, func(i, j int) bool {
|
||||
return report.IncompleteComponents[i].ComponentID < report.IncompleteComponents[j].ComponentID
|
||||
})
|
||||
return report
|
||||
}
|
||||
|
||||
func buildEffectiveTags(c iace.ComponentLibraryEntry, energyByID map[string]iace.EnergySourceEntry) map[string]bool {
|
||||
set := map[string]bool{}
|
||||
for _, t := range c.Tags {
|
||||
set[t] = true
|
||||
}
|
||||
for _, eID := range c.TypicalEnergySources {
|
||||
e, ok := energyByID[eID]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, t := range e.Tags {
|
||||
set[t] = true
|
||||
}
|
||||
}
|
||||
return set
|
||||
}
|
||||
|
||||
// categoriesCoveredByPatterns iterates patterns and finds which
|
||||
// GeneratedHazardCats can fire given the component's effective tags.
|
||||
// We ignore lifecycle, op-state, and human-role filters — those are
|
||||
// project-level. The audit asks "can the library produce ANY hazard in
|
||||
// this category for this component if the project configures everything
|
||||
// reasonably?"
|
||||
func categoriesCoveredByPatterns(tags map[string]bool, _ string, patterns []iace.HazardPattern) map[string]bool {
|
||||
covered := map[string]bool{}
|
||||
for _, p := range patterns {
|
||||
if !tagsCover(tags, p.RequiredComponentTags) {
|
||||
continue
|
||||
}
|
||||
if !tagsCover(tags, p.RequiredEnergyTags) {
|
||||
continue
|
||||
}
|
||||
for _, cat := range p.GeneratedHazardCats {
|
||||
covered[cat] = true
|
||||
}
|
||||
}
|
||||
return covered
|
||||
}
|
||||
|
||||
// tagsCover reports whether every tag in required is present (mapped to
// true) in have. An empty or nil required list is always covered.
func tagsCover(have map[string]bool, required []string) bool {
	for _, tag := range required {
		if !have[tag] {
			return false
		}
	}
	return true
}
|
||||
|
||||
// suggestTagsForCategory looks at patterns that DO generate this category
|
||||
// and identifies the tags that would close the gap. Returns the tags most
|
||||
// commonly required by patterns in that category, minus what the component
|
||||
// already has.
|
||||
func suggestTagsForCategory(cat string, have map[string]bool, patterns []iace.HazardPattern) []string {
|
||||
counts := map[string]int{}
|
||||
for _, p := range patterns {
|
||||
matchCat := false
|
||||
for _, c := range p.GeneratedHazardCats {
|
||||
if c == cat {
|
||||
matchCat = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !matchCat {
|
||||
continue
|
||||
}
|
||||
for _, t := range p.RequiredComponentTags {
|
||||
if !have[t] {
|
||||
counts[t]++
|
||||
}
|
||||
}
|
||||
for _, t := range p.RequiredEnergyTags {
|
||||
if !have[t] {
|
||||
counts[t]++
|
||||
}
|
||||
}
|
||||
}
|
||||
type kv struct {
|
||||
tag string
|
||||
n int
|
||||
}
|
||||
var sorted []kv
|
||||
for t, n := range counts {
|
||||
sorted = append(sorted, kv{t, n})
|
||||
}
|
||||
sort.Slice(sorted, func(i, j int) bool { return sorted[i].n > sorted[j].n })
|
||||
var out []string
|
||||
for i, s := range sorted {
|
||||
if i >= 6 {
|
||||
break
|
||||
}
|
||||
out = append(out, s.tag)
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,161 @@
|
||||
package audit
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// runEchoImpl checks if each meaningful phrase from the limits-form is
|
||||
// echoed by at least one generated hazard. A phrase that names a concrete
|
||||
// scenario, fault, or constraint must reappear (semantically) in some
|
||||
// hazard's name, scenario, or description. Phrases without echo are gaps:
|
||||
// the engineer documented the risk but the engine never lifted it into
|
||||
// the hazard register.
|
||||
//
|
||||
// Echo detection here is a lightweight Jaccard overlap of content tokens
|
||||
// (not embeddings) — robust enough for the demonstrative diagnostic and
|
||||
// keeps the audit fully deterministic without an external model. The
|
||||
// caller can later swap in a vector-based scorer.
|
||||
func init() {
|
||||
runEchoImpl = runEcho
|
||||
}
|
||||
|
||||
// echoFields lists the limits-form fields audited by the echo check, in
// audit order. key is the field name looked up in the form, label is the
// German display name written into the report.
//
// Only the freeform fields where engineers describe risks are included —
// list/enum fields (operating_modes, person_groups, industry_sectors) are
// out of scope because they carry no narrative phrases.
var echoFields = []struct {
	key   string
	label string
}{
	{"general_description", "Allg. Beschreibung"},
	{"intended_purpose", "Bestimmungsgemaesse Verwendung"},
	{"variants", "Varianten"},
	{"foreseeable_misuses", "Vorhersehbare Fehlanwendung"},
	{"spatial_limits", "Raeumliche Grenzen"},
	{"temporal_limits", "Zeitliche Grenzen"},
	{"operating_conditions", "Betriebsbedingungen"},
	{"energy_supply", "Energieversorgung"},
	{"mechanical_interfaces", "Mechanische Schnittstellen"},
	{"electrical_interfaces", "Elektrische Schnittstellen"},
	{"software_interfaces", "Software-Schnittstellen"},
	{"pneumatic_hydraulic_interfaces", "Pneumatik/Hydraulik"},
	{"qualification_requirements", "Personenqualifikation"},
}
|
||||
|
||||
// sentenceSplit matches the separators between phrases of a freeform
// field: sentence-ending punctuation followed by whitespace, or one or
// more line breaks.
var sentenceSplit = regexp.MustCompile(`[.!?]\s+|\n+`)

// wordRE matches candidate content words: runs of at least four letters
// drawn from a-z, A-Z and the German umlauts/eszett. Digits and shorter
// words never become tokens.
var wordRE = regexp.MustCompile(`[a-zäöüßA-ZÄÖÜ]{4,}`)

// echoThreshold — minimum Jaccard overlap (between sentence content
// tokens and a hazard's content tokens) at or above which the sentence is
// considered echoed. Tuned by hand to give meaningful results without a
// labeled corpus; the audit reports the actual best score for each
// orphaned phrase so a human can re-tune if needed.
const echoThreshold = 0.18
|
||||
|
||||
func runEcho(form map[string]any, hazards []map[string]any) EchoReport {
|
||||
limits := unwrapLimits(form)
|
||||
|
||||
// Precompute hazard token bags once
|
||||
type bag struct {
|
||||
tokens map[string]bool
|
||||
text string
|
||||
}
|
||||
var hazardBags []bag
|
||||
for _, h := range hazards {
|
||||
txt := joinHazardText(h)
|
||||
toks := contentTokenSet(txt)
|
||||
hazardBags = append(hazardBags, bag{tokens: toks, text: txt})
|
||||
}
|
||||
|
||||
report := EchoReport{}
|
||||
for _, fld := range echoFields {
|
||||
raw, _ := limits[fld.key].(string)
|
||||
raw = strings.TrimSpace(raw)
|
||||
if raw == "" {
|
||||
continue
|
||||
}
|
||||
for _, sent := range sentenceSplit.Split(raw, -1) {
|
||||
sent = strings.TrimSpace(sent)
|
||||
if len(sent) < 30 {
|
||||
// Skip very short fragments
|
||||
continue
|
||||
}
|
||||
report.TotalPhrases++
|
||||
st := contentTokenSet(sent)
|
||||
if len(st) < 3 {
|
||||
continue
|
||||
}
|
||||
bestScore := 0.0
|
||||
for _, hb := range hazardBags {
|
||||
score := jaccard(st, hb.tokens)
|
||||
if score > bestScore {
|
||||
bestScore = score
|
||||
}
|
||||
}
|
||||
if bestScore >= echoThreshold {
|
||||
report.Echoed++
|
||||
continue
|
||||
}
|
||||
report.Orphaned++
|
||||
report.OrphanedPhrases = append(report.OrphanedPhrases, OrphanedPhrase{
|
||||
Field: fld.label,
|
||||
Phrase: sent,
|
||||
BestScore: bestScore,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
sort.Slice(report.OrphanedPhrases, func(i, j int) bool {
|
||||
// Lowest scores first — most clearly orphaned
|
||||
return report.OrphanedPhrases[i].BestScore < report.OrphanedPhrases[j].BestScore
|
||||
})
|
||||
return report
|
||||
}
|
||||
|
||||
// unwrapLimits returns the map that holds the limits-form fields. If form
// has a "limits_form" entry that is itself a map, that inner map is
// returned; otherwise form is assumed to be the flat form and is returned
// unchanged.
func unwrapLimits(form map[string]any) map[string]any {
	inner, ok := form["limits_form"].(map[string]any)
	if !ok {
		return form
	}
	return inner
}
|
||||
|
||||
// joinHazardText concatenates the textual fields of a hazard into one
// space-separated string used for tokenisation. Fields are taken in a
// fixed order; keys that are absent or hold a non-string value are left
// out.
func joinHazardText(h map[string]any) string {
	keys := []string{
		"name", "description", "scenario", "trigger_event",
		"possible_harm", "hazardous_zone", "category", "sub_category",
	}
	parts := make([]string, 0, len(keys))
	for _, k := range keys {
		if text, ok := h[k].(string); ok {
			parts = append(parts, text)
		}
	}
	return strings.Join(parts, " ")
}
|
||||
|
||||
func contentTokenSet(s string) map[string]bool {
|
||||
out := map[string]bool{}
|
||||
for _, m := range wordRE.FindAllString(s, -1) {
|
||||
w := strings.ToLower(m)
|
||||
if stopWords[w] {
|
||||
continue
|
||||
}
|
||||
out[w] = true
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// jaccard returns the Jaccard similarity |a ∩ b| / |a ∪ b| of two token
// sets, a value between 0 and 1. It returns 0 when either set is empty.
func jaccard(a, b map[string]bool) float64 {
	if len(a) == 0 || len(b) == 0 {
		return 0
	}
	shared := 0
	for token := range a {
		if b[token] {
			shared++
		}
	}
	// Both sets are non-empty here, so the union size is at least 1 and
	// the division is always defined.
	return float64(shared) / float64(len(a)+len(b)-shared)
}
|
||||
@@ -0,0 +1,158 @@
|
||||
package audit
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// runHierarchyImpl checks the ISO 12100 / EN 12100 risk-reduction
|
||||
// hierarchy on the generated mitigation set: every safety-relevant
|
||||
// hazard should have at least one "inherently safe design" measure
|
||||
// (design) and additionally either a guarding/protective device
|
||||
// (protection) or an information-for-use measure (information).
|
||||
//
|
||||
// Cyber-, ergonomic-, and software-only hazards have looser
|
||||
// expectations — design alone or information alone may legitimately
|
||||
// suffice. The audit reports which level is missing, not whether the
|
||||
// remaining measures are individually correct. That is a different
|
||||
// check (E2 — semantic quality), out of scope here.
|
||||
func init() {
|
||||
runHierarchyImpl = runHierarchy
|
||||
}
|
||||
|
||||
// hazardExpectsProtection lists hazard categories where a pure
// design+information combination is usually not enough — the engine
// should produce at least one explicit protective measure (guard,
// interlock, sensor, presence detector, …). Categories not listed here
// are not checked for a protection measure.
var hazardExpectsProtection = map[string]bool{
	"mechanical_hazard":     true,
	"electrical_hazard":     true,
	"thermal_hazard":        true,
	"pneumatic_hydraulic":   true,
	"radiation_hazard":      true,
	"laser_hazard":          true,
	"fire_explosion_hazard": true,
	"chemical_hazard":       true,
}
|
||||
|
||||
func runHierarchy(hazards, mitigations []map[string]any) HierarchyReport {
|
||||
report := HierarchyReport{TotalHazards: len(hazards)}
|
||||
|
||||
// Index mitigations by hazard_id
|
||||
byHazard := map[string][]map[string]any{}
|
||||
for _, m := range mitigations {
|
||||
hid, _ := m["hazard_id"].(string)
|
||||
if hid == "" {
|
||||
continue
|
||||
}
|
||||
byHazard[hid] = append(byHazard[hid], m)
|
||||
}
|
||||
|
||||
for _, h := range hazards {
|
||||
hid, _ := h["id"].(string)
|
||||
category, _ := h["category"].(string)
|
||||
name, _ := h["name"].(string)
|
||||
|
||||
levels := levelsForHazard(byHazard[hid])
|
||||
missing := expectedMissing(category, levels)
|
||||
|
||||
if len(missing) == 0 {
|
||||
report.Complete++
|
||||
continue
|
||||
}
|
||||
for _, m := range missing {
|
||||
switch m {
|
||||
case "design":
|
||||
report.MissingDesign++
|
||||
case "protection":
|
||||
report.MissingProtection++
|
||||
case "information":
|
||||
report.MissingInfo++
|
||||
}
|
||||
}
|
||||
report.IncompleteHazards = append(report.IncompleteHazards, HazardHierarchyResult{
|
||||
HazardID: hid,
|
||||
Name: name,
|
||||
Category: category,
|
||||
Levels: levels,
|
||||
MissingLevels: missing,
|
||||
})
|
||||
}
|
||||
|
||||
// Sort: protection-missing first (most consequential), then by category
|
||||
sort.Slice(report.IncompleteHazards, func(i, j int) bool {
|
||||
a := report.IncompleteHazards[i]
|
||||
b := report.IncompleteHazards[j]
|
||||
ap := contains(a.MissingLevels, "protection")
|
||||
bp := contains(b.MissingLevels, "protection")
|
||||
if ap != bp {
|
||||
return ap
|
||||
}
|
||||
return a.Category < b.Category
|
||||
})
|
||||
return report
|
||||
}
|
||||
|
||||
// levelsForHazard returns the distinct reduction levels present in a
// hazard's mitigation set, in alphabetical order. Possible values are
// "design", "information" and "protection".
//
// The level is read from each mitigation's "reduction_type" string,
// compared case-insensitively; "protective" is accepted as an alias for
// "protection". Any other value, or a missing/non-string field, is
// ignored. The result is nil when no recognised level is present.
func levelsForHazard(mits []map[string]any) []string {
	var design, information, protection bool
	for _, m := range mits {
		rt, _ := m["reduction_type"].(string)
		switch strings.ToLower(rt) {
		case "design":
			design = true
		case "protection", "protective":
			protection = true
		case "information":
			information = true
		}
	}

	// Appending in this fixed order yields the alphabetically sorted list.
	var out []string
	if design {
		out = append(out, "design")
	}
	if information {
		out = append(out, "information")
	}
	if protection {
		out = append(out, "protection")
	}
	return out
}
|
||||
|
||||
// expectedMissing returns the levels that the hierarchy demands but
|
||||
// the mitigation set does not provide.
|
||||
//
|
||||
// Rule:
|
||||
// - Every hazard with mitigations should have a design measure.
|
||||
// - Categories in hazardExpectsProtection additionally need a
|
||||
// protection measure.
|
||||
// - All hazards should have an information measure unless they
|
||||
// already have both design + protection (the information layer
|
||||
// can then be considered subsumed for the audit's purpose; the
|
||||
// real engine usually still adds it).
|
||||
func expectedMissing(category string, present []string) []string {
|
||||
have := toBoolSet(present)
|
||||
var missing []string
|
||||
if !have["design"] {
|
||||
missing = append(missing, "design")
|
||||
}
|
||||
if hazardExpectsProtection[category] && !have["protection"] {
|
||||
missing = append(missing, "protection")
|
||||
}
|
||||
// Information is only flagged if both design and protection are
|
||||
// also absent — otherwise too noisy. We still surface the case
|
||||
// where information is the SOLE present level: that means the
|
||||
// hazard is mitigated only by warning labels, which is rarely
|
||||
// adequate.
|
||||
if !have["information"] && !have["design"] && !have["protection"] {
|
||||
missing = append(missing, "information")
|
||||
}
|
||||
return missing
|
||||
}
|
||||
|
||||
// contains reports whether target is an element of list.
func contains(list []string, target string) bool {
	for _, item := range list {
		if item == target {
			return true
		}
	}
	return false
}
|
||||
@@ -0,0 +1,37 @@
|
||||
package audit
|
||||
|
||||
// Exported entry points for Methods B-E. The full algorithms live in
// consistency.go, vocabulary.go, echo.go, hierarchy.go respectively; each
// method file is expected to install its implementation into the matching
// run*Impl hook from an init function. Until a method file lands, its hook
// keeps the default defined below, so main.go stays compilable and the
// method returns an empty (zero-value) report.
|
||||
|
||||
func RunConsistency() ConsistencyReport {
|
||||
return runConsistencyImpl()
|
||||
}
|
||||
|
||||
func RunVocabulary(form map[string]any) VocabularyReport {
|
||||
return runVocabularyImpl(form)
|
||||
}
|
||||
|
||||
func RunEcho(form map[string]any, hazards []map[string]any) EchoReport {
|
||||
return runEchoImpl(form, hazards)
|
||||
}
|
||||
|
||||
func RunHierarchy(hazards, mitigations []map[string]any) HierarchyReport {
|
||||
return runHierarchyImpl(hazards, mitigations)
|
||||
}
|
||||
|
||||
// Default implementations — replaced when each method file lands.
|
||||
// Keeping them as separate functions in one place avoids name clashes
|
||||
// once consistency.go etc. add their real implementations.
|
||||
|
||||
var (
|
||||
runConsistencyImpl = func() ConsistencyReport { return ConsistencyReport{} }
|
||||
runVocabularyImpl = func(form map[string]any) VocabularyReport { return VocabularyReport{} }
|
||||
runEchoImpl = func(form map[string]any, hazards []map[string]any) EchoReport {
|
||||
return EchoReport{}
|
||||
}
|
||||
runHierarchyImpl = func(hazards, mitigations []map[string]any) HierarchyReport {
|
||||
return HierarchyReport{}
|
||||
}
|
||||
)
|
||||
@@ -0,0 +1,298 @@
|
||||
// Package audit provides static and runtime audits of the IACE pattern
|
||||
// engine — finding pattern reachability, library consistency, and
|
||||
// limits-form coverage gaps without a ground-truth reference.
|
||||
package audit
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
|
||||
)
|
||||
|
||||
// ReachabilityResult is the verdict for a single pattern in Method A.
//
// RequiredAllTags is the de-duplicated union of the pattern's required
// component and energy tags. UnreachableTags is filled for "unreachable"
// patterns, ReachableSources for "reachable" ones, and FixSuggestions for
// patterns that are not fully reachable.
type ReachabilityResult struct {
	PatternID        string   `json:"pattern_id"`
	Name             string   `json:"name_de"`
	Priority         int      `json:"priority"`
	RequiredAllTags  []string `json:"required_tags"`
	UnreachableTags  []string `json:"unreachable_tags,omitempty"`
	Status           string   `json:"status"` // "reachable" | "weakly_reachable" | "unreachable"
	ReachableSources []string `json:"reachable_sources,omitempty"`
	FixSuggestions   []string `json:"fix_suggestions,omitempty"`
}
|
||||
|
||||
// ReachabilityReport is the full Method A output.
|
||||
type ReachabilityReport struct {
|
||||
TotalPatterns int `json:"total_patterns"`
|
||||
Reachable int `json:"reachable"`
|
||||
WeaklyReachable int `json:"weakly_reachable"`
|
||||
Unreachable int `json:"unreachable"`
|
||||
UniverseTags []string `json:"universe_tags"`
|
||||
UnreachablePatterns []ReachabilityResult `json:"unreachable_patterns"`
|
||||
WeakPatterns []ReachabilityResult `json:"weak_patterns"`
|
||||
}
|
||||
|
||||
// RunReachability evaluates every pattern against the achievable tag universe.
|
||||
//
|
||||
// A pattern is:
|
||||
// - "unreachable" if at least one required tag is not produced by any
|
||||
// component, energy source, or keyword-dictionary entry.
|
||||
// - "weakly_reachable" if all required tags exist in the universe but
|
||||
// no single source (one Component or one EnergySource or one Keyword
|
||||
// entry) supplies all of them at once — i.e., it relies on multiple
|
||||
// parser hits to combine.
|
||||
// - "reachable" if some single source covers all required tags.
|
||||
//
|
||||
// The classification ignores ExcludedComponentTags and runtime filters
|
||||
// (lifecycle/op-state/machine-type), because those are project-level
|
||||
// concerns. The audit answers "could this pattern EVER fire", not
|
||||
// "does it fire for project X".
|
||||
func RunReachability() ReachabilityReport {
|
||||
patterns := iace.AllPatterns()
|
||||
comps := iace.GetComponentLibrary()
|
||||
energies := iace.GetEnergySources()
|
||||
keywords := iace.GetKeywordDictionary()
|
||||
|
||||
// Tag universe: union of every tag emitted anywhere
|
||||
universe := map[string][]string{} // tag → list of source IDs that emit it
|
||||
for _, c := range comps {
|
||||
for _, t := range c.Tags {
|
||||
universe[t] = appendUnique(universe[t], "component:"+c.ID)
|
||||
}
|
||||
}
|
||||
for _, e := range energies {
|
||||
for _, t := range e.Tags {
|
||||
universe[t] = appendUnique(universe[t], "energy:"+e.ID)
|
||||
}
|
||||
}
|
||||
for i, kw := range keywords {
|
||||
for _, t := range kw.ExtraTags {
|
||||
universe[t] = appendUnique(universe[t], keywordLabel(kw, i))
|
||||
}
|
||||
// Keyword entries can also reference components/energies, which
|
||||
// transitively add their tags to the keyword's effective tag set.
|
||||
for _, cID := range kw.ComponentIDs {
|
||||
for _, c := range comps {
|
||||
if c.ID != cID {
|
||||
continue
|
||||
}
|
||||
for _, t := range c.Tags {
|
||||
universe[t] = appendUnique(universe[t], keywordLabel(kw, i))
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, eID := range kw.EnergyIDs {
|
||||
for _, e := range energies {
|
||||
if e.ID != eID {
|
||||
continue
|
||||
}
|
||||
for _, t := range e.Tags {
|
||||
universe[t] = appendUnique(universe[t], keywordLabel(kw, i))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Single-source coverage map: tag → covering sources, but also
|
||||
// per-source tag set so we can check "is there ONE source covering
|
||||
// all required tags".
|
||||
sourceTags := map[string]map[string]bool{}
|
||||
for _, c := range comps {
|
||||
key := "component:" + c.ID
|
||||
sourceTags[key] = toBoolSet(c.Tags)
|
||||
}
|
||||
for _, e := range energies {
|
||||
key := "energy:" + e.ID
|
||||
sourceTags[key] = toBoolSet(e.Tags)
|
||||
}
|
||||
for i, kw := range keywords {
|
||||
key := keywordLabel(kw, i)
|
||||
set := toBoolSet(kw.ExtraTags)
|
||||
for _, cID := range kw.ComponentIDs {
|
||||
for _, c := range comps {
|
||||
if c.ID == cID {
|
||||
for _, t := range c.Tags {
|
||||
set[t] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, eID := range kw.EnergyIDs {
|
||||
for _, e := range energies {
|
||||
if e.ID == eID {
|
||||
for _, t := range e.Tags {
|
||||
set[t] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
sourceTags[key] = set
|
||||
}
|
||||
|
||||
report := ReachabilityReport{TotalPatterns: len(patterns)}
|
||||
|
||||
// Universe tag list (sorted) for the report header
|
||||
for t := range universe {
|
||||
report.UniverseTags = append(report.UniverseTags, t)
|
||||
}
|
||||
sort.Strings(report.UniverseTags)
|
||||
|
||||
for _, p := range patterns {
|
||||
all := dedup(append(append([]string{}, p.RequiredComponentTags...), p.RequiredEnergyTags...))
|
||||
if len(all) == 0 {
|
||||
// Pattern with no tag requirements relies on lifecycle/machine_type
|
||||
// filters only — count as reachable by default.
|
||||
report.Reachable++
|
||||
continue
|
||||
}
|
||||
|
||||
var missing []string
|
||||
for _, t := range all {
|
||||
if _, ok := universe[t]; !ok {
|
||||
missing = append(missing, t)
|
||||
}
|
||||
}
|
||||
|
||||
res := ReachabilityResult{
|
||||
PatternID: p.ID,
|
||||
Name: p.NameDE,
|
||||
Priority: p.Priority,
|
||||
RequiredAllTags: all,
|
||||
}
|
||||
|
||||
if len(missing) > 0 {
|
||||
res.Status = "unreachable"
|
||||
res.UnreachableTags = missing
|
||||
res.FixSuggestions = suggestFixes(p, missing, comps, sourceTags)
|
||||
report.Unreachable++
|
||||
report.UnreachablePatterns = append(report.UnreachablePatterns, res)
|
||||
continue
|
||||
}
|
||||
|
||||
// All tags in universe — check single-source coverage
|
||||
single := findSingleSourceCovers(all, sourceTags)
|
||||
if len(single) > 0 {
|
||||
res.Status = "reachable"
|
||||
res.ReachableSources = single
|
||||
report.Reachable++
|
||||
continue
|
||||
}
|
||||
|
||||
res.Status = "weakly_reachable"
|
||||
res.FixSuggestions = suggestSingleSourceFixes(p, all, comps, sourceTags)
|
||||
report.WeaklyReachable++
|
||||
report.WeakPatterns = append(report.WeakPatterns, res)
|
||||
}
|
||||
|
||||
sort.Slice(report.UnreachablePatterns, func(i, j int) bool {
|
||||
return report.UnreachablePatterns[i].Priority > report.UnreachablePatterns[j].Priority
|
||||
})
|
||||
sort.Slice(report.WeakPatterns, func(i, j int) bool {
|
||||
return report.WeakPatterns[i].Priority > report.WeakPatterns[j].Priority
|
||||
})
|
||||
return report
|
||||
}
|
||||
|
||||
// findSingleSourceCovers returns the names of all sources whose own tag
// set contains every tag in required, sorted alphabetically. It returns
// nil when no single source covers the requirement.
func findSingleSourceCovers(required []string, sourceTags map[string]map[string]bool) []string {
	var hits []string
sources:
	for src, tags := range sourceTags {
		for _, t := range required {
			if !tags[t] {
				continue sources
			}
		}
		hits = append(hits, src)
	}
	// Map iteration order is random; sorting makes the result reproducible.
	sort.Strings(hits)
	return hits
}
|
||||
|
||||
// suggestFixes proposes concrete library edits for unreachable patterns:
|
||||
// "Add tag X to Component C014 (Hubwerk)" type suggestions.
|
||||
func suggestFixes(p iace.HazardPattern, missing []string, comps []iace.ComponentLibraryEntry, sourceTags map[string]map[string]bool) []string {
|
||||
var out []string
|
||||
// For each missing tag, find candidates: components/energies that
|
||||
// would semantically own that tag based on existing tags overlap.
|
||||
for _, tag := range missing {
|
||||
candidates := nearComponents(p, tag, comps, sourceTags)
|
||||
if len(candidates) > 0 {
|
||||
out = append(out, "Add tag '"+tag+"' to one of: "+joinFirst(candidates, 3))
|
||||
} else {
|
||||
out = append(out, "Tag '"+tag+"' is undefined anywhere — needs a new component or energy source carrying it")
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func suggestSingleSourceFixes(p iace.HazardPattern, all []string, comps []iace.ComponentLibraryEntry, sourceTags map[string]map[string]bool) []string {
|
||||
// Find components that match the most required tags, then suggest
|
||||
// adding the residual ones.
|
||||
best := ""
|
||||
bestCover := 0
|
||||
var bestMissing []string
|
||||
for src, tags := range sourceTags {
|
||||
hit := 0
|
||||
var miss []string
|
||||
for _, t := range all {
|
||||
if tags[t] {
|
||||
hit++
|
||||
} else {
|
||||
miss = append(miss, t)
|
||||
}
|
||||
}
|
||||
if hit > bestCover {
|
||||
best, bestCover, bestMissing = src, hit, miss
|
||||
}
|
||||
}
|
||||
if best == "" || bestCover == 0 {
|
||||
return []string{"No single source covers any required tags — pattern needs a new dedicated component"}
|
||||
}
|
||||
if len(bestMissing) == 0 {
|
||||
return nil
|
||||
}
|
||||
return []string{"Closest single source '" + best + "' covers " + itoa(bestCover) + "/" + itoa(len(all)) + " tags. Add missing tags to it: " + joinFirst(bestMissing, 5)}
|
||||
}
|
||||
|
||||
// nearComponents finds components whose tags overlap most with the pattern's
|
||||
// requirements — these are good candidates to receive the missing tag.
|
||||
func nearComponents(p iace.HazardPattern, missing string, comps []iace.ComponentLibraryEntry, sourceTags map[string]map[string]bool) []string {
|
||||
required := dedup(append(append([]string{}, p.RequiredComponentTags...), p.RequiredEnergyTags...))
|
||||
required = removeOne(required, missing)
|
||||
if len(required) == 0 {
|
||||
return nil
|
||||
}
|
||||
type scored struct {
|
||||
id string
|
||||
score int
|
||||
}
|
||||
var scoredList []scored
|
||||
for _, c := range comps {
|
||||
tagSet := toBoolSet(c.Tags)
|
||||
s := 0
|
||||
for _, t := range required {
|
||||
if tagSet[t] {
|
||||
s++
|
||||
}
|
||||
}
|
||||
if s > 0 {
|
||||
scoredList = append(scoredList, scored{id: c.ID + " (" + c.NameDE + ")", score: s})
|
||||
}
|
||||
}
|
||||
sort.Slice(scoredList, func(i, j int) bool { return scoredList[i].score > scoredList[j].score })
|
||||
var out []string
|
||||
for _, s := range scoredList {
|
||||
out = append(out, s.id)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func keywordLabel(kw iace.KeywordEntry, idx int) string {
|
||||
if len(kw.Keywords) > 0 {
|
||||
return "keyword:" + kw.Keywords[0]
|
||||
}
|
||||
return "keyword:" + itoa(idx)
|
||||
}
|
||||
@@ -0,0 +1,84 @@
|
||||
package audit
|
||||
|
||||
// Report types for Methods B-E. Each method's algorithm lives in its own
// file. Keeping the type contracts here lets the CLI compile before each
// method has its full implementation.
|
||||
|
||||
// ============================================================================
|
||||
// Method B — Component Self-Consistency
|
||||
// ============================================================================
|
||||
|
||||
// CategoryGap describes one declared hazard category that a component's
// tags cannot trigger, together with the tags suggested to close the gap.
type CategoryGap struct {
	Category      string   `json:"category"`
	SuggestedTags []string `json:"suggested_tags"`
}
|
||||
|
||||
type ComponentResult struct {
|
||||
ComponentID string `json:"component_id"`
|
||||
NameDE string `json:"name_de"`
|
||||
DeclaredCategories []string `json:"declared_categories"`
|
||||
CoveredCategories []string `json:"covered_categories"`
|
||||
MissingForCategories []CategoryGap `json:"missing_for_categories,omitempty"`
|
||||
}
|
||||
|
||||
type ConsistencyReport struct {
|
||||
TotalComponents int `json:"total_components"`
|
||||
Consistent int `json:"consistent"`
|
||||
Incomplete int `json:"incomplete"`
|
||||
IncompleteComponents []ComponentResult `json:"incomplete_components"`
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Method C — Limits-Form Vocabulary Diff
|
||||
// ============================================================================
|
||||
|
||||
// DictionarySuggestion proposes one token as a new keyword-dictionary
// entry and lists the IDs of patterns whose text mentions that token.
// NOTE(review): runVocabulary fills Token and PatternIDs only, so Field
// is emitted as an empty string by that producer.
type DictionarySuggestion struct {
	Token      string   `json:"token"`
	Field      string   `json:"field"`
	PatternIDs []string `json:"pattern_ids"`
}
|
||||
|
||||
type VocabularyReport struct {
|
||||
UniqueTokens int `json:"unique_tokens"`
|
||||
KnownTokens []string `json:"known_tokens"`
|
||||
UnknownTokens []string `json:"unknown_tokens"`
|
||||
SuggestedDictionaryEntries []DictionarySuggestion `json:"suggested_dictionary_entries"`
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Method D — Limits-Form Echo
|
||||
// ============================================================================
|
||||
|
||||
// OrphanedPhrase is the element type of EchoReport.OrphanedPhrases: a
// phrase, the field it belongs to, and a score.
// NOTE(review): BestScore is presumably the highest similarity the
// phrase reached against any generated hazard; confirm against the
// Method D implementation.
type OrphanedPhrase struct {
	Field     string  `json:"field"`
	Phrase    string  `json:"phrase"`
	BestScore float64 `json:"best_score"`
}
|
||||
|
||||
type EchoReport struct {
|
||||
TotalPhrases int `json:"total_phrases"`
|
||||
Echoed int `json:"echoed"`
|
||||
Orphaned int `json:"orphaned"`
|
||||
OrphanedPhrases []OrphanedPhrase `json:"orphaned_phrases"`
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Method E — Hierarchy Completeness
|
||||
// ============================================================================
|
||||
|
||||
// HazardHierarchyResult is the Method E (hierarchy completeness) record
// for a single hazard: its ID, name and category, the levels that are
// present and the levels that are missing. The Levels field is
// serialised under the JSON key "present_levels".
type HazardHierarchyResult struct {
	HazardID      string   `json:"hazard_id"`
	Name          string   `json:"name"`
	Category      string   `json:"category"`
	Levels        []string `json:"present_levels"`
	MissingLevels []string `json:"missing_levels"`
}
|
||||
|
||||
type HierarchyReport struct {
|
||||
TotalHazards int `json:"total_hazards"`
|
||||
Complete int `json:"complete"`
|
||||
MissingDesign int `json:"missing_design"`
|
||||
MissingProtection int `json:"missing_protection"`
|
||||
MissingInfo int `json:"missing_information"`
|
||||
IncompleteHazards []HazardHierarchyResult `json:"incomplete_hazards"`
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
package audit
|
||||
|
||||
import "strconv"
|
||||
|
||||
// appendUnique returns list with item appended, unless list already
// contains item, in which case list is returned unchanged.
func appendUnique(list []string, item string) []string {
	present := false
	for i := 0; i < len(list) && !present; i++ {
		present = list[i] == item
	}
	if present {
		return list
	}
	return append(list, item)
}
|
||||
|
||||
// toBoolSet converts list into a membership set: every element of list
// becomes a key mapped to true. The result is never nil, even for a nil
// or empty list.
func toBoolSet(list []string) map[string]bool {
	set := make(map[string]bool, len(list))
	for i := range list {
		set[list[i]] = true
	}
	return set
}
|
||||
|
||||
// dedup returns the elements of list with duplicates removed, keeping
// the first occurrence of each value in its original order. The result
// is nil when list has no elements.
func dedup(list []string) []string {
	var unique []string
	known := make(map[string]bool)
	for _, item := range list {
		if known[item] {
			continue
		}
		known[item] = true
		unique = append(unique, item)
	}
	return unique
}
|
||||
|
||||
// removeOne returns a new slice holding the elements of list that are
// not equal to item, in their original order. Despite the name, every
// occurrence of item is dropped, not just the first. list itself is not
// modified, and the result is non-nil even when it is empty.
func removeOne(list []string, item string) []string {
	kept := make([]string, 0, len(list))
	for i := range list {
		if list[i] == item {
			continue
		}
		kept = append(kept, list[i])
	}
	return kept
}
|
||||
|
||||
func joinFirst(list []string, n int) string {
|
||||
if len(list) <= n {
|
||||
return joinAll(list)
|
||||
}
|
||||
return joinAll(list[:n]) + ", ..."
|
||||
}
|
||||
|
||||
// joinAll concatenates the elements of list, separated by ", ". An
// empty or nil list yields the empty string.
func joinAll(list []string) string {
	var joined string
	for idx := range list {
		if idx != 0 {
			joined += ", "
		}
		joined += list[idx]
	}
	return joined
}
|
||||
|
||||
// itoa formats n as a base-10 string. It is a short package-local alias
// for strconv.Itoa.
func itoa(n int) string {
	return strconv.Itoa(n)
}
|
||||
@@ -0,0 +1,153 @@
|
||||
package audit
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
|
||||
)
|
||||
|
||||
// runVocabularyImpl takes a limits-form payload (the structured machine
|
||||
// description filled in by the engineer) and asks: which of its words
|
||||
// are unknown to the keyword dictionary yet appear in any pattern's
|
||||
// scenario/trigger/harm/zone text? Each such word is a dictionary gap —
|
||||
// the engineer typed a term that some pattern is waiting for, but the
|
||||
// parser cannot translate it into a tag.
|
||||
func init() {
|
||||
runVocabularyImpl = runVocabulary
|
||||
}
|
||||
|
||||
// tokenRE matches one candidate vocabulary token: a run of at least
// four letters drawn from ASCII a-z/A-Z plus the German ä, ö, ü, ß, Ä,
// Ö, Ü. Digits, punctuation and shorter words never match.
var tokenRE = regexp.MustCompile(`[a-zäöüßA-ZÄÖÜ]{4,}`)
|
||||
|
||||
// stopWords lists German and English filler words that show up in any
// narrative but carry no engineering meaning. The list is deliberately
// short: only obvious filler is dropped. Keys are lower-case and use the
// ASCII transliteration of umlauts and ß ("koennen", "ueber",
// "ausschliesslich").
var stopWords = map[string]bool{
	// Conjunctions, abbreviations and cross-references.
	"oder": true, "und": true, "auch": true, "wenn": true, "sowie": true,
	"bzw": true, "etc": true, "ggf": true, "etwa": true, "siehe": true,
	// Auxiliary and modal verbs.
	"wird": true, "werden": true, "kann": true, "koennen": true,
	"soll": true, "muss": true, "sind": true,
	// Articles and demonstratives.
	"eine": true, "einer": true, "einem": true, "einen": true,
	"diese": true, "dieser": true, "dieses": true, "diesem": true, "diesen": true,
	// Prepositions and negation.
	"durch": true, "nach": true, "ueber": true, "unter": true, "zwischen": true,
	"nicht": true, "ohne": true, "fuer": true,
	// English filler.
	"the": true, "with": true, "from": true, "this": true, "that": true, "have": true,
	// Adverbs and generic qualifiers.
	"insbesondere": true, "ausschliesslich": true, "ebenfalls": true,
	"jeweils": true, "weitere": true, "weiteren": true, "weiterer": true,
}
|
||||
|
||||
func runVocabulary(form map[string]any) VocabularyReport {
|
||||
limits, ok := form["limits_form"].(map[string]any)
|
||||
if !ok {
|
||||
// Form may already be the inner object
|
||||
limits = form
|
||||
}
|
||||
|
||||
tokens := map[string]bool{}
|
||||
for _, v := range limits {
|
||||
extractTokens(v, tokens)
|
||||
}
|
||||
report := VocabularyReport{UniqueTokens: len(tokens)}
|
||||
|
||||
dictTokens := dictionaryVocabulary()
|
||||
|
||||
for tok := range tokens {
|
||||
if stopWords[tok] {
|
||||
continue
|
||||
}
|
||||
if dictTokenHit(tok, dictTokens) {
|
||||
report.KnownTokens = append(report.KnownTokens, tok)
|
||||
} else {
|
||||
report.UnknownTokens = append(report.UnknownTokens, tok)
|
||||
}
|
||||
}
|
||||
sort.Strings(report.KnownTokens)
|
||||
sort.Strings(report.UnknownTokens)
|
||||
|
||||
// For each unknown token check if any pattern names it
|
||||
patterns := iace.AllPatterns()
|
||||
for _, tok := range report.UnknownTokens {
|
||||
hits := patternsMentioning(tok, patterns)
|
||||
if len(hits) == 0 {
|
||||
continue
|
||||
}
|
||||
report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{
|
||||
Token: tok,
|
||||
PatternIDs: hits,
|
||||
})
|
||||
}
|
||||
sort.Slice(report.SuggestedDictionaryEntries, func(i, j int) bool {
|
||||
return len(report.SuggestedDictionaryEntries[i].PatternIDs) > len(report.SuggestedDictionaryEntries[j].PatternIDs)
|
||||
})
|
||||
return report
|
||||
}
|
||||
|
||||
func extractTokens(v any, out map[string]bool) {
|
||||
switch x := v.(type) {
|
||||
case string:
|
||||
for _, m := range tokenRE.FindAllString(x, -1) {
|
||||
out[strings.ToLower(m)] = true
|
||||
}
|
||||
case []any:
|
||||
for _, e := range x {
|
||||
extractTokens(e, out)
|
||||
}
|
||||
case map[string]any:
|
||||
for _, e := range x {
|
||||
extractTokens(e, out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// dictionaryVocabulary builds the lowercase set of all keyword strings
|
||||
// that the parser will recognize, including normalized forms (umlauts
|
||||
// replaced like in the keyword dictionary).
|
||||
func dictionaryVocabulary() map[string]bool {
|
||||
out := map[string]bool{}
|
||||
for _, kw := range iace.GetKeywordDictionary() {
|
||||
for _, k := range kw.Keywords {
|
||||
out[strings.ToLower(k)] = true
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// dictTokenHit reports whether tok would be matched by any dictionary
// entry. Dictionary entries can be substrings, so the dictionary is
// treated as a set of stem-like matchers: tok is "known" if it equals a
// dictionary word, contains a dictionary word as a substring, or is
// itself a substring of a dictionary word.
//
// Only keys mapped to true count as members. Empty keys are skipped in
// the substring pass, because every string contains "" and a single
// blank keyword would otherwise mark every token as known.
func dictTokenHit(tok string, dict map[string]bool) bool {
	if dict[tok] {
		return true
	}
	for d, member := range dict {
		if !member || d == "" {
			continue
		}
		if strings.Contains(tok, d) || strings.Contains(d, tok) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/
|
||||
// harm/zone text contains the token (case-insensitive substring).
|
||||
func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
|
||||
tokLower := strings.ToLower(tok)
|
||||
seen := map[string]bool{}
|
||||
var out []string
|
||||
for _, p := range patterns {
|
||||
hay := strings.ToLower(p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE)
|
||||
if !strings.Contains(hay, tokLower) {
|
||||
continue
|
||||
}
|
||||
if seen[p.ID] {
|
||||
continue
|
||||
}
|
||||
seen[p.ID] = true
|
||||
out = append(out, p.ID)
|
||||
if len(out) >= 8 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
Reference in New Issue
Block a user