feat(iace): pattern audit suite + library hygiene wave

Add cmd/iace-audit CLI with 5 deterministic methods that find engine
gaps without ground truth:

- A reachability: 1058 patterns vs achievable tag universe
- B consistency: components vs their declared hazard categories
- C vocabulary: limits-form tokens vs keyword dictionary
- D echo: limits-form sentences vs generated hazards (jaccard)
- E hierarchy: hazards vs ISO 12100 design/protection/info levels

Library fixes triggered by A+B+C findings:

- tag_resolver: synonym map for electrical/pneumatic/hydraulic aliases
- component_library: crush_point + EN03 (gravitational) on C014/C128
  (Hubwerk family) - fixes HP1014/1015/1017/1018 which were silently
  weakly_reachable. noise_source added on 7 components (C006/C011/
  C017/C020/C031/C041/C096). electrical_part on 10 drive components
  (C031/C032/C033/C034/C035/C036/C037/C038/C077/C092). cyber tag
  on 10 sensors (C081-C090) + 3 IT components (C111/C112/C116) +
  KI module C119 (ai_model added). pneumatic_part+hydraulic_part
  on valves C091/C093, hydraulic_part+chemical_risk on pump C097,
  moving_part on motion controller C075
- keyword_dictionary: EN03 added to aufzug/lift/hubwerk/hubgeraet
  (was wrongly EN04-only). New keyword entries for hub-action verbs:
  absenken/senken/anheben/heben + hubhoehe/hubweg/hubgeschwindig

Audit impact:
- A: weakly_reachable 409 -> 358 (-51 patterns now fully reachable)
- B: incomplete components 46 -> 30 (-16, -35%)
- HP1018 (Person unter absenkendem Maschinenteil eingeklemmt):
  weakly_reachable -> reachable

Why: methods A/B/C surfaced that the Kistenhubgeraet test project
generated 0 crush-under-load hazards despite OSHA 1910.212(a)(3) +
EN ISO 12100 6.3.5.5 explicitly requiring them. Three orthogonal
bugs (missing crush_point tag, wrong energy source mapping, missing
action verbs in dictionary) silently disabled the entire lift crush
pattern family.
This commit is contained in:
Benjamin Admin
2026-05-21 10:51:08 +02:00
parent 4946571863
commit f534b52817
12 changed files with 1442 additions and 38 deletions
@@ -0,0 +1,171 @@
package audit
import (
"sort"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
)
// runConsistencyImpl asks: does this component, with its own tags PLUS the
// tags of its TypicalEnergySources, actually trigger at least one pattern
// in every category listed in its TypicalHazardCategories?
//
// A component declares "this is what I am dangerous for" and the engine
// turns that declaration into hazards through patterns. If no pattern can
// fire from the component's tag set, the declaration is decorative — the
// engine will never produce a hazard in that category for this component,
// even though the library author said it should.
func init() {
// Install the real Method B implementation, replacing the default
// assigned where runConsistencyImpl is declared.
runConsistencyImpl = runConsistency
}
// runConsistency implements Method B. For every library component that
// declares hazard categories it checks whether the component's effective
// tags (own tags plus the tags of its typical energy sources) satisfy the
// tag requirements of at least one pattern per declared category.
//
// Components without declared categories, or with every declared category
// covered, count as consistent. All others count as incomplete and are
// listed, sorted by component ID, with suggested tags per uncovered
// category.
func runConsistency() ConsistencyReport {
	library := iace.GetComponentLibrary()
	sources := iace.GetEnergySources()
	allPatterns := iace.AllPatterns()

	sourceIndex := map[string]iace.EnergySourceEntry{}
	for _, src := range sources {
		sourceIndex[src.ID] = src
	}

	out := ConsistencyReport{TotalComponents: len(library)}
	for _, comp := range library {
		declared := comp.TypicalHazardCategories
		if len(declared) == 0 {
			// Nothing declared means nothing can be inconsistent.
			out.Consistent++
			continue
		}

		tagSet := buildEffectiveTags(comp, sourceIndex)
		reachable := categoriesCoveredByPatterns(tagSet, comp.MapsToComponentType, allPatterns)

		var uncovered []string
		for _, cat := range declared {
			if reachable[cat] {
				continue
			}
			uncovered = append(uncovered, cat)
		}
		if len(uncovered) == 0 {
			out.Consistent++
			continue
		}

		entry := ComponentResult{
			ComponentID:        comp.ID,
			NameDE:             comp.NameDE,
			DeclaredCategories: declared,
		}
		// Map iteration order is random, so sort for a stable report.
		for cat := range reachable {
			entry.CoveredCategories = append(entry.CoveredCategories, cat)
		}
		sort.Strings(entry.CoveredCategories)

		for _, cat := range uncovered {
			gap := CategoryGap{
				Category:      cat,
				SuggestedTags: suggestTagsForCategory(cat, tagSet, allPatterns),
			}
			entry.MissingForCategories = append(entry.MissingForCategories, gap)
		}

		out.Incomplete++
		out.IncompleteComponents = append(out.IncompleteComponents, entry)
	}

	sort.Slice(out.IncompleteComponents, func(a, b int) bool {
		return out.IncompleteComponents[a].ComponentID < out.IncompleteComponents[b].ComponentID
	})
	return out
}
// buildEffectiveTags returns the union of a component's own tags and the
// tags of every energy source listed in its TypicalEnergySources. Energy
// source IDs that are not present in energyByID are ignored.
func buildEffectiveTags(c iace.ComponentLibraryEntry, energyByID map[string]iace.EnergySourceEntry) map[string]bool {
	effective := make(map[string]bool)
	add := func(tags []string) {
		for _, tag := range tags {
			effective[tag] = true
		}
	}

	add(c.Tags)
	for _, id := range c.TypicalEnergySources {
		if src, found := energyByID[id]; found {
			add(src.Tags)
		}
	}
	return effective
}
// categoriesCoveredByPatterns collects the hazard categories generated by
// every pattern whose required component tags and required energy tags are
// all contained in tags.
//
// Lifecycle, op-state, and human-role filters are deliberately not
// evaluated — those are project-level. The question answered here is
// whether the library can produce any hazard of a category for this tag
// set at all. The second parameter is accepted but not used.
func categoriesCoveredByPatterns(tags map[string]bool, _ string, patterns []iace.HazardPattern) map[string]bool {
	reachable := make(map[string]bool)
	for i := range patterns {
		pat := patterns[i]
		if tagsCover(tags, pat.RequiredComponentTags) && tagsCover(tags, pat.RequiredEnergyTags) {
			for _, cat := range pat.GeneratedHazardCats {
				reachable[cat] = true
			}
		}
	}
	return reachable
}
// tagsCover reports whether every tag in required maps to true in have.
// An empty required list is always covered.
func tagsCover(have map[string]bool, required []string) bool {
	for i := 0; i < len(required); i++ {
		if present := have[required[i]]; !present {
			return false
		}
	}
	return true
}
// suggestTagsForCategory looks at the patterns that generate hazard
// category cat and returns the tags they require which the component does
// not already have, most frequently required first.
//
// Parameters:
//   - cat: the hazard category that is currently not covered.
//   - have: the component's effective tag set.
//   - patterns: the full pattern library.
//
// At most six tags are returned; nil is returned when no pattern in the
// category requires a tag the component lacks. Tags with equal counts are
// ordered alphabetically so that the output (and the cut-off at six) is
// identical from run to run — the candidates come from a map whose
// iteration order is random.
func suggestTagsForCategory(cat string, have map[string]bool, patterns []iace.HazardPattern) []string {
	const maxSuggestions = 6

	counts := map[string]int{}
	for _, p := range patterns {
		matchCat := false
		for _, c := range p.GeneratedHazardCats {
			if c == cat {
				matchCat = true
				break
			}
		}
		if !matchCat {
			continue
		}
		for _, t := range p.RequiredComponentTags {
			if !have[t] {
				counts[t]++
			}
		}
		for _, t := range p.RequiredEnergyTags {
			if !have[t] {
				counts[t]++
			}
		}
	}
	if len(counts) == 0 {
		return nil
	}

	tags := make([]string, 0, len(counts))
	for t := range counts {
		tags = append(tags, t)
	}
	sort.Slice(tags, func(i, j int) bool {
		if counts[tags[i]] != counts[tags[j]] {
			return counts[tags[i]] > counts[tags[j]]
		}
		// Tie-break on the tag name to keep the result deterministic.
		return tags[i] < tags[j]
	})
	if len(tags) > maxSuggestions {
		tags = tags[:maxSuggestions]
	}
	return tags
}
@@ -0,0 +1,161 @@
package audit
import (
"regexp"
"sort"
"strings"
)
// runEchoImpl checks if each meaningful phrase from the limits-form is
// echoed by at least one generated hazard. A phrase that names a concrete
// scenario, fault, or constraint must reappear (semantically) in some
// hazard's name, scenario, or description. Phrases without echo are gaps:
// the engineer documented the risk but the engine never lifted it into
// the hazard register.
//
// Echo detection here is a lightweight Jaccard overlap of content tokens
// (not embeddings) — robust enough for the demonstrative diagnostic and
// keeps the audit fully deterministic without an external model. The
// caller can later swap in a vector-based scorer.
func init() {
// Install the real Method D implementation, replacing the default
// assigned where runEchoImpl is declared.
runEchoImpl = runEcho
}
// Significant limits-form fields. Each item is (key, label). We only
// audit the freeform fields where engineers describe risks — list/enum
// fields (operating_modes, person_groups, industry_sectors) are out of
// scope because they carry no narrative phrases.
//
// runEcho reads limits[key] as a string and reports label as the Field
// of every orphaned phrase found in that field. Fields are processed in
// the order listed here.
var echoFields = []struct {
key string
label string
}{
{"general_description", "Allg. Beschreibung"},
{"intended_purpose", "Bestimmungsgemaesse Verwendung"},
{"variants", "Varianten"},
{"foreseeable_misuses", "Vorhersehbare Fehlanwendung"},
{"spatial_limits", "Raeumliche Grenzen"},
{"temporal_limits", "Zeitliche Grenzen"},
{"operating_conditions", "Betriebsbedingungen"},
{"energy_supply", "Energieversorgung"},
{"mechanical_interfaces", "Mechanische Schnittstellen"},
{"electrical_interfaces", "Elektrische Schnittstellen"},
{"software_interfaces", "Software-Schnittstellen"},
{"pneumatic_hydraulic_interfaces", "Pneumatik/Hydraulik"},
{"qualification_requirements", "Personenqualifikation"},
}
// sentenceSplit separates a field's text into phrases: at '.', '!' or '?'
// followed by whitespace, or at one or more newlines.
var sentenceSplit = regexp.MustCompile(`[.!?]\s+|\n+`)
// wordRE matches runs of at least four ASCII or German (ä, ö, ü, ß)
// letters; shorter words never become content tokens.
var wordRE = regexp.MustCompile(`[a-zäöüßA-ZÄÖÜ]{4,}`)
// echoThreshold — minimum Jaccard overlap (between sentence content
// tokens and a hazard's content tokens) above which the sentence is
// considered echoed. Tuned by hand to give meaningful results without a
// labeled corpus; the audit reports the actual best score for each
// orphaned phrase so a human can re-tune if needed.
const echoThreshold = 0.18
// runEcho implements Method D. It splits every freeform limits-form field
// listed in echoFields into sentences and checks whether each sentence is
// echoed by at least one hazard, i.e. whether the best Jaccard overlap
// between the sentence's content tokens and any hazard's content tokens
// reaches echoThreshold.
//
// Parameters:
//   - form: the limits-form payload, either wrapped under "limits_form" or
//     already the inner object. Fields that are missing or not strings are
//     skipped.
//   - hazards: the generated hazards as generic maps.
//
// Sentences shorter than 30 bytes or with fewer than 3 content tokens are
// not evaluated and are not counted, so TotalPhrases always equals
// Echoed + Orphaned. Orphaned phrases are returned lowest score first;
// phrases with equal scores keep their document order.
func runEcho(form map[string]any, hazards []map[string]any) EchoReport {
	const (
		minPhraseBytes  = 30
		minPhraseTokens = 3
	)

	limits := unwrapLimits(form)

	// Tokenize each hazard once instead of once per sentence.
	hazardTokens := make([]map[string]bool, 0, len(hazards))
	for _, h := range hazards {
		hazardTokens = append(hazardTokens, contentTokenSet(joinHazardText(h)))
	}

	report := EchoReport{}
	for _, fld := range echoFields {
		raw, _ := limits[fld.key].(string)
		raw = strings.TrimSpace(raw)
		if raw == "" {
			continue
		}
		for _, sent := range sentenceSplit.Split(raw, -1) {
			sent = strings.TrimSpace(sent)
			if len(sent) < minPhraseBytes {
				// Skip very short fragments.
				continue
			}
			phraseTokens := contentTokenSet(sent)
			if len(phraseTokens) < minPhraseTokens {
				// Too little content to score; skipped before counting so
				// the totals stay consistent.
				continue
			}
			report.TotalPhrases++

			best := 0.0
			for _, ht := range hazardTokens {
				if score := jaccard(phraseTokens, ht); score > best {
					best = score
				}
			}
			if best >= echoThreshold {
				report.Echoed++
				continue
			}
			report.Orphaned++
			report.OrphanedPhrases = append(report.OrphanedPhrases, OrphanedPhrase{
				Field:     fld.label,
				Phrase:    sent,
				BestScore: best,
			})
		}
	}

	// Lowest scores first — most clearly orphaned. Stable, so ties stay in
	// the order the phrases appear in the form.
	sort.SliceStable(report.OrphanedPhrases, func(i, j int) bool {
		return report.OrphanedPhrases[i].BestScore < report.OrphanedPhrases[j].BestScore
	})
	return report
}
// unwrapLimits returns the object stored under "limits_form" when it is a
// map, and the form itself otherwise (the caller may already have passed
// the inner object).
func unwrapLimits(form map[string]any) map[string]any {
	nested, isMap := form["limits_form"].(map[string]any)
	if !isMap {
		return form
	}
	return nested
}
// joinHazardText concatenates a hazard's textual fields, separated by a
// single space, in a fixed field order. Fields that are missing or not
// strings are left out; empty strings are kept.
func joinHazardText(h map[string]any) string {
	fields := [...]string{"name", "description", "scenario", "trigger_event", "possible_harm", "hazardous_zone", "category", "sub_category"}

	collected := make([]string, 0, len(fields))
	for i := range fields {
		text, isString := h[fields[i]].(string)
		if !isString {
			continue
		}
		collected = append(collected, text)
	}
	return strings.Join(collected, " ")
}
// contentTokenSet returns the set of lowercased words matched by wordRE in
// s, with entries of stopWords removed.
func contentTokenSet(s string) map[string]bool {
	tokens := make(map[string]bool)
	for _, raw := range wordRE.FindAllString(s, -1) {
		if lowered := strings.ToLower(raw); !stopWords[lowered] {
			tokens[lowered] = true
		}
	}
	return tokens
}
// jaccard returns |a ∩ b| / |a ∪ b| for two token sets, or 0 when either
// set is empty. Membership in b is decided by the stored boolean value,
// while set sizes are taken from len.
func jaccard(a, b map[string]bool) float64 {
	sizeA, sizeB := len(a), len(b)
	if sizeA == 0 || sizeB == 0 {
		return 0
	}

	shared := 0
	for key := range a {
		if b[key] {
			shared++
		}
	}

	// With both sets non-empty the union size is at least 1.
	total := sizeA + sizeB - shared
	return float64(shared) / float64(total)
}
@@ -0,0 +1,158 @@
package audit
import (
"sort"
"strings"
)
// runHierarchyImpl checks the ISO 12100 / EN 12100 risk-reduction
// hierarchy on the generated mitigation set: every safety-relevant
// hazard should have at least one "inherently safe design" measure
// (design) and additionally either a guarding/protective device
// (protection) or an information-for-use measure (information).
//
// Cyber-, ergonomic-, and software-only hazards have looser
// expectations — design alone or information alone may legitimately
// suffice. The audit reports which level is missing, not whether the
// remaining measures are individually correct. That is a different
// check (E2 — semantic quality), out of scope here.
func init() {
// Install the real Method E implementation, replacing the default
// assigned where runHierarchyImpl is declared.
runHierarchyImpl = runHierarchy
}
// hazardExpectsProtection lists hazard categories where a pure
// design+information combination is usually not enough — the engine
// should produce at least one explicit protective measure (guard,
// interlock, sensor, presence detector, …).
//
// expectedMissing consults this map by the hazard's "category" value;
// for categories not listed here a missing protection level is never
// reported.
var hazardExpectsProtection = map[string]bool{
"mechanical_hazard": true,
"electrical_hazard": true,
"thermal_hazard": true,
"pneumatic_hydraulic": true,
"radiation_hazard": true,
"laser_hazard": true,
"fire_explosion_hazard": true,
"chemical_hazard": true,
}
// runHierarchy implements Method E. Mitigations are grouped by their
// "hazard_id" (entries without one are ignored); for every hazard the
// present reduction levels are compared against what expectedMissing
// demands for the hazard's category.
//
// Hazards with no missing level count as complete. For the others each
// missing level bumps its counter and the hazard is listed. The list is
// ordered with protection-missing hazards first, then by category.
func runHierarchy(hazards, mitigations []map[string]any) HierarchyReport {
	out := HierarchyReport{TotalHazards: len(hazards)}

	grouped := make(map[string][]map[string]any)
	for _, mit := range mitigations {
		if key, _ := mit["hazard_id"].(string); key != "" {
			grouped[key] = append(grouped[key], mit)
		}
	}

	for _, hz := range hazards {
		id, _ := hz["id"].(string)
		cat, _ := hz["category"].(string)
		title, _ := hz["name"].(string)

		present := levelsForHazard(grouped[id])
		gaps := expectedMissing(cat, present)
		if len(gaps) == 0 {
			out.Complete++
			continue
		}

		for _, level := range gaps {
			if level == "design" {
				out.MissingDesign++
			} else if level == "protection" {
				out.MissingProtection++
			} else if level == "information" {
				out.MissingInfo++
			}
		}

		out.IncompleteHazards = append(out.IncompleteHazards, HazardHierarchyResult{
			HazardID:      id,
			Name:          title,
			Category:      cat,
			Levels:        present,
			MissingLevels: gaps,
		})
	}

	// Protection gaps are the most consequential, so they lead the list.
	sort.Slice(out.IncompleteHazards, func(i, j int) bool {
		left, right := out.IncompleteHazards[i], out.IncompleteHazards[j]
		leftProt := contains(left.MissingLevels, "protection")
		rightProt := contains(right.MissingLevels, "protection")
		if leftProt == rightProt {
			return left.Category < right.Category
		}
		return leftProt
	})
	return out
}
// levelsForHazard maps the "reduction_type" of each mitigation to one of
// the levels design, protection, or information (case-insensitively;
// "protective" counts as protection) and returns the distinct levels
// found, sorted alphabetically. Unknown or non-string reduction types are
// ignored. The result is nil when no level is present.
func levelsForHazard(mits []map[string]any) []string {
	canonical := map[string]string{
		"design":      "design",
		"protection":  "protection",
		"protective":  "protection",
		"information": "information",
	}

	found := make(map[string]struct{})
	for _, mit := range mits {
		raw, _ := mit["reduction_type"].(string)
		if level, known := canonical[strings.ToLower(raw)]; known {
			found[level] = struct{}{}
		}
	}

	var levels []string
	for level := range found {
		levels = append(levels, level)
	}
	sort.Strings(levels)
	return levels
}
// expectedMissing returns the levels that the hierarchy demands but
// the mitigation set does not provide.
//
// Rule:
// - Every hazard with mitigations should have a design measure.
// - Categories in hazardExpectsProtection additionally need a
// protection measure.
// - All hazards should have an information measure unless they
// already have both design + protection (the information layer
// can then be considered subsumed for the audit's purpose; the
// real engine usually still adds it).
func expectedMissing(category string, present []string) []string {
have := toBoolSet(present)
var missing []string
if !have["design"] {
missing = append(missing, "design")
}
if hazardExpectsProtection[category] && !have["protection"] {
missing = append(missing, "protection")
}
// Information is only flagged if both design and protection are
// also absent — otherwise too noisy. We still surface the case
// where information is the SOLE present level: that means the
// hazard is mitigated only by warning labels, which is rarely
// adequate.
if !have["information"] && !have["design"] && !have["protection"] {
missing = append(missing, "information")
}
return missing
}
// contains reports whether target occurs in list.
func contains(list []string, target string) bool {
	for i := range list {
		if list[i] == target {
			return true
		}
	}
	return false
}
@@ -0,0 +1,37 @@
package audit
// Implementation entry points for Methods B-E. The full algorithms live
// in consistency.go, vocabulary.go, echo.go, and hierarchy.go respectively.
// Each of those files installs its implementation from an init function;
// where none is installed, the wrapper falls back to a default that
// returns an empty report.
// RunConsistency runs Method B (component self-consistency) by delegating
// to the currently installed runConsistencyImpl.
func RunConsistency() ConsistencyReport {
return runConsistencyImpl()
}
// RunVocabulary runs Method C (limits-form vocabulary diff) on form by
// delegating to the currently installed runVocabularyImpl.
func RunVocabulary(form map[string]any) VocabularyReport {
return runVocabularyImpl(form)
}
// RunEcho runs Method D (limits-form echo) on form and hazards by
// delegating to the currently installed runEchoImpl.
func RunEcho(form map[string]any, hazards []map[string]any) EchoReport {
return runEchoImpl(form, hazards)
}
// RunHierarchy runs Method E (hierarchy completeness) on hazards and
// mitigations by delegating to the currently installed runHierarchyImpl.
func RunHierarchy(hazards, mitigations []map[string]any) HierarchyReport {
return runHierarchyImpl(hazards, mitigations)
}
// Default implementations — replaced when each method file lands.
// Keeping them as separate functions in one place avoids name clashes
// once consistency.go etc. add their real implementations.
//
// Each default returns the zero value of its report type. The method
// files overwrite these variables from their init functions.
var (
runConsistencyImpl = func() ConsistencyReport { return ConsistencyReport{} }
runVocabularyImpl = func(form map[string]any) VocabularyReport { return VocabularyReport{} }
runEchoImpl = func(form map[string]any, hazards []map[string]any) EchoReport {
return EchoReport{}
}
runHierarchyImpl = func(hazards, mitigations []map[string]any) HierarchyReport {
return HierarchyReport{}
}
)
@@ -0,0 +1,298 @@
// Package audit provides static and runtime audits of the IACE pattern
// engine — finding pattern reachability, library consistency, and
// limits-form coverage gaps without a ground-truth reference.
package audit
import (
"sort"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
)
// ReachabilityResult is the verdict for a single pattern in Method A.
type ReachabilityResult struct {
PatternID string `json:"pattern_id"`
// Name carries the pattern's German name (serialized as "name_de").
Name string `json:"name_de"`
Priority int `json:"priority"`
// RequiredAllTags is the deduplicated union of the pattern's required
// component tags and required energy tags.
RequiredAllTags []string `json:"required_tags"`
// UnreachableTags lists required tags that no source emits; set only
// for unreachable patterns.
UnreachableTags []string `json:"unreachable_tags,omitempty"`
Status string `json:"status"` // "reachable" | "weakly_reachable" | "unreachable"
// ReachableSources names the single sources that cover all required tags.
ReachableSources []string `json:"reachable_sources,omitempty"`
// FixSuggestions holds human-readable library edit proposals.
FixSuggestions []string `json:"fix_suggestions,omitempty"`
}
// ReachabilityReport is the full Method A output.
//
// Reachable, WeaklyReachable, and Unreachable are pattern counts. Only
// unreachable and weakly reachable patterns are listed individually, each
// list sorted by descending pattern priority.
type ReachabilityReport struct {
TotalPatterns int `json:"total_patterns"`
Reachable int `json:"reachable"`
WeaklyReachable int `json:"weakly_reachable"`
Unreachable int `json:"unreachable"`
// UniverseTags is the sorted list of every tag emitted by any source.
UniverseTags []string `json:"universe_tags"`
UnreachablePatterns []ReachabilityResult `json:"unreachable_patterns"`
WeakPatterns []ReachabilityResult `json:"weak_patterns"`
}
// RunReachability evaluates every pattern against the achievable tag universe.
//
// A pattern is:
//   - "unreachable" if at least one required tag is not produced by any
//     component, energy source, or keyword-dictionary entry.
//   - "weakly_reachable" if all required tags exist in the universe but
//     no single source (one Component or one EnergySource or one Keyword
//     entry) supplies all of them at once — i.e., it relies on multiple
//     parser hits to combine.
//   - "reachable" if some single source covers all required tags. A
//     pattern without any tag requirement also counts as reachable.
//
// The classification ignores ExcludedComponentTags and runtime filters
// (lifecycle/op-state/machine-type), because those are project-level
// concerns. The audit answers "could this pattern EVER fire", not
// "does it fire for project X".
//
// The returned report lists unreachable and weakly reachable patterns,
// each sorted by descending priority, together with fix suggestions.
func RunReachability() ReachabilityReport {
	patterns := iace.AllPatterns()
	comps := iace.GetComponentLibrary()
	energies := iace.GetEnergySources()
	keywords := iace.GetKeywordDictionary()

	// universe: every tag emitted by any source.
	// sourceTags: per-source tag set, used to answer "is there ONE source
	// covering all required tags".
	universe := map[string]bool{}
	sourceTags := map[string]map[string]bool{}

	// Tags indexed by ID so keyword references resolve without rescanning
	// the libraries. Entries sharing an ID contribute all of their tags.
	compTags := make(map[string][]string, len(comps))
	for _, c := range comps {
		sourceTags["component:"+c.ID] = toBoolSet(c.Tags)
		compTags[c.ID] = append(compTags[c.ID], c.Tags...)
		for _, t := range c.Tags {
			universe[t] = true
		}
	}
	energyTags := make(map[string][]string, len(energies))
	for _, e := range energies {
		sourceTags["energy:"+e.ID] = toBoolSet(e.Tags)
		energyTags[e.ID] = append(energyTags[e.ID], e.Tags...)
		for _, t := range e.Tags {
			universe[t] = true
		}
	}

	// A keyword entry's effective tag set is its extra tags plus the tags
	// of every component and energy source it references. Referenced tags
	// are already part of the universe; only the extra tags can add to it.
	for i, kw := range keywords {
		set := toBoolSet(kw.ExtraTags)
		for _, t := range kw.ExtraTags {
			universe[t] = true
		}
		for _, cID := range kw.ComponentIDs {
			for _, t := range compTags[cID] {
				set[t] = true
			}
		}
		for _, eID := range kw.EnergyIDs {
			for _, t := range energyTags[eID] {
				set[t] = true
			}
		}
		sourceTags[keywordLabel(kw, i)] = set
	}

	report := ReachabilityReport{TotalPatterns: len(patterns)}

	// Universe tag list (sorted) for the report header.
	for t := range universe {
		report.UniverseTags = append(report.UniverseTags, t)
	}
	sort.Strings(report.UniverseTags)

	for _, p := range patterns {
		all := dedup(append(append([]string{}, p.RequiredComponentTags...), p.RequiredEnergyTags...))
		if len(all) == 0 {
			// Pattern with no tag requirements relies on lifecycle/machine_type
			// filters only — count as reachable by default.
			report.Reachable++
			continue
		}

		var missing []string
		for _, t := range all {
			if !universe[t] {
				missing = append(missing, t)
			}
		}

		res := ReachabilityResult{
			PatternID:       p.ID,
			Name:            p.NameDE,
			Priority:        p.Priority,
			RequiredAllTags: all,
		}

		switch {
		case len(missing) > 0:
			res.Status = "unreachable"
			res.UnreachableTags = missing
			res.FixSuggestions = suggestFixes(p, missing, comps, sourceTags)
			report.Unreachable++
			report.UnreachablePatterns = append(report.UnreachablePatterns, res)
		case len(findSingleSourceCovers(all, sourceTags)) > 0:
			// Fully reachable patterns are counted but not listed.
			report.Reachable++
		default:
			res.Status = "weakly_reachable"
			res.FixSuggestions = suggestSingleSourceFixes(p, all, comps, sourceTags)
			report.WeaklyReachable++
			report.WeakPatterns = append(report.WeakPatterns, res)
		}
	}

	sort.Slice(report.UnreachablePatterns, func(i, j int) bool {
		return report.UnreachablePatterns[i].Priority > report.UnreachablePatterns[j].Priority
	})
	sort.Slice(report.WeakPatterns, func(i, j int) bool {
		return report.WeakPatterns[i].Priority > report.WeakPatterns[j].Priority
	})
	return report
}
// findSingleSourceCovers returns, sorted by name, every source whose tag
// set contains all required tags. With no required tags every source
// qualifies.
func findSingleSourceCovers(required []string, sourceTags map[string]map[string]bool) []string {
	var covering []string
sources:
	for name, tagSet := range sourceTags {
		for _, need := range required {
			if !tagSet[need] {
				continue sources
			}
		}
		covering = append(covering, name)
	}
	sort.Strings(covering)
	return covering
}
// suggestFixes produces one suggestion per missing tag of an unreachable
// pattern. When nearComponents finds candidate components, the suggestion
// names up to three of them as places to add the tag; otherwise it states
// that the tag needs a new component or energy source.
func suggestFixes(p iace.HazardPattern, missing []string, comps []iace.ComponentLibraryEntry, sourceTags map[string]map[string]bool) []string {
	var suggestions []string
	for _, tag := range missing {
		msg := "Tag '" + tag + "' is undefined anywhere — needs a new component or energy source carrying it"
		if owners := nearComponents(p, tag, comps, sourceTags); len(owners) > 0 {
			msg = "Add tag '" + tag + "' to one of: " + joinFirst(owners, 3)
		}
		suggestions = append(suggestions, msg)
	}
	return suggestions
}
// suggestSingleSourceFixes proposes how to turn a weakly reachable pattern
// into a reachable one: it finds the single source that already covers the
// most required tags and suggests adding the remaining tags to it.
//
// Parameters:
//   - p, comps: accepted for signature parity with suggestFixes; not used.
//   - all: the pattern's deduplicated required tags.
//   - sourceTags: tag set per source name.
//
// Returns a single-element slice with the suggestion, or nil when the best
// source already covers everything. Sources are examined in sorted name
// order, so when several sources cover equally many tags the
// alphabetically first one is chosen and the result is the same on every
// run (plain map iteration would pick one at random).
func suggestSingleSourceFixes(p iace.HazardPattern, all []string, comps []iace.ComponentLibraryEntry, sourceTags map[string]map[string]bool) []string {
	names := make([]string, 0, len(sourceTags))
	for src := range sourceTags {
		names = append(names, src)
	}
	sort.Strings(names)

	best := ""
	bestCover := 0
	var bestMissing []string
	for _, src := range names {
		tags := sourceTags[src]
		hit := 0
		var miss []string
		for _, t := range all {
			if tags[t] {
				hit++
			} else {
				miss = append(miss, t)
			}
		}
		// Strictly greater: the first (alphabetically smallest) source wins ties.
		if hit > bestCover {
			best, bestCover, bestMissing = src, hit, miss
		}
	}

	if best == "" || bestCover == 0 {
		return []string{"No single source covers any required tags — pattern needs a new dedicated component"}
	}
	if len(bestMissing) == 0 {
		return nil
	}
	return []string{"Closest single source '" + best + "' covers " + itoa(bestCover) + "/" + itoa(len(all)) + " tags. Add missing tags to it: " + joinFirst(bestMissing, 5)}
}
// nearComponents ranks components as candidates to receive a missing tag.
// A component scores one point for each of the pattern's other required
// tags that it already carries; components scoring zero are dropped and
// the rest are returned as "ID (NameDE)" labels, highest score first.
// Returns nil when the pattern requires no tag besides the missing one.
// The sourceTags parameter is not used.
func nearComponents(p iace.HazardPattern, missing string, comps []iace.ComponentLibraryEntry, sourceTags map[string]map[string]bool) []string {
	combined := append(append([]string{}, p.RequiredComponentTags...), p.RequiredEnergyTags...)
	others := removeOne(dedup(combined), missing)
	if len(others) == 0 {
		return nil
	}

	type candidate struct {
		label   string
		overlap int
	}
	var ranked []candidate
	for _, comp := range comps {
		owned := toBoolSet(comp.Tags)
		overlap := 0
		for _, tag := range others {
			if owned[tag] {
				overlap++
			}
		}
		if overlap == 0 {
			continue
		}
		ranked = append(ranked, candidate{label: comp.ID + " (" + comp.NameDE + ")", overlap: overlap})
	}
	sort.Slice(ranked, func(i, j int) bool { return ranked[i].overlap > ranked[j].overlap })

	var labels []string
	for _, cand := range ranked {
		labels = append(labels, cand.label)
	}
	return labels
}
// keywordLabel builds the source name of a keyword entry: "keyword:"
// followed by the entry's first keyword, or by its index in the
// dictionary when the entry has no keywords.
func keywordLabel(kw iace.KeywordEntry, idx int) string {
	suffix := itoa(idx)
	if len(kw.Keywords) > 0 {
		suffix = kw.Keywords[0]
	}
	return "keyword:" + suffix
}
@@ -0,0 +1,84 @@
package audit
// Stubs for Methods B-E. Each is filled in its own file as the audit
// suite grows. Keeping the type contracts here lets the CLI compile
// before each method has its full implementation.
// ============================================================================
// Method B — Component Self-Consistency
// ============================================================================
// CategoryGap describes one declared hazard category that no pattern can
// produce for a component.
type CategoryGap struct {
// Category is the declared but uncovered hazard category.
Category string `json:"category"`
// SuggestedTags is filled by runConsistency from suggestTagsForCategory.
SuggestedTags []string `json:"suggested_tags"`
}
// ComponentResult is the Method B finding for one incomplete component.
type ComponentResult struct {
ComponentID string `json:"component_id"`
NameDE string `json:"name_de"`
// DeclaredCategories is the component's TypicalHazardCategories.
DeclaredCategories []string `json:"declared_categories"`
// CoveredCategories is the sorted list of every category some pattern
// can generate from the component's effective tags.
CoveredCategories []string `json:"covered_categories"`
// MissingForCategories has one entry per declared category that is
// not covered.
MissingForCategories []CategoryGap `json:"missing_for_categories,omitempty"`
}
// ConsistencyReport is the full Method B output. Consistent and
// Incomplete are component counts; IncompleteComponents is sorted by
// component ID.
type ConsistencyReport struct {
TotalComponents int `json:"total_components"`
Consistent int `json:"consistent"`
Incomplete int `json:"incomplete"`
IncompleteComponents []ComponentResult `json:"incomplete_components"`
}
// ============================================================================
// Method C — Limits-Form Vocabulary Diff
// ============================================================================
// DictionarySuggestion names a limits-form token that the keyword
// dictionary does not know but that appears in pattern text.
type DictionarySuggestion struct {
Token string `json:"token"`
// NOTE(review): runVocabulary does not set Field; confirm whether any
// other producer fills it.
Field string `json:"field"`
// PatternIDs lists patterns whose text mentions the token.
PatternIDs []string `json:"pattern_ids"`
}
// VocabularyReport is the full Method C output.
type VocabularyReport struct {
// UniqueTokens counts all distinct tokens extracted from the form,
// stop words included.
UniqueTokens int `json:"unique_tokens"`
// KnownTokens and UnknownTokens are sorted and exclude stop words.
KnownTokens []string `json:"known_tokens"`
UnknownTokens []string `json:"unknown_tokens"`
// SuggestedDictionaryEntries is ordered by descending number of
// mentioning patterns.
SuggestedDictionaryEntries []DictionarySuggestion `json:"suggested_dictionary_entries"`
}
// ============================================================================
// Method D — Limits-Form Echo
// ============================================================================
// OrphanedPhrase is a limits-form sentence that no hazard echoes.
type OrphanedPhrase struct {
// Field is the display label of the limits-form field the sentence
// came from.
Field string `json:"field"`
Phrase string `json:"phrase"`
// BestScore is the highest Jaccard overlap with any hazard; it is
// below echoThreshold for every orphaned phrase.
BestScore float64 `json:"best_score"`
}
// EchoReport is the full Method D output.
type EchoReport struct {
// TotalPhrases is the number of limits-form sentences counted by runEcho.
TotalPhrases int `json:"total_phrases"`
Echoed int `json:"echoed"`
Orphaned int `json:"orphaned"`
// OrphanedPhrases is ordered lowest BestScore first.
OrphanedPhrases []OrphanedPhrase `json:"orphaned_phrases"`
}
// ============================================================================
// Method E — Hierarchy Completeness
// ============================================================================
// HazardHierarchyResult is the Method E finding for one hazard whose
// mitigation set lacks at least one expected level.
type HazardHierarchyResult struct {
HazardID string `json:"hazard_id"`
Name string `json:"name"`
Category string `json:"category"`
// Levels lists the reduction levels present (serialized as
// "present_levels").
Levels []string `json:"present_levels"`
// MissingLevels lists the expected levels that are absent.
MissingLevels []string `json:"missing_levels"`
}
// HierarchyReport is the full Method E output.
type HierarchyReport struct {
TotalHazards int `json:"total_hazards"`
// Complete counts hazards with no missing level.
Complete int `json:"complete"`
// The Missing* counters are incremented once per hazard lacking that
// level; one hazard can contribute to several of them.
MissingDesign int `json:"missing_design"`
MissingProtection int `json:"missing_protection"`
MissingInfo int `json:"missing_information"`
IncompleteHazards []HazardHierarchyResult `json:"incomplete_hazards"`
}
@@ -0,0 +1,62 @@
package audit
import (
	"strconv"
	"strings"
)
// appendUnique appends item to list unless list already contains it, in
// which case list is returned unchanged.
func appendUnique(list []string, item string) []string {
	for i := 0; i < len(list); i++ {
		if list[i] == item {
			return list
		}
	}
	return append(list, item)
}
// toBoolSet converts a list of strings into a set. The result is never
// nil, even for an empty or nil list.
func toBoolSet(list []string) map[string]bool {
	set := make(map[string]bool, len(list))
	for i := range list {
		set[list[i]] = true
	}
	return set
}
// dedup returns the elements of list with duplicates removed, keeping the
// first occurrence of each in its original position. The result is nil
// for an empty list.
func dedup(list []string) []string {
	var unique []string
	known := make(map[string]struct{})
	for _, item := range list {
		if _, dup := known[item]; dup {
			continue
		}
		known[item] = struct{}{}
		unique = append(unique, item)
	}
	return unique
}
// removeOne returns a new slice holding the elements of list that differ
// from item. Despite the name, every occurrence of item is dropped. The
// result is never nil.
func removeOne(list []string, item string) []string {
	kept := make([]string, 0, len(list))
	for i := range list {
		if list[i] == item {
			continue
		}
		kept = append(kept, list[i])
	}
	return kept
}
// joinFirst joins at most the first n elements of list with ", ". When
// list has more than n elements the result ends in ", ..." to mark the
// truncation.
func joinFirst(list []string, n int) string {
	if len(list) > n {
		return joinAll(list[:n]) + ", ..."
	}
	return joinAll(list)
}
// joinAll joins the elements of list with ", ". An empty or nil list
// yields the empty string.
func joinAll(list []string) string {
	return strings.Join(list, ", ")
}
// itoa formats n as a base-10 string.
func itoa(n int) string {
	return strconv.FormatInt(int64(n), 10)
}
@@ -0,0 +1,153 @@
package audit
import (
"regexp"
"sort"
"strings"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
)
// runVocabularyImpl takes a limits-form payload (the structured machine
// description filled in by the engineer) and asks: which of its words
// are unknown to the keyword dictionary yet appear in any pattern's
// scenario/trigger/harm/zone text? Each such word is a dictionary gap —
// the engineer typed a term that some pattern is waiting for, but the
// parser cannot translate it into a tag.
func init() {
// Install the real Method C implementation, replacing the default
// assigned where runVocabularyImpl is declared.
runVocabularyImpl = runVocabulary
}
// tokenRE matches runs of at least four ASCII or German (ä, ö, ü, ß)
// letters; extractTokens uses it to pull candidate words out of text.
var tokenRE = regexp.MustCompile(`[a-zäöüßA-ZÄÖÜ]{4,}`)
// German + English stop words that show up in any narrative but carry
// no engineering meaning. Kept short on purpose — we only want to drop
// obvious filler.
//
// Lookups use lowercased tokens. Entries shorter than four letters can
// never match, because the tokenizing regexps only emit words of four or
// more letters.
// NOTE(review): umlaut words appear only in transliterated form here
// (koennen, ueber, fuer), while the tokenizer also accepts ä/ö/ü — so
// e.g. "über" would not be filtered. Confirm whether input is
// transliterated before it reaches the audit.
var stopWords = map[string]bool{
"oder": true, "und": true, "auch": true, "wenn": true, "wird": true,
"werden": true, "kann": true, "koennen": true, "soll": true, "muss": true,
"sind": true, "eine": true, "einer": true, "einem": true, "einen": true,
"diese": true, "dieser": true, "dieses": true, "diesem": true, "diesen": true,
"durch": true, "nach": true, "ueber": true, "unter": true, "zwischen": true,
"nicht": true, "ohne": true, "fuer": true, "bzw": true, "etc": true,
"sowie": true, "siehe": true, "etwa": true, "ggf": true, "the": true,
"with": true, "from": true, "this": true, "that": true, "have": true,
"insbesondere": true, "ausschliesslich": true, "ebenfalls": true,
"jeweils": true, "weitere": true, "weiteren": true, "weiterer": true,
}
// runVocabulary implements Method C. It extracts every token from the
// limits form (wrapped under "limits_form" or passed as the inner object),
// drops stop words, and sorts the rest into tokens the keyword dictionary
// would match and tokens it would not. Each unknown token that also
// occurs in some pattern's text becomes a dictionary suggestion;
// suggestions are ordered by how many patterns mention the token.
//
// UniqueTokens counts all extracted tokens, stop words included.
func runVocabulary(form map[string]any) VocabularyReport {
	// The form may already be the inner object.
	limits := form
	if inner, nested := form["limits_form"].(map[string]any); nested {
		limits = inner
	}

	seen := make(map[string]bool)
	for _, value := range limits {
		extractTokens(value, seen)
	}

	out := VocabularyReport{UniqueTokens: len(seen)}
	dict := dictionaryVocabulary()
	for word := range seen {
		switch {
		case stopWords[word]:
			// Filler word: neither known nor unknown.
		case dictTokenHit(word, dict):
			out.KnownTokens = append(out.KnownTokens, word)
		default:
			out.UnknownTokens = append(out.UnknownTokens, word)
		}
	}
	sort.Strings(out.KnownTokens)
	sort.Strings(out.UnknownTokens)

	// An unknown token matters only if some pattern is waiting for it.
	allPatterns := iace.AllPatterns()
	for _, word := range out.UnknownTokens {
		ids := patternsMentioning(word, allPatterns)
		if len(ids) > 0 {
			out.SuggestedDictionaryEntries = append(out.SuggestedDictionaryEntries, DictionarySuggestion{
				Token:      word,
				PatternIDs: ids,
			})
		}
	}
	sort.Slice(out.SuggestedDictionaryEntries, func(i, j int) bool {
		entries := out.SuggestedDictionaryEntries
		return len(entries[i].PatternIDs) > len(entries[j].PatternIDs)
	})
	return out
}
// extractTokens walks v and adds every lowercased tokenRE match found in
// string values to out. Slices and maps are descended into recursively;
// values of any other type are ignored.
func extractTokens(v any, out map[string]bool) {
	if text, isText := v.(string); isText {
		for _, word := range tokenRE.FindAllString(text, -1) {
			out[strings.ToLower(word)] = true
		}
		return
	}
	if items, isList := v.([]any); isList {
		for i := range items {
			extractTokens(items[i], out)
		}
		return
	}
	if fields, isObject := v.(map[string]any); isObject {
		for _, child := range fields {
			extractTokens(child, out)
		}
	}
}
// dictionaryVocabulary builds the set of all keyword strings in the
// keyword dictionary, lowercased. No other normalization is applied here:
// keywords are taken as they are spelled in the dictionary.
func dictionaryVocabulary() map[string]bool {
	vocab := make(map[string]bool)
	entries := iace.GetKeywordDictionary()
	for i := range entries {
		for _, word := range entries[i].Keywords {
			vocab[strings.ToLower(word)] = true
		}
	}
	return vocab
}
// dictTokenHit returns true if the token would be matched by any
// dictionary entry. Dictionary entries can be substrings, so the dict is
// treated as a set of stem-like matchers: a token is "known" if it equals
// a dict word OR contains a dict word as substring OR the dict word
// contains the token.
//
// Empty strings never match: every string contains the empty string, so a
// single empty dictionary keyword would otherwise classify every token as
// known and hide all vocabulary gaps. An empty token is likewise never a
// hit.
func dictTokenHit(tok string, dict map[string]bool) bool {
	if tok == "" {
		return false
	}
	if dict[tok] {
		return true
	}
	for d := range dict {
		if d == "" {
			continue
		}
		if strings.Contains(tok, d) || strings.Contains(d, tok) {
			return true
		}
	}
	return false
}
// patternsMentioning returns up to 8 IDs of patterns whose scenario,
// trigger, harm, zone, or name text contains tok as a case-insensitive
// substring. Each pattern ID is listed at most once, in library order.
func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
	const maxHits = 8

	needle := strings.ToLower(tok)
	listed := map[string]bool{}
	var ids []string
	for _, p := range patterns {
		if listed[p.ID] {
			continue
		}
		text := p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE
		if !strings.Contains(strings.ToLower(text), needle) {
			continue
		}
		listed[p.ID] = true
		ids = append(ids, p.ID)
		if len(ids) >= maxHits {
			break
		}
	}
	return ids
}