feat(iace): pattern audit suite + library hygiene wave

Add cmd/iace-audit CLI with 5 deterministic methods that find engine gaps without ground truth: - A reachability: 1058 patterns vs achievable tag universe - B consistency: components vs their declared hazard categories - C vocabulary: limits-form tokens vs keyword dictionary - D echo: limits-form sentences vs generated hazards (jaccard) - E hierarchy: hazards vs ISO 12100 design/protection/info levels Library fixes triggered by A+B+C findings: - tag_resolver: synonym map for electrical/pneumatic/hydraulic aliases - component_library: crush_point + EN03 (gravitational) on C014/C128 (Hubwerk family) - fixes HP1014/1015/1017/1018 which were silently weakly_reachable. noise_source added on 7 components (C006/C011/C017/C020/C031/C041/C096). electrical_part on 10 drive components (C031/C032/C033/C034/C035/C036/C037/C038/C077/C092). cyber tag on 10 sensors (C081-C090) + 3 IT components (C111/C112/C116) + KI module C119 (ai_model added). pneumatic_part+hydraulic_part on valves C091/C093, hydraulic_part+chemical_risk on pump C097, moving_part on motion controller C075 - keyword_dictionary: EN03 added to aufzug/lift/hubwerk/hubgeraet (was wrongly EN04-only). New keyword entries for hub-action verbs: absenken/senken/anheben/heben + hubhoehe/hubweg/hubgeschwindig Audit impact: - A: weakly_reachable 409 -> 358 (-51 patterns now fully reachable) - B: incomplete components 46 -> 30 (-16, -35%) - HP1018 (Person unter absenkendem Maschinenteil eingeklemmt): weakly_reachable -> reachable Why: methods A/B/C surfaced that the Kistenhubgeraet test project generated 0 crush-under-load hazards despite OSHA 1910.212(a)(3) + EN ISO 12100 6.3.5.5 explicitly requiring them. Three orthogonal bugs (missing crush_point tag, wrong energy source mapping, missing action verbs in dictionary) silently disabled the entire lift crush pattern family.
2026-05-21 10:51:08 +02:00
parent 4946571863
commit f534b52817
12 changed files with 1442 additions and 38 deletions
@@ -0,0 +1,171 @@
+package audit
+
+import (
+	"sort"
+
+	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
+)
+
+// runConsistencyImpl asks: does this component, with its own tags PLUS the
+// tags of its TypicalEnergySources, actually trigger at least one pattern
+// in every category listed in its TypicalHazardCategories?
+//
+// A component declares "this is what I am dangerous for" and the engine
+// turns that declaration into hazards through patterns. If no pattern can
+// fire from the component's tag set, the declaration is decorative — the
+// engine will never produce a hazard in that category for this component,
+// even though the library author said it should.
+//
+// init installs runConsistency as the function behind the
+// runConsistencyImpl hook variable.
+func init() {
+	runConsistencyImpl = runConsistency
+}
+
+// runConsistency audits every library component against the hazard
+// categories it declares. A component is counted as consistent when it
+// declares no categories, or when each declared category is generated by
+// at least one pattern whose required tags are all in the component's
+// effective tag set. Every other component is reported with its covered
+// categories and, per uncovered category, a list of suggested tags.
+// The incomplete components are returned sorted by component ID.
+func runConsistency() ConsistencyReport {
+	library := iace.GetComponentLibrary()
+	sources := iace.GetEnergySources()
+	allPatterns := iace.AllPatterns()
+
+	// Index energy sources by ID so component references resolve directly.
+	energyIndex := make(map[string]iace.EnergySourceEntry, len(sources))
+	for _, src := range sources {
+		energyIndex[src.ID] = src
+	}
+
+	out := ConsistencyReport{TotalComponents: len(library)}
+	for _, comp := range library {
+		declared := comp.TypicalHazardCategories
+		if len(declared) == 0 {
+			// Nothing declared, so nothing can be contradicted.
+			out.Consistent++
+			continue
+		}
+
+		tagSet := buildEffectiveTags(comp, energyIndex)
+		reachable := categoriesCoveredByPatterns(tagSet, comp.MapsToComponentType, allPatterns)
+
+		// Collect one gap (with tag suggestions) per declared category
+		// that no pattern can produce for this component.
+		var gaps []CategoryGap
+		for _, category := range declared {
+			if reachable[category] {
+				continue
+			}
+			gaps = append(gaps, CategoryGap{
+				Category:      category,
+				SuggestedTags: suggestTagsForCategory(category, tagSet, allPatterns),
+			})
+		}
+		if len(gaps) == 0 {
+			out.Consistent++
+			continue
+		}
+
+		// Map iteration order is random; sort for a stable report.
+		var coveredList []string
+		for category := range reachable {
+			coveredList = append(coveredList, category)
+		}
+		sort.Strings(coveredList)
+
+		out.Incomplete++
+		out.IncompleteComponents = append(out.IncompleteComponents, ComponentResult{
+			ComponentID:          comp.ID,
+			NameDE:               comp.NameDE,
+			DeclaredCategories:   declared,
+			CoveredCategories:    coveredList,
+			MissingForCategories: gaps,
+		})
+	}
+
+	sort.Slice(out.IncompleteComponents, func(i, j int) bool {
+		return out.IncompleteComponents[i].ComponentID < out.IncompleteComponents[j].ComponentID
+	})
+	return out
+}
+
+// buildEffectiveTags returns the union of the component's own tags and
+// the tags of every energy source listed in its TypicalEnergySources.
+// Energy source IDs that are not present in energyByID are skipped.
+func buildEffectiveTags(c iace.ComponentLibraryEntry, energyByID map[string]iace.EnergySourceEntry) map[string]bool {
+	effective := make(map[string]bool)
+	add := func(tags []string) {
+		for _, tag := range tags {
+			effective[tag] = true
+		}
+	}
+
+	add(c.Tags)
+	for _, id := range c.TypicalEnergySources {
+		if src, known := energyByID[id]; known {
+			add(src.Tags)
+		}
+	}
+	return effective
+}
+
+// categoriesCoveredByPatterns returns the set of hazard categories that
+// at least one pattern can generate from the given tag set. A pattern
+// qualifies when both its RequiredComponentTags and its
+// RequiredEnergyTags are fully contained in tags.
+//
+// Lifecycle, op-state, and human-role filters are deliberately ignored:
+// they are project-level. The question answered here is whether the
+// library can produce ANY hazard in a category for this component,
+// assuming the project is configured reasonably. The second parameter
+// (the component type) is accepted but not used.
+func categoriesCoveredByPatterns(tags map[string]bool, _ string, patterns []iace.HazardPattern) map[string]bool {
+	result := make(map[string]bool)
+	for i := range patterns {
+		pat := &patterns[i]
+		canFire := tagsCover(tags, pat.RequiredComponentTags) && tagsCover(tags, pat.RequiredEnergyTags)
+		if !canFire {
+			continue
+		}
+		for _, category := range pat.GeneratedHazardCats {
+			result[category] = true
+		}
+	}
+	return result
+}
+
+// tagsCover reports whether every tag in required is present (mapped to
+// true) in have. An empty or nil required list is always covered.
+func tagsCover(have map[string]bool, required []string) bool {
+	for i := 0; i < len(required); i++ {
+		if present := have[required[i]]; !present {
+			return false
+		}
+	}
+	return true
+}
+
+// suggestTagsForCategory looks at patterns that DO generate this category
+// and identifies the tags that would close the gap. It counts, across
+// those patterns, every required component or energy tag the component
+// does not already have, and returns up to six tags ordered by
+// descending count.
+//
+// Tags with the same count are ordered alphabetically. Without that
+// tie-break the order (and which tags survive the cut to six) would
+// depend on map iteration order and change from run to run.
+//
+// Returns nil when no pattern generates cat or nothing is missing.
+func suggestTagsForCategory(cat string, have map[string]bool, patterns []iace.HazardPattern) []string {
+	const maxSuggestions = 6
+
+	counts := map[string]int{}
+	for _, p := range patterns {
+		matchCat := false
+		for _, c := range p.GeneratedHazardCats {
+			if c == cat {
+				matchCat = true
+				break
+			}
+		}
+		if !matchCat {
+			continue
+		}
+		for _, t := range p.RequiredComponentTags {
+			if !have[t] {
+				counts[t]++
+			}
+		}
+		for _, t := range p.RequiredEnergyTags {
+			if !have[t] {
+				counts[t]++
+			}
+		}
+	}
+	if len(counts) == 0 {
+		return nil
+	}
+
+	tags := make([]string, 0, len(counts))
+	for t := range counts {
+		tags = append(tags, t)
+	}
+	sort.Slice(tags, func(i, j int) bool {
+		ci, cj := counts[tags[i]], counts[tags[j]]
+		if ci != cj {
+			return ci > cj
+		}
+		return tags[i] < tags[j]
+	})
+	if len(tags) > maxSuggestions {
+		tags = tags[:maxSuggestions]
+	}
+	return tags
+}
@@ -0,0 +1,161 @@
+package audit
+
+import (
+	"regexp"
+	"sort"
+	"strings"
+)
+
+// runEchoImpl checks if each meaningful phrase from the limits-form is
+// echoed by at least one generated hazard. A phrase that names a concrete
+// scenario, fault, or constraint must reappear (semantically) in some
+// hazard's name, scenario, or description. Phrases without echo are gaps:
+// the engineer documented the risk but the engine never lifted it into
+// the hazard register.
+//
+// Echo detection here is a lightweight Jaccard overlap of content tokens
+// (not embeddings) — robust enough for the demonstrative diagnostic and
+// keeps the audit fully deterministic without an external model. The
+// caller can later swap in a vector-based scorer.
+//
+// init installs runEcho as the function behind the runEchoImpl hook
+// variable.
+func init() {
+	runEchoImpl = runEcho
+}
+
+// Significant limits-form fields. Each item is (key, label). We only
+// audit the freeform fields where engineers describe risks — list/enum
+// fields (operating_modes, person_groups, industry_sectors) are out of
+// scope because they carry no narrative phrases.
+//
+// key is the name looked up in the limits-form map; label is the
+// human-readable field name written into OrphanedPhrase.Field. Fields
+// are audited in the order listed here.
+var echoFields = []struct {
+	key   string
+	label string
+}{
+	{"general_description", "Allg. Beschreibung"},
+	{"intended_purpose", "Bestimmungsgemaesse Verwendung"},
+	{"variants", "Varianten"},
+	{"foreseeable_misuses", "Vorhersehbare Fehlanwendung"},
+	{"spatial_limits", "Raeumliche Grenzen"},
+	{"temporal_limits", "Zeitliche Grenzen"},
+	{"operating_conditions", "Betriebsbedingungen"},
+	{"energy_supply", "Energieversorgung"},
+	{"mechanical_interfaces", "Mechanische Schnittstellen"},
+	{"electrical_interfaces", "Elektrische Schnittstellen"},
+	{"software_interfaces", "Software-Schnittstellen"},
+	{"pneumatic_hydraulic_interfaces", "Pneumatik/Hydraulik"},
+	{"qualification_requirements", "Personenqualifikation"},
+}
+
+// sentenceSplit separates free text into sentence-like fragments: it
+// matches '.', '!' or '?' followed by whitespace, or one or more newlines.
+var sentenceSplit = regexp.MustCompile(`[.!?]\s+|\n+`)
+
+// wordRE matches runs of at least four letters drawn from ASCII a-z/A-Z
+// and the German letters ä, ö, ü, ß, Ä, Ö, Ü. Digits, hyphens and any
+// other characters end a run, and shorter words are never matched.
+var wordRE = regexp.MustCompile(`[a-zäöüßA-ZÄÖÜ]{4,}`)
+
+// echoThreshold — minimum Jaccard overlap (between sentence content
+// tokens and a hazard's content tokens) above which the sentence is
+// considered echoed. Tuned by hand to give meaningful results without a
+// labeled corpus; the audit reports the actual best score for each
+// orphaned phrase so a human can re-tune if needed.
+const echoThreshold = 0.18
+
+func runEcho(form map[string]any, hazards []map[string]any) EchoReport {
+	limits := unwrapLimits(form)
+
+	// Precompute hazard token bags once
+	type bag struct {
+		tokens map[string]bool
+		text   string
+	}
+	var hazardBags []bag
+	for _, h := range hazards {
+		txt := joinHazardText(h)
+		toks := contentTokenSet(txt)
+		hazardBags = append(hazardBags, bag{tokens: toks, text: txt})
+	}
+
+	report := EchoReport{}
+	for _, fld := range echoFields {
+		raw, _ := limits[fld.key].(string)
+		raw = strings.TrimSpace(raw)
+		if raw == "" {
+			continue
+		}
+		for _, sent := range sentenceSplit.Split(raw, -1) {
+			sent = strings.TrimSpace(sent)
+			if len(sent) < 30 {
+				// Skip very short fragments
+				continue
+			}
+			report.TotalPhrases++
+			st := contentTokenSet(sent)
+			if len(st) < 3 {
+				continue
+			}
+			bestScore := 0.0
+			for _, hb := range hazardBags {
+				score := jaccard(st, hb.tokens)
+				if score > bestScore {
+					bestScore = score
+				}
+			}
+			if bestScore >= echoThreshold {
+				report.Echoed++
+				continue
+			}
+			report.Orphaned++
+			report.OrphanedPhrases = append(report.OrphanedPhrases, OrphanedPhrase{
+				Field:     fld.label,
+				Phrase:    sent,
+				BestScore: bestScore,
+			})
+		}
+	}
+
+	sort.Slice(report.OrphanedPhrases, func(i, j int) bool {
+		// Lowest scores first — most clearly orphaned
+		return report.OrphanedPhrases[i].BestScore < report.OrphanedPhrases[j].BestScore
+	})
+	return report
+}
+
+// unwrapLimits returns the object stored under "limits_form" when that
+// entry is itself a map; otherwise form is assumed to already be the
+// limits-form object and is returned unchanged.
+func unwrapLimits(form map[string]any) map[string]any {
+	nested, isMap := form["limits_form"].(map[string]any)
+	if !isMap {
+		return form
+	}
+	return nested
+}
+
+func joinHazardText(h map[string]any) string {
+	parts := []string{}
+	for _, k := range []string{"name", "description", "scenario", "trigger_event", "possible_harm", "hazardous_zone", "category", "sub_category"} {
+		if v, ok := h[k].(string); ok {
+			parts = append(parts, v)
+		}
+	}
+	return strings.Join(parts, " ")
+}
+
+// contentTokenSet returns the set of distinct lower-cased words that
+// wordRE finds in s, excluding any word listed in stopWords.
+func contentTokenSet(s string) map[string]bool {
+	tokens := make(map[string]bool)
+	words := wordRE.FindAllString(s, -1)
+	for i := range words {
+		lowered := strings.ToLower(words[i])
+		if !stopWords[lowered] {
+			tokens[lowered] = true
+		}
+	}
+	return tokens
+}
+
+// jaccard returns |a ∩ b| / |a ∪ b| for two token sets, or 0 when either
+// set is empty. An element of a counts as shared only if b maps it to
+// true; the set sizes are the maps' lengths.
+func jaccard(a, b map[string]bool) float64 {
+	sizeA, sizeB := len(a), len(b)
+	if sizeA == 0 || sizeB == 0 {
+		return 0
+	}
+	shared := 0
+	for key := range a {
+		if b[key] {
+			shared++
+		}
+	}
+	// Both sets are non-empty here, so the union is at least 1.
+	return float64(shared) / float64(sizeA+sizeB-shared)
+}
@@ -0,0 +1,158 @@
+package audit
+
+import (
+	"sort"
+	"strings"
+)
+
+// runHierarchyImpl checks the ISO 12100 / EN 12100 risk-reduction
+// hierarchy on the generated mitigation set. As implemented by
+// expectedMissing, every hazard is expected to have at least one
+// "inherently safe design" measure (design); hazards in the categories
+// listed in hazardExpectsProtection are additionally expected to have a
+// guarding/protective device (protection). A missing information-for-use
+// measure (information) is reported only when the hazard has no
+// recognised measure at all.
+//
+// Categories outside hazardExpectsProtection (for example cyber,
+// ergonomic, or software-only hazards) therefore have looser
+// expectations: only the design level is checked for them. The audit
+// reports which level is missing, not whether the remaining measures
+// are individually correct. That is a different check (E2 — semantic
+// quality), out of scope here.
+//
+// init installs runHierarchy as the function behind the
+// runHierarchyImpl hook variable.
+func init() {
+	runHierarchyImpl = runHierarchy
+}
+
+// hazardExpectsProtection lists hazard categories where a pure
+// design+information combination is usually not enough — the engine
+// should produce at least one explicit protective measure (guard,
+// interlock, sensor, presence detector, …).
+//
+// Keys are compared against the hazard's "category" string exactly;
+// a category that is not listed is treated as not requiring protection.
+var hazardExpectsProtection = map[string]bool{
+	"mechanical_hazard":     true,
+	"electrical_hazard":     true,
+	"thermal_hazard":        true,
+	"pneumatic_hydraulic":   true,
+	"radiation_hazard":      true,
+	"laser_hazard":          true,
+	"fire_explosion_hazard": true,
+	"chemical_hazard":       true,
+}
+
+// runHierarchy evaluates, for every hazard, which risk-reduction levels
+// its linked mitigations provide and which expected levels are absent.
+//
+// Mitigations are linked through their "hazard_id" string; mitigations
+// without one are ignored. Hazards are read through their "id",
+// "category" and "name" strings. Hazards with no missing level are
+// counted as complete; all others are listed, with the ones missing a
+// protection measure first and the rest ordered by category.
+func runHierarchy(hazards, mitigations []map[string]any) HierarchyReport {
+	// Group mitigations under the hazard they reference.
+	grouped := make(map[string][]map[string]any)
+	for _, mit := range mitigations {
+		if id, _ := mit["hazard_id"].(string); id != "" {
+			grouped[id] = append(grouped[id], mit)
+		}
+	}
+
+	out := HierarchyReport{TotalHazards: len(hazards)}
+	for _, hz := range hazards {
+		id, _ := hz["id"].(string)
+		cat, _ := hz["category"].(string)
+		title, _ := hz["name"].(string)
+
+		present := levelsForHazard(grouped[id])
+		gaps := expectedMissing(cat, present)
+		if len(gaps) == 0 {
+			out.Complete++
+			continue
+		}
+
+		// Tally each missing level separately; one hazard can add to
+		// several counters.
+		for _, level := range gaps {
+			if level == "design" {
+				out.MissingDesign++
+			} else if level == "protection" {
+				out.MissingProtection++
+			} else if level == "information" {
+				out.MissingInfo++
+			}
+		}
+		out.IncompleteHazards = append(out.IncompleteHazards, HazardHierarchyResult{
+			HazardID:      id,
+			Name:          title,
+			Category:      cat,
+			Levels:        present,
+			MissingLevels: gaps,
+		})
+	}
+
+	// Sort: protection-missing first (most consequential), then by category
+	sort.Slice(out.IncompleteHazards, func(i, j int) bool {
+		left, right := out.IncompleteHazards[i], out.IncompleteHazards[j]
+		leftProt := contains(left.MissingLevels, "protection")
+		rightProt := contains(right.MissingLevels, "protection")
+		if leftProt == rightProt {
+			return left.Category < right.Category
+		}
+		return leftProt
+	})
+	return out
+}
+
+// levelsForHazard returns the distinct risk-reduction levels found in a
+// hazard's mitigations, sorted alphabetically. Each mitigation's
+// "reduction_type" string is matched case-insensitively: "design" and
+// "information" map to themselves, "protection" and "protective" both
+// map to "protection". Anything else is ignored. The result is nil when
+// no recognised level is present.
+func levelsForHazard(mits []map[string]any) []string {
+	canonical := map[string]string{
+		"design":      "design",
+		"protection":  "protection",
+		"protective":  "protection",
+		"information": "information",
+	}
+
+	found := make(map[string]struct{})
+	for _, mit := range mits {
+		raw, _ := mit["reduction_type"].(string)
+		if level, known := canonical[strings.ToLower(raw)]; known {
+			found[level] = struct{}{}
+		}
+	}
+
+	var levels []string
+	for level := range found {
+		levels = append(levels, level)
+	}
+	sort.Strings(levels)
+	return levels
+}
+
+// expectedMissing returns the levels that the hierarchy demands but
+// the mitigation set does not provide, in the order design, protection,
+// information.
+//
+// Rule:
+//   - Every hazard should have a design measure; this also applies to
+//     hazards without any mitigation.
+//   - Categories in hazardExpectsProtection additionally need a
+//     protection measure.
+//   - Information is reported as missing only when none of the three
+//     levels is present. Flagging it whenever it is absent would be too
+//     noisy. A hazard mitigated by information alone is still surfaced,
+//     through its missing design (and, where applicable, protection)
+//     level.
+func expectedMissing(category string, present []string) []string {
+	var hasDesign, hasProtection, hasInformation bool
+	for _, level := range present {
+		switch level {
+		case "design":
+			hasDesign = true
+		case "protection":
+			hasProtection = true
+		case "information":
+			hasInformation = true
+		}
+	}
+
+	var gaps []string
+	if !hasDesign {
+		gaps = append(gaps, "design")
+	}
+	if !hasProtection && hazardExpectsProtection[category] {
+		gaps = append(gaps, "protection")
+	}
+	if !(hasInformation || hasDesign || hasProtection) {
+		gaps = append(gaps, "information")
+	}
+	return gaps
+}
+
+// contains reports whether target occurs in list.
+func contains(list []string, target string) bool {
+	found := false
+	for i := 0; i < len(list) && !found; i++ {
+		found = list[i] == target
+	}
+	return found
+}
@@ -0,0 +1,37 @@
+package audit
+
+// Public entry points for Methods B-E. The full algorithms live in
+// consistency.go, vocabulary.go, echo.go, hierarchy.go respectively;
+// each of those files installs its implementation from init() by
+// assigning the matching run...Impl variable declared below.
+
+// RunConsistency runs Method B (component self-consistency) by calling
+// the function currently stored in runConsistencyImpl.
+func RunConsistency() ConsistencyReport {
+	return runConsistencyImpl()
+}
+
+// RunVocabulary runs Method C (limits-form vocabulary diff) on the given
+// form by calling the function currently stored in runVocabularyImpl.
+func RunVocabulary(form map[string]any) VocabularyReport {
+	return runVocabularyImpl(form)
+}
+
+// RunEcho runs Method D (limits-form echo) on the given form and
+// generated hazards by calling the function currently stored in
+// runEchoImpl.
+func RunEcho(form map[string]any, hazards []map[string]any) EchoReport {
+	return runEchoImpl(form, hazards)
+}
+
+// RunHierarchy runs Method E (hierarchy completeness) on the given
+// hazards and mitigations by calling the function currently stored in
+// runHierarchyImpl.
+func RunHierarchy(hazards, mitigations []map[string]any) HierarchyReport {
+	return runHierarchyImpl(hazards, mitigations)
+}
+
+// Default implementations. Each returns an empty report and is replaced
+// by the real algorithm when the corresponding method file assigns the
+// variable from its init(). Routing through variables keeps the exported
+// wrappers above free of name clashes with the per-method functions.
+
+var (
+	runConsistencyImpl = func() ConsistencyReport { return ConsistencyReport{} }
+	runVocabularyImpl  = func(form map[string]any) VocabularyReport { return VocabularyReport{} }
+	runEchoImpl        = func(form map[string]any, hazards []map[string]any) EchoReport {
+		return EchoReport{}
+	}
+	runHierarchyImpl = func(hazards, mitigations []map[string]any) HierarchyReport {
+		return HierarchyReport{}
+	}
+)
@@ -0,0 +1,298 @@
+// Package audit provides static and runtime audits of the IACE pattern
+// engine — finding pattern reachability, library consistency, and
+// limits-form coverage gaps without a ground-truth reference.
+package audit
+
+import (
+	"sort"
+
+	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
+)
+
+// ReachabilityResult is the verdict for a single pattern in Method A.
+//
+// RequiredAllTags is the de-duplicated union of the pattern's required
+// component and energy tags. UnreachableTags is set only for
+// "unreachable" patterns, ReachableSources only for "reachable" ones,
+// and FixSuggestions for patterns that are not fully reachable.
+type ReachabilityResult struct {
+	PatternID        string   `json:"pattern_id"`
+	Name             string   `json:"name_de"`
+	Priority         int      `json:"priority"`
+	RequiredAllTags  []string `json:"required_tags"`
+	UnreachableTags  []string `json:"unreachable_tags,omitempty"`
+	Status           string   `json:"status"` // "reachable" | "weakly_reachable" | "unreachable"
+	ReachableSources []string `json:"reachable_sources,omitempty"`
+	FixSuggestions   []string `json:"fix_suggestions,omitempty"`
+}
+
+// ReachabilityReport is the full Method A output.
+//
+// Reachable, WeaklyReachable and Unreachable are pattern counts that add
+// up to TotalPatterns. UniverseTags is the sorted list of every tag any
+// source can emit. Only unreachable and weakly reachable patterns are
+// listed individually; fully reachable ones are counted but not listed.
+type ReachabilityReport struct {
+	TotalPatterns       int                  `json:"total_patterns"`
+	Reachable           int                  `json:"reachable"`
+	WeaklyReachable     int                  `json:"weakly_reachable"`
+	Unreachable         int                  `json:"unreachable"`
+	UniverseTags        []string             `json:"universe_tags"`
+	UnreachablePatterns []ReachabilityResult `json:"unreachable_patterns"`
+	WeakPatterns        []ReachabilityResult `json:"weak_patterns"`
+}
+
+// RunReachability evaluates every pattern against the achievable tag universe.
+//
+// A pattern is:
+//   - "unreachable" if at least one required tag is not produced by any
+//     component, energy source, or keyword-dictionary entry.
+//   - "weakly_reachable" if all required tags exist in the universe but
+//     no single source (one Component or one EnergySource or one Keyword
+//     entry) supplies all of them at once — i.e., it relies on multiple
+//     parser hits to combine.
+//   - "reachable" if some single source covers all required tags.
+//
+// The classification ignores ExcludedComponentTags and runtime filters
+// (lifecycle/op-state/machine-type), because those are project-level
+// concerns. The audit answers "could this pattern EVER fire", not
+// "does it fire for project X".
+//
+// Unreachable and weakly reachable patterns are returned sorted by
+// descending priority.
+func RunReachability() ReachabilityReport {
+	patterns := iace.AllPatterns()
+	comps := iace.GetComponentLibrary()
+	energies := iace.GetEnergySources()
+	keywords := iace.GetKeywordDictionary()
+
+	// universe is the set of every tag emitted anywhere. sourceTags holds
+	// the tag set of each individual source so we can check "is there ONE
+	// source covering all required tags".
+	universe := map[string]bool{}
+	sourceTags := map[string]map[string]bool{}
+	register := func(key string, tags map[string]bool) {
+		for t := range tags {
+			universe[t] = true
+		}
+		sourceTags[key] = tags
+	}
+
+	// ID → tags lookups let keyword references resolve without rescanning
+	// the libraries. Entries that share an ID contribute all their tags.
+	compTagsByID := make(map[string][]string, len(comps))
+	for _, c := range comps {
+		compTagsByID[c.ID] = append(compTagsByID[c.ID], c.Tags...)
+		register("component:"+c.ID, toBoolSet(c.Tags))
+	}
+	energyTagsByID := make(map[string][]string, len(energies))
+	for _, e := range energies {
+		energyTagsByID[e.ID] = append(energyTagsByID[e.ID], e.Tags...)
+		register("energy:"+e.ID, toBoolSet(e.Tags))
+	}
+
+	// Keyword entries can also reference components/energies, which
+	// transitively add their tags to the keyword's effective tag set.
+	for i, kw := range keywords {
+		set := toBoolSet(kw.ExtraTags)
+		for _, cID := range kw.ComponentIDs {
+			for _, t := range compTagsByID[cID] {
+				set[t] = true
+			}
+		}
+		for _, eID := range kw.EnergyIDs {
+			for _, t := range energyTagsByID[eID] {
+				set[t] = true
+			}
+		}
+		register(keywordLabel(kw, i), set)
+	}
+
+	report := ReachabilityReport{TotalPatterns: len(patterns)}
+
+	// Universe tag list (sorted) for the report header
+	for t := range universe {
+		report.UniverseTags = append(report.UniverseTags, t)
+	}
+	sort.Strings(report.UniverseTags)
+
+	for _, p := range patterns {
+		all := dedup(append(append([]string{}, p.RequiredComponentTags...), p.RequiredEnergyTags...))
+		if len(all) == 0 {
+			// Pattern with no tag requirements relies on lifecycle/machine_type
+			// filters only — count as reachable by default.
+			report.Reachable++
+			continue
+		}
+
+		var missing []string
+		for _, t := range all {
+			if !universe[t] {
+				missing = append(missing, t)
+			}
+		}
+
+		res := ReachabilityResult{
+			PatternID:       p.ID,
+			Name:            p.NameDE,
+			Priority:        p.Priority,
+			RequiredAllTags: all,
+		}
+
+		if len(missing) > 0 {
+			res.Status = "unreachable"
+			res.UnreachableTags = missing
+			res.FixSuggestions = suggestFixes(p, missing, comps, sourceTags)
+			report.Unreachable++
+			report.UnreachablePatterns = append(report.UnreachablePatterns, res)
+			continue
+		}
+
+		// All tags in universe — check single-source coverage
+		single := findSingleSourceCovers(all, sourceTags)
+		if len(single) > 0 {
+			res.Status = "reachable"
+			res.ReachableSources = single
+			report.Reachable++
+			continue
+		}
+
+		res.Status = "weakly_reachable"
+		res.FixSuggestions = suggestSingleSourceFixes(p, all, comps, sourceTags)
+		report.WeaklyReachable++
+		report.WeakPatterns = append(report.WeakPatterns, res)
+	}
+
+	sort.Slice(report.UnreachablePatterns, func(i, j int) bool {
+		return report.UnreachablePatterns[i].Priority > report.UnreachablePatterns[j].Priority
+	})
+	sort.Slice(report.WeakPatterns, func(i, j int) bool {
+		return report.WeakPatterns[i].Priority > report.WeakPatterns[j].Priority
+	})
+	return report
+}
+
+// findSingleSourceCovers returns, sorted by name, every source whose tag
+// set contains all required tags. With an empty required list every
+// source qualifies. The result is nil when no source qualifies.
+func findSingleSourceCovers(required []string, sourceTags map[string]map[string]bool) []string {
+	var covering []string
+sources:
+	for name, tagSet := range sourceTags {
+		for _, need := range required {
+			if !tagSet[need] {
+				continue sources
+			}
+		}
+		covering = append(covering, name)
+	}
+	// Map iteration order is random; sort for a stable result.
+	sort.Strings(covering)
+	return covering
+}
+
+// suggestFixes proposes concrete library edits for unreachable patterns,
+// one message per missing tag. When components sharing other required
+// tags of the pattern exist, the message names up to three of them as
+// candidates to receive the tag ("Add tag X to one of: C014 (Hubwerk)");
+// otherwise it states that the tag needs a new carrier.
+func suggestFixes(p iace.HazardPattern, missing []string, comps []iace.ComponentLibraryEntry, sourceTags map[string]map[string]bool) []string {
+	var suggestions []string
+	for _, tag := range missing {
+		near := nearComponents(p, tag, comps, sourceTags)
+		msg := "Tag '" + tag + "' is undefined anywhere — needs a new component or energy source carrying it"
+		if len(near) > 0 {
+			msg = "Add tag '" + tag + "' to one of: " + joinFirst(near, 3)
+		}
+		suggestions = append(suggestions, msg)
+	}
+	return suggestions
+}
+
+// suggestSingleSourceFixes proposes how to make a weakly reachable
+// pattern fully reachable: it finds the single source that already
+// covers the most required tags and suggests adding the remaining ones
+// to it.
+//
+// When several sources cover the same number of tags, the one with the
+// lexicographically smallest key wins. sourceTags is a map, so without
+// this tie-break the suggestion would change from run to run.
+//
+// Returns a single explanatory message when no source covers any
+// required tag, and nil when the best source already covers all of them.
+// The p and comps parameters are not used.
+func suggestSingleSourceFixes(p iace.HazardPattern, all []string, comps []iace.ComponentLibraryEntry, sourceTags map[string]map[string]bool) []string {
+	best := ""
+	bestCover := 0
+	var bestMissing []string
+	for src, tags := range sourceTags {
+		hit := 0
+		var miss []string
+		for _, t := range all {
+			if tags[t] {
+				hit++
+			} else {
+				miss = append(miss, t)
+			}
+		}
+		if hit == 0 {
+			continue
+		}
+		// Prefer higher coverage; break ties on the source key so the
+		// choice does not depend on map iteration order.
+		if hit > bestCover || (hit == bestCover && src < best) {
+			best, bestCover, bestMissing = src, hit, miss
+		}
+	}
+	if best == "" || bestCover == 0 {
+		return []string{"No single source covers any required tags — pattern needs a new dedicated component"}
+	}
+	if len(bestMissing) == 0 {
+		return nil
+	}
+	return []string{"Closest single source '" + best + "' covers " + itoa(bestCover) + "/" + itoa(len(all)) + " tags. Add missing tags to it: " + joinFirst(bestMissing, 5)}
+}
+
+// nearComponents finds components whose tags overlap most with the
+// pattern's requirements — these are good candidates to receive the
+// missing tag. It returns labels of the form "ID (NameDE)" for every
+// component that carries at least one of the pattern's other required
+// tags, ordered by descending overlap. The result is nil when the
+// pattern has no other required tags or no component overlaps. The
+// sourceTags parameter is not used.
+func nearComponents(p iace.HazardPattern, missing string, comps []iace.ComponentLibraryEntry, sourceTags map[string]map[string]bool) []string {
+	combined := append(append([]string{}, p.RequiredComponentTags...), p.RequiredEnergyTags...)
+	others := removeOne(dedup(combined), missing)
+	if len(others) == 0 {
+		return nil
+	}
+
+	type candidate struct {
+		label   string
+		overlap int
+	}
+	var ranked []candidate
+	for _, comp := range comps {
+		owned := toBoolSet(comp.Tags)
+		overlap := 0
+		for _, tag := range others {
+			if owned[tag] {
+				overlap++
+			}
+		}
+		if overlap == 0 {
+			continue
+		}
+		ranked = append(ranked, candidate{label: comp.ID + " (" + comp.NameDE + ")", overlap: overlap})
+	}
+	sort.Slice(ranked, func(i, j int) bool { return ranked[i].overlap > ranked[j].overlap })
+
+	var labels []string
+	for _, cand := range ranked {
+		labels = append(labels, cand.label)
+	}
+	return labels
+}
+
+// keywordLabel builds the source key for a keyword-dictionary entry:
+// "keyword:" followed by the entry's first keyword, or by its index in
+// the dictionary when the entry has no keywords.
+func keywordLabel(kw iace.KeywordEntry, idx int) string {
+	suffix := itoa(idx)
+	if len(kw.Keywords) > 0 {
+		suffix = kw.Keywords[0]
+	}
+	return "keyword:" + suffix
+}
@@ -0,0 +1,84 @@
+package audit
+
+// Report types for Methods B-E. The type contracts are kept together
+// here, separate from the per-method implementation files, so the CLI
+// and every method share one definition of each report shape.
+
+// ============================================================================
+// Method B — Component Self-Consistency
+// ============================================================================
+
+// CategoryGap names one declared hazard category that no pattern can
+// produce for a component, together with the tags suggested to close
+// the gap.
+type CategoryGap struct {
+	Category      string   `json:"category"`
+	SuggestedTags []string `json:"suggested_tags"`
+}
+
+// ComponentResult is the Method B finding for one component whose
+// declared hazard categories are not all covered: what it declares,
+// what its tags can actually reach, and one CategoryGap per declared
+// category that is not reachable.
+type ComponentResult struct {
+	ComponentID          string        `json:"component_id"`
+	NameDE               string        `json:"name_de"`
+	DeclaredCategories   []string      `json:"declared_categories"`
+	CoveredCategories    []string      `json:"covered_categories"`
+	MissingForCategories []CategoryGap `json:"missing_for_categories,omitempty"`
+}
+
+// ConsistencyReport is the full Method B output: how many components
+// were checked, how many are consistent or incomplete, and the details
+// of each incomplete component.
+type ConsistencyReport struct {
+	TotalComponents      int               `json:"total_components"`
+	Consistent           int               `json:"consistent"`
+	Incomplete           int               `json:"incomplete"`
+	IncompleteComponents []ComponentResult `json:"incomplete_components"`
+}
+
+// ============================================================================
+// Method C — Limits-Form Vocabulary Diff
+// ============================================================================
+
+// DictionarySuggestion proposes a keyword-dictionary entry for Method C.
+// NOTE(review): presumably Token is the unknown limits-form word, Field
+// the form field it came from, and PatternIDs the patterns whose text
+// mentions it — confirm against the vocabulary implementation.
+type DictionarySuggestion struct {
+	Token      string   `json:"token"`
+	Field      string   `json:"field"`
+	PatternIDs []string `json:"pattern_ids"`
+}
+
+// VocabularyReport is the full Method C output. UniqueTokens is the
+// number of distinct tokens extracted from the limits-form.
+// NOTE(review): KnownTokens/UnknownTokens presumably split those tokens
+// by keyword-dictionary membership — confirm against the vocabulary
+// implementation.
+type VocabularyReport struct {
+	UniqueTokens               int                    `json:"unique_tokens"`
+	KnownTokens                []string               `json:"known_tokens"`
+	UnknownTokens              []string               `json:"unknown_tokens"`
+	SuggestedDictionaryEntries []DictionarySuggestion `json:"suggested_dictionary_entries"`
+}
+
+// ============================================================================
+// Method D — Limits-Form Echo
+// ============================================================================
+
+// OrphanedPhrase is a limits-form sentence that no generated hazard
+// echoes. Field is the human-readable label of the form field, Phrase
+// the sentence itself, and BestScore the highest Jaccard overlap it
+// reached against any hazard.
+type OrphanedPhrase struct {
+	Field     string  `json:"field"`
+	Phrase    string  `json:"phrase"`
+	BestScore float64 `json:"best_score"`
+}
+
+// EchoReport is the full Method D output: the number of limits-form
+// phrases considered, how many were echoed by a hazard, how many were
+// not, and the details of the orphaned ones.
+type EchoReport struct {
+	TotalPhrases    int              `json:"total_phrases"`
+	Echoed          int              `json:"echoed"`
+	Orphaned        int              `json:"orphaned"`
+	OrphanedPhrases []OrphanedPhrase `json:"orphaned_phrases"`
+}
+
+// ============================================================================
+// Method E — Hierarchy Completeness
+// ============================================================================
+
+// HazardHierarchyResult is the Method E finding for one hazard whose
+// mitigation set lacks an expected risk-reduction level. Levels holds
+// the levels that are present, MissingLevels the expected ones that are
+// not; both use the values "design", "protection" and "information".
+type HazardHierarchyResult struct {
+	HazardID      string   `json:"hazard_id"`
+	Name          string   `json:"name"`
+	Category      string   `json:"category"`
+	Levels        []string `json:"present_levels"`
+	MissingLevels []string `json:"missing_levels"`
+}
+
+// HierarchyReport is the full Method E output. Complete counts hazards
+// with no missing level. The Missing* counters count hazards lacking
+// that level, so a hazard missing several levels is counted in several
+// of them. IncompleteHazards lists every hazard that is not complete.
+type HierarchyReport struct {
+	TotalHazards      int                     `json:"total_hazards"`
+	Complete          int                     `json:"complete"`
+	MissingDesign     int                     `json:"missing_design"`
+	MissingProtection int                     `json:"missing_protection"`
+	MissingInfo       int                     `json:"missing_information"`
+	IncompleteHazards []HazardHierarchyResult `json:"incomplete_hazards"`
+}
@@ -0,0 +1,62 @@
+package audit
+
+import (
+	"strconv"
+	"strings"
+)
+
+// appendUnique appends item to list unless list already contains it, in
+// which case list is returned unchanged.
+func appendUnique(list []string, item string) []string {
+	present := false
+	for i := 0; i < len(list) && !present; i++ {
+		present = list[i] == item
+	}
+	if present {
+		return list
+	}
+	return append(list, item)
+}
+
+// toBoolSet converts a list of strings into a set, mapping every element
+// to true. The result is always a non-nil map.
+func toBoolSet(list []string) map[string]bool {
+	set := make(map[string]bool, len(list))
+	for i := 0; i < len(list); i++ {
+		set[list[i]] = true
+	}
+	return set
+}
+
+// dedup returns the elements of list with duplicates removed, keeping
+// the first occurrence of each and preserving order. The result is nil
+// for an empty input.
+func dedup(list []string) []string {
+	var unique []string
+	known := make(map[string]struct{})
+	for _, item := range list {
+		if _, dup := known[item]; dup {
+			continue
+		}
+		known[item] = struct{}{}
+		unique = append(unique, item)
+	}
+	return unique
+}
+
+// removeOne returns a new slice holding the elements of list that differ
+// from item, in their original order. Despite the name, every occurrence
+// of item is dropped. The input slice is not modified and the result is
+// never nil.
+func removeOne(list []string, item string) []string {
+	kept := make([]string, 0, len(list))
+	for i := range list {
+		if list[i] == item {
+			continue
+		}
+		kept = append(kept, list[i])
+	}
+	return kept
+}
+
+// joinFirst joins at most the first n elements of list with ", ". When
+// list is longer than n, ", ..." is appended to show that elements were
+// left out.
+func joinFirst(list []string, n int) string {
+	if len(list) > n {
+		return joinAll(list[:n]) + ", ..."
+	}
+	return joinAll(list)
+}
+
+// joinAll joins the elements of list with ", ". It returns "" for an
+// empty or nil list.
+func joinAll(list []string) string {
+	return strings.Join(list, ", ")
+}
+
+// itoa formats n as a base-10 string.
+func itoa(n int) string {
+	return strconv.FormatInt(int64(n), 10)
+}
@@ -0,0 +1,153 @@
+package audit
+
+import (
+	"regexp"
+	"sort"
+	"strings"
+
+	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
+)
+
+// init assigns runVocabulary to runVocabularyImpl, which is declared
+// elsewhere in this package.
+//
+// runVocabulary takes a limits-form payload (the structured machine
+// description filled in by the engineer) and asks: which of its words
+// are unknown to the keyword dictionary yet appear in any pattern's
+// scenario/trigger/harm/zone/name text? Each such word is a dictionary
+// gap: the engineer typed a term that some pattern is waiting for, but
+// the parser cannot translate it into a tag.
+func init() {
+	runVocabularyImpl = runVocabulary
+}
+
+// tokenRE matches runs of four or more letters drawn from a-z, A-Z and
+// the German characters ä, ö, ü, ß, Ä, Ö, Ü. Shorter words, digits and
+// punctuation never become tokens.
+var tokenRE = regexp.MustCompile(`[a-zäöüßA-ZÄÖÜ]{4,}`)
+
+// stopWords lists German and English filler words that show up in any
+// narrative but carry no engineering meaning. Kept short on purpose: we
+// only want to drop obvious filler.
+//
+// Lookups are made with the lowercased token, which still contains
+// ä/ö/ü/ß, so words are listed in both their transliterated and their
+// umlaut spelling where the two differ. Entries shorter than four
+// letters cannot be produced by tokenRE and are kept for completeness.
+var stopWords = map[string]bool{
+	"oder": true, "und": true, "auch": true, "wenn": true, "wird": true,
+	"werden": true, "kann": true, "koennen": true, "soll": true, "muss": true,
+	"sind": true, "eine": true, "einer": true, "einem": true, "einen": true,
+	"diese": true, "dieser": true, "dieses": true, "diesem": true, "diesen": true,
+	"durch": true, "nach": true, "ueber": true, "unter": true, "zwischen": true,
+	"nicht": true, "ohne": true, "fuer": true, "bzw": true, "etc": true,
+	"sowie": true, "siehe": true, "etwa": true, "ggf": true, "the": true,
+	"with": true, "from": true, "this": true, "that": true, "have": true,
+	"insbesondere": true, "ausschliesslich": true, "ebenfalls": true,
+	"jeweils": true, "weitere": true, "weiteren": true, "weiterer": true,
+	// Umlaut spellings of the transliterated entries above.
+	"können": true, "über": true, "ausschließlich": true,
+}
+
+// runVocabulary implements audit method C (vocabulary): it compares the
+// words of a limits form against the keyword dictionary and the pattern
+// texts.
+//
+// form is either a payload holding a "limits_form" object or that inner
+// object itself. Every string found in it is tokenized by extractTokens.
+// Stop words are dropped; each remaining token is classified as known or
+// unknown by dictTokenHit. Unknown tokens that occur in at least one
+// pattern text become SuggestedDictionaryEntries.
+//
+// UniqueTokens counts all extracted tokens, stop words included.
+// KnownTokens and UnknownTokens are sorted alphabetically. Suggestions
+// are ordered by number of matching patterns, descending; ties are
+// broken alphabetically by token, because sort.Slice gives no guarantee
+// about the relative order of equal elements and the hit count is capped
+// by patternsMentioning, which makes ties common.
+func runVocabulary(form map[string]any) VocabularyReport {
+	limits, ok := form["limits_form"].(map[string]any)
+	if !ok {
+		// Form may already be the inner object
+		limits = form
+	}
+
+	tokens := map[string]bool{}
+	for _, v := range limits {
+		extractTokens(v, tokens)
+	}
+	report := VocabularyReport{UniqueTokens: len(tokens)}
+
+	dictTokens := dictionaryVocabulary()
+
+	for tok := range tokens {
+		if stopWords[tok] {
+			continue
+		}
+		if dictTokenHit(tok, dictTokens) {
+			report.KnownTokens = append(report.KnownTokens, tok)
+		} else {
+			report.UnknownTokens = append(report.UnknownTokens, tok)
+		}
+	}
+	sort.Strings(report.KnownTokens)
+	sort.Strings(report.UnknownTokens)
+
+	// For each unknown token check if any pattern names it
+	patterns := iace.AllPatterns()
+	for _, tok := range report.UnknownTokens {
+		hits := patternsMentioning(tok, patterns)
+		if len(hits) == 0 {
+			continue
+		}
+		report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{
+			Token:      tok,
+			PatternIDs: hits,
+		})
+	}
+	suggestions := report.SuggestedDictionaryEntries
+	sort.Slice(suggestions, func(i, j int) bool {
+		hitsI, hitsJ := len(suggestions[i].PatternIDs), len(suggestions[j].PatternIDs)
+		if hitsI != hitsJ {
+			return hitsI > hitsJ
+		}
+		// Explicit tie-break keeps the report order reproducible.
+		return suggestions[i].Token < suggestions[j].Token
+	})
+	return report
+}
+
+func extractTokens(v any, out map[string]bool) {
+	switch x := v.(type) {
+	case string:
+		for _, m := range tokenRE.FindAllString(x, -1) {
+			out[strings.ToLower(m)] = true
+		}
+	case []any:
+		for _, e := range x {
+			extractTokens(e, out)
+		}
+	case map[string]any:
+		for _, e := range x {
+			extractTokens(e, out)
+		}
+	}
+}
+
+// dictionaryVocabulary collects every keyword of every entry returned by
+// iace.GetKeywordDictionary into a set, lowercased. No transformation
+// other than lowercasing is applied, so the set holds the keywords in
+// whatever spelling the dictionary stores them.
+func dictionaryVocabulary() map[string]bool {
+	vocab := make(map[string]bool)
+	for _, entry := range iace.GetKeywordDictionary() {
+		for _, keyword := range entry.Keywords {
+			lowered := strings.ToLower(keyword)
+			vocab[lowered] = true
+		}
+	}
+	return vocab
+}
+
+// dictTokenHit returns true if the token would be matched by any
+// dictionary entry. Dictionary entries can be substrings, so we treat
+// the dict as a set of stem-like matchers: a token is "known" if it
+// equals a dict word OR contains a dict word as substring OR the dict
+// word contains the token.
+func dictTokenHit(tok string, dict map[string]bool) bool {
+	if dict[tok] {
+		return true
+	}
+	for d := range dict {
+		if strings.Contains(tok, d) || strings.Contains(d, tok) {
+			return true
+		}
+	}
+	return false
+}
+
+// patternsMentioning returns the IDs of the patterns whose German
+// scenario, trigger, harm, zone or name text contains tok, compared as a
+// case-insensitive substring. Patterns are scanned in the given order,
+// each ID is reported once, and the scan stops after 8 IDs. The result
+// is nil when nothing matches.
+func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
+	const maxHits = 8
+
+	needle := strings.ToLower(tok)
+	var ids []string
+	reported := make(map[string]bool)
+	for i := range patterns {
+		p := &patterns[i]
+		text := p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE
+		if !strings.Contains(strings.ToLower(text), needle) || reported[p.ID] {
+			continue
+		}
+		reported[p.ID] = true
+		ids = append(ids, p.ID)
+		if len(ids) >= maxHits {
+			break
+		}
+	}
+	return ids
+}