From c13aa9183a97e12a8ed8093ec0a7f237bf75d3ac Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Thu, 25 Jun 2026 09:51:12 +0200
Subject: [PATCH] feat(ai-sdk): vocab->tag proposer (P2 slice 5, type 3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extends Method C: for each unknown narrative token that pattern text
names, suggest keyword_dictionary tags for it, namely the
RequiredComponentTags shared by the naming patterns (ranked by
frequency, kept only when shared by >=40% of them, top 3). Surfaces
real dictionary gaps like "zwischenkreis" -> stored_energy and
"updates" -> has_software; adding these entries closes coverage
without hunting for the gaps by hand.

Two precision fixes to Method C while here:
- patternsMentioning now matches WHOLE WORDS, not substrings —
  substring matching flagged fragments like "stehen" inside
  "entstehen" and produced nonsensical tag suggestions.
- a token is only proposed with a tag if one is shared by >=40% of its
  naming patterns, so diffuse common verbs (spread across categories)
  drop out.

Wired into iace-audit propose -> audit-reports/vocab.{md,json}.
Residual common-verb noise is left to the human/LLM filter rather than
a hand-grown stopword list.

Type 4 (coverage blind spots) + P3 (pin accepted proposals into a GT
case) remain for slice 6.

Co-Authored-By: Claude Opus 4.7 --- ai-compliance-sdk/cmd/iace-audit/propose.go | 31 ++++++++ .../internal/iace/audit/stubs.go | 4 + .../internal/iace/audit/vocabulary.go | 79 +++++++++++++++++-- .../iace/audit/vocabulary_proposer_test.go | 36 +++++++++ 4 files changed, 143 insertions(+), 7 deletions(-) create mode 100644 ai-compliance-sdk/internal/iace/audit/vocabulary_proposer_test.go diff --git a/ai-compliance-sdk/cmd/iace-audit/propose.go b/ai-compliance-sdk/cmd/iace-audit/propose.go index dfab7646..75452b0f 100644 --- a/ai-compliance-sdk/cmd/iace-audit/propose.go +++ b/ai-compliance-sdk/cmd/iace-audit/propose.go @@ -6,8 +6,10 @@ import ( "fmt" "os" "strconv" + "strings" "github.com/breakpilot/ai-compliance-sdk/internal/iace" + "github.com/breakpilot/ai-compliance-sdk/internal/iace/audit" "github.com/breakpilot/ai-compliance-sdk/internal/llm" ) @@ -89,12 +91,25 @@ func cmdPropose(args []string) { writeText("audit-reports/framing.md", iace.RenderFramingQueue(in.MachineType, framing)) writeJSON("audit-reports/framing.json", framing) + // Type 3: vocab->tag proposals (unknown narrative tokens that pattern text + // names as a whole word, with a dominant shared required tag). 
+ vocab := audit.RunVocabulary(map[string]any{"narrative": in.Narrative}) + var vgaps []audit.DictionarySuggestion + for _, s := range vocab.SuggestedDictionaryEntries { + if len(s.SuggestedTags) > 0 { + vgaps = append(vgaps, s) + } + } + writeText("audit-reports/vocab.md", renderVocabQueue(in.MachineType, vgaps)) + writeJSON("audit-reports/vocab.json", vgaps) + printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{ "fired_patterns": len(fired), "candidates": len(candidates), "in_queue": len(proposals), "gt_blocked": blocked, "framing_flags": len(framing), + "vocab_gaps": len(vgaps), }) if gt == nil { fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)") @@ -145,3 +160,19 @@ func envFloat(key string, def float64) float64 { } return def } + +func renderVocabQueue(machine string, entries []audit.DictionarySuggestion) string { + var b strings.Builder + fmt.Fprintf(&b, "# Vocab→tag review queue — %s\n\n", machine) + fmt.Fprintf(&b, "%d unknown token(s) appear in pattern text but map to no dictionary tag. Propose-only — a human (or the LLM) confirms the tag, then adds a keyword_dictionary entry and pins a GT case.\n\n", len(entries)) + for i, s := range entries { + tag := "" + if len(s.SuggestedTags) > 0 { + tag = s.SuggestedTags[0] + } + fmt.Fprintf(&b, "## %d. 
\"%s\" → suggested tag(s): %s\n", i+1, s.Token, strings.Join(s.SuggestedTags, ", ")) + fmt.Fprintf(&b, "- named by %d pattern(s): %s\n", len(s.PatternIDs), strings.Join(s.PatternIDs, ", ")) + fmt.Fprintf(&b, "- suggested action: add keyword_dictionary entry {%q → %s} so narratives mentioning it trigger those patterns; human confirms\n\n", s.Token, tag) + } + return b.String() +} diff --git a/ai-compliance-sdk/internal/iace/audit/stubs.go b/ai-compliance-sdk/internal/iace/audit/stubs.go index 66661168..cd318a14 100644 --- a/ai-compliance-sdk/internal/iace/audit/stubs.go +++ b/ai-compliance-sdk/internal/iace/audit/stubs.go @@ -36,6 +36,10 @@ type DictionarySuggestion struct { Token string `json:"token"` Field string `json:"field"` PatternIDs []string `json:"pattern_ids"` + // SuggestedTags are the RequiredComponentTags shared by the naming patterns, + // ranked by frequency — the candidate tags a keyword_dictionary entry for this + // token would emit so narratives mentioning it can trigger those patterns. 
+ SuggestedTags []string `json:"suggested_tags,omitempty"` } type VocabularyReport struct { diff --git a/ai-compliance-sdk/internal/iace/audit/vocabulary.go b/ai-compliance-sdk/internal/iace/audit/vocabulary.go index b97b427f..a237b58c 100644 --- a/ai-compliance-sdk/internal/iace/audit/vocabulary.go +++ b/ai-compliance-sdk/internal/iace/audit/vocabulary.go @@ -66,14 +66,19 @@ func runVocabulary(form map[string]any) VocabularyReport { // For each unknown token check if any pattern names it patterns := iace.AllPatterns() + byID := make(map[string]iace.HazardPattern, len(patterns)) + for _, p := range patterns { + byID[p.ID] = p + } for _, tok := range report.UnknownTokens { hits := patternsMentioning(tok, patterns) if len(hits) == 0 { continue } report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{ - Token: tok, - PatternIDs: hits, + Token: tok, + PatternIDs: hits, + SuggestedTags: suggestTagsFor(hits, byID), }) } sort.Slice(report.SuggestedDictionaryEntries, func(i, j int) bool { @@ -129,18 +134,24 @@ func dictTokenHit(tok string, dict map[string]bool) bool { return false } -// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/ -// harm/zone text contains the token (case-insensitive substring). +// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/harm/ +// zone text names the token as a WHOLE WORD. Whole-word (not substring) matching +// is essential: a substring match flags common fragments like "stehen" inside +// "entstehen", producing spurious hits and nonsensical tag suggestions. 
func patternsMentioning(tok string, patterns []iace.HazardPattern) []string { tokLower := strings.ToLower(tok) seen := map[string]bool{} var out []string for _, p := range patterns { hay := strings.ToLower(p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE) - if !strings.Contains(hay, tokLower) { - continue + matched := false + for _, w := range tokenRE.FindAllString(hay, -1) { + if w == tokLower { + matched = true + break + } } - if seen[p.ID] { + if !matched || seen[p.ID] { continue } seen[p.ID] = true @@ -151,3 +162,57 @@ func patternsMentioning(tok string, patterns []iace.HazardPattern) []string { } return out } + +// suggestTagsFor returns the RequiredComponentTags shared across the naming +// patterns, ranked by how many of them require each tag (ties broken by name), +// top 3. These are the candidate tags a dictionary entry for the token should +// emit so a narrative mentioning the token can trigger those patterns. +func suggestTagsFor(ids []string, byID map[string]iace.HazardPattern) []string { + freq := map[string]int{} + total := 0 + for _, id := range ids { + p, ok := byID[id] + if !ok { + continue + } + total++ + seen := map[string]bool{} + for _, tag := range p.RequiredComponentTags { + if seen[tag] { + continue + } + seen[tag] = true + freq[tag]++ + } + } + if total == 0 { + return nil + } + type tf struct { + tag string + n int + } + ranked := make([]tf, 0, len(freq)) + for t, n := range freq { + ranked = append(ranked, tf{t, n}) + } + sort.Slice(ranked, func(i, j int) bool { + if ranked[i].n != ranked[j].n { + return ranked[i].n > ranked[j].n + } + return ranked[i].tag < ranked[j].tag + }) + // Only suggest a tag shared by >= 40% of the naming patterns. Diffuse tokens + // (common verbs spread across categories) get no dominant tag and are dropped. 
+ var out []string + for _, x := range ranked { + if float64(x.n)/float64(total) < 0.4 { + break + } + out = append(out, x.tag) + if len(out) >= 3 { + break + } + } + return out +} diff --git a/ai-compliance-sdk/internal/iace/audit/vocabulary_proposer_test.go b/ai-compliance-sdk/internal/iace/audit/vocabulary_proposer_test.go new file mode 100644 index 00000000..8f8b0a59 --- /dev/null +++ b/ai-compliance-sdk/internal/iace/audit/vocabulary_proposer_test.go @@ -0,0 +1,36 @@ +package audit + +import ( + "testing" + + "github.com/breakpilot/ai-compliance-sdk/internal/iace" +) + +func TestSuggestTagsFor_RanksSharedRequiredTags(t *testing.T) { + byID := map[string]iace.HazardPattern{ + "P1": {ID: "P1", RequiredComponentTags: []string{"backflow_risk", "dom_warewashing"}}, + "P2": {ID: "P2", RequiredComponentTags: []string{"backflow_risk"}}, + "P3": {ID: "P3", RequiredComponentTags: []string{"sharp_edge"}}, + } + got := suggestTagsFor([]string{"P1", "P2", "P3"}, byID) + if len(got) == 0 || got[0] != "backflow_risk" { + t.Fatalf("want backflow_risk ranked first (2 patterns), got %v", got) + } +} + +func TestSuggestTagsFor_TopThreeStableAlpha(t *testing.T) { + byID := map[string]iace.HazardPattern{ + "P1": {ID: "P1", RequiredComponentTags: []string{"d", "b", "a", "c"}}, + } + got := suggestTagsFor([]string{"P1"}, byID) + if len(got) != 3 || got[0] != "a" || got[1] != "b" || got[2] != "c" { + t.Fatalf("want stable alpha top-3 [a b c], got %v", got) + } +} + +func TestSuggestTagsFor_UnknownPatternIgnored(t *testing.T) { + byID := map[string]iace.HazardPattern{} + if got := suggestTagsFor([]string{"missing"}, byID); len(got) != 0 { + t.Fatalf("want empty for unknown patterns, got %v", got) + } +}