feat(ai-sdk): vocab->tag proposer (P2 slice 5, type 3)

Extends Method C: for each unknown narrative token that pattern text names, suggest keyword_dictionary tag(s) for it, namely the RequiredComponentTags shared by the naming patterns (ranked by frequency, kept only when shared by >=40% of them, top 3). Surfaces real dictionary gaps like "zwischenkreis" -> stored_energy and "updates" -> has_software, which close coverage without hand-editing the dict.

Two precision fixes to Method C while here:
- patternsMentioning now matches WHOLE WORDS, not substrings — substring matching flagged fragments like "stehen" inside "entstehen" and produced nonsensical tag suggestions.
- a token is only proposed with a tag if one is shared by >=40% of its naming patterns, so diffuse common verbs (spread across categories) drop out.

Wired into iace-audit propose -> audit-reports/vocab.{md,json}. Residual common-verb noise is left to the human/LLM filter rather than a hand-grown stopword list. Type 4 (coverage blind spots) + P3 (pin accepted proposals into a GT case) remain for slice 6.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-25 09:51:12 +02:00
parent 662aec209a
commit c13aa9183a
4 changed files with 143 additions and 7 deletions
@@ -6,8 +6,10 @@ import (
 	"fmt"
 	"os"
 	"strconv"
+	"strings"

 	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
+	"github.com/breakpilot/ai-compliance-sdk/internal/iace/audit"
 	"github.com/breakpilot/ai-compliance-sdk/internal/llm"
 )

@@ -89,12 +91,25 @@ func cmdPropose(args []string) {
 	writeText("audit-reports/framing.md", iace.RenderFramingQueue(in.MachineType, framing))
 	writeJSON("audit-reports/framing.json", framing)

+	// Type 3: vocab->tag proposals (unknown narrative tokens that pattern text
+	// names as a whole word, with a dominant shared required tag).
+	vocab := audit.RunVocabulary(map[string]any{"narrative": in.Narrative})
+	var vgaps []audit.DictionarySuggestion
+	for _, s := range vocab.SuggestedDictionaryEntries {
+		if len(s.SuggestedTags) > 0 {
+			vgaps = append(vgaps, s)
+		}
+	}
+	writeText("audit-reports/vocab.md", renderVocabQueue(in.MachineType, vgaps))
+	writeJSON("audit-reports/vocab.json", vgaps)
+
 	printSummary("Method P — Dedup Proposer ("+judge.Name()+")", map[string]int{
 		"fired_patterns": len(fired),
 		"candidates":     len(candidates),
 		"in_queue":       len(proposals),
 		"gt_blocked":     blocked,
 		"framing_flags":  len(framing),
+		"vocab_gaps":     len(vgaps),
 	})
 	if gt == nil {
 		fmt.Fprintln(os.Stderr, "note: no ground truth provided — GT wall NOT applied (candidates not recall-screened)")
@@ -145,3 +160,19 @@ func envFloat(key string, def float64) float64 {
 	}
 	return def
 }
+
+// renderVocabQueue renders the vocab→tag review queue as a Markdown document.
+//
+// The output starts with a title naming the machine and an intro paragraph
+// stating how many entries follow. Each entry then gets a numbered section
+// showing its token, all of its suggested tags, the IDs of the patterns that
+// name it, and a suggested action. The action line names only the first
+// suggested tag, or the placeholder "<tag>" when the entry has none.
+func renderVocabQueue(machine string, entries []audit.DictionarySuggestion) string {
+	var md strings.Builder
+	fmt.Fprintf(&md, "# Vocab→tag review queue — %s\n\n", machine)
+	fmt.Fprintf(&md, "%d unknown token(s) appear in pattern text but map to no dictionary tag. Propose-only — a human (or the LLM) confirms the tag, then adds a keyword_dictionary entry and pins a GT case.\n\n", len(entries))
+	for idx, entry := range entries {
+		// The action line needs exactly one tag: the top-ranked suggestion,
+		// or a placeholder for the reviewer to fill in.
+		primary := "<tag>"
+		if tags := entry.SuggestedTags; len(tags) != 0 {
+			primary = tags[0]
+		}
+		allTags := strings.Join(entry.SuggestedTags, ", ")
+		patternList := strings.Join(entry.PatternIDs, ", ")
+		fmt.Fprintf(&md, "## %d. \"%s\"  → suggested tag(s): %s\n", idx+1, entry.Token, allTags)
+		fmt.Fprintf(&md, "- named by %d pattern(s): %s\n", len(entry.PatternIDs), patternList)
+		fmt.Fprintf(&md, "- suggested action: add keyword_dictionary entry {%q → %s} so narratives mentioning it trigger those patterns; human confirms\n\n", entry.Token, primary)
+	}
+	return md.String()
+}
@@ -36,6 +36,10 @@ type DictionarySuggestion struct {
 	Token      string   `json:"token"`
 	Field      string   `json:"field"`
 	PatternIDs []string `json:"pattern_ids"`
+	// SuggestedTags are the RequiredComponentTags shared by the naming patterns,
+	// ranked by frequency — the candidate tags a keyword_dictionary entry for this
+	// token would emit so narratives mentioning it can trigger those patterns.
+	SuggestedTags []string `json:"suggested_tags,omitempty"`
 }

 type VocabularyReport struct {
@@ -66,14 +66,19 @@ func runVocabulary(form map[string]any) VocabularyReport {

 	// For each unknown token check if any pattern names it
 	patterns := iace.AllPatterns()
+	byID := make(map[string]iace.HazardPattern, len(patterns))
+	for _, p := range patterns {
+		byID[p.ID] = p
+	}
 	for _, tok := range report.UnknownTokens {
 		hits := patternsMentioning(tok, patterns)
 		if len(hits) == 0 {
 			continue
 		}
 		report.SuggestedDictionaryEntries = append(report.SuggestedDictionaryEntries, DictionarySuggestion{
-			Token:      tok,
-			PatternIDs: hits,
+			Token:         tok,
+			PatternIDs:    hits,
+			SuggestedTags: suggestTagsFor(hits, byID),
 		})
 	}
 	sort.Slice(report.SuggestedDictionaryEntries, func(i, j int) bool {
@@ -129,18 +134,24 @@ func dictTokenHit(tok string, dict map[string]bool) bool {
 	return false
 }

-// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/
-// harm/zone text contains the token (case-insensitive substring).
+// patternsMentioning returns up to 8 pattern IDs whose scenario/trigger/harm/
+// zone text names the token as a WHOLE WORD. Whole-word (not substring) matching
+// is essential: a substring match flags common fragments like "stehen" inside
+// "entstehen", producing spurious hits and nonsensical tag suggestions.
 func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
 	tokLower := strings.ToLower(tok)
 	seen := map[string]bool{}
 	var out []string
 	for _, p := range patterns {
 		hay := strings.ToLower(p.ScenarioDE + " " + p.TriggerDE + " " + p.HarmDE + " " + p.ZoneDE + " " + p.NameDE)
-		if !strings.Contains(hay, tokLower) {
-			continue
+		matched := false
+		for _, w := range tokenRE.FindAllString(hay, -1) {
+			if w == tokLower {
+				matched = true
+				break
+			}
 		}
-		if seen[p.ID] {
+		if !matched || seen[p.ID] {
 			continue
 		}
 		seen[p.ID] = true
@@ -151,3 +162,57 @@ func patternsMentioning(tok string, patterns []iace.HazardPattern) []string {
 	}
 	return out
 }
+
+// suggestTagsFor proposes dictionary tags for a token from the patterns that
+// name it.
+//
+// ids are the naming pattern IDs; IDs absent from byID are ignored and do not
+// count toward the total. For every resolved pattern each distinct entry of
+// its RequiredComponentTags is counted once. Tags are then ranked by how many
+// patterns require them (descending), with ties broken by tag name (ascending).
+//
+// The result holds at most 3 tags, and only tags required by at least 40% of
+// the resolved patterns. It is nil when no ID resolves or no tag reaches that
+// share.
+func suggestTagsFor(ids []string, byID map[string]iace.HazardPattern) []string {
+	counts := make(map[string]int)
+	resolved := 0
+	for _, id := range ids {
+		pattern, found := byID[id]
+		if !found {
+			continue
+		}
+		resolved++
+		// A tag listed twice on the same pattern still counts as one pattern.
+		counted := make(map[string]bool, len(pattern.RequiredComponentTags))
+		for _, tag := range pattern.RequiredComponentTags {
+			if !counted[tag] {
+				counted[tag] = true
+				counts[tag]++
+			}
+		}
+	}
+	if resolved == 0 {
+		return nil
+	}
+	tags := make([]string, 0, len(counts))
+	for tag := range counts {
+		tags = append(tags, tag)
+	}
+	// Map iteration order is random; sorting by (count desc, name asc) makes
+	// the ranking deterministic.
+	sort.Slice(tags, func(a, b int) bool {
+		ca, cb := counts[tags[a]], counts[tags[b]]
+		if ca == cb {
+			return tags[a] < tags[b]
+		}
+		return ca > cb
+	})
+	// Only suggest a tag shared by >= 40% of the naming patterns. Because the
+	// list is sorted by count, the first tag below the share ends the scan, so
+	// diffuse tokens (common verbs spread across categories) yield nothing.
+	var picked []string
+	for _, tag := range tags {
+		if len(picked) == 3 || float64(counts[tag])/float64(resolved) < 0.4 {
+			break
+		}
+		picked = append(picked, tag)
+	}
+	return picked
+}
@@ -0,0 +1,36 @@
+package audit
+
+import (
+	"testing"
+
+	"github.com/breakpilot/ai-compliance-sdk/internal/iace"
+)
+
+// TestSuggestTagsFor_RanksSharedRequiredTags checks both the ranking and the
+// share cut-off: the tag required by 2 of 3 patterns is returned, and tags
+// required by only 1 of 3 patterns are dropped.
+func TestSuggestTagsFor_RanksSharedRequiredTags(t *testing.T) {
+	byID := map[string]iace.HazardPattern{
+		"P1": {ID: "P1", RequiredComponentTags: []string{"backflow_risk", "dom_warewashing"}},
+		"P2": {ID: "P2", RequiredComponentTags: []string{"backflow_risk"}},
+		"P3": {ID: "P3", RequiredComponentTags: []string{"sharp_edge"}},
+	}
+	got := suggestTagsFor([]string{"P1", "P2", "P3"}, byID)
+	// dom_warewashing and sharp_edge each appear on 1 of 3 patterns (~33%),
+	// below the 40% share, so asserting only got[0] would miss a regression
+	// that lets them through.
+	if len(got) != 1 || got[0] != "backflow_risk" {
+		t.Fatalf("want exactly [backflow_risk] (2 of 3 patterns; 1-of-3 tags fall below the 40%% share), got %v", got)
+	}
+}
+
+// TestSuggestTagsFor_TopThreeStableAlpha feeds a single pattern with four
+// equally frequent tags in shuffled order and expects the first three in
+// alphabetical order.
+func TestSuggestTagsFor_TopThreeStableAlpha(t *testing.T) {
+	single := iace.HazardPattern{ID: "P1", RequiredComponentTags: []string{"d", "b", "a", "c"}}
+	got := suggestTagsFor([]string{"P1"}, map[string]iace.HazardPattern{"P1": single})
+	want := []string{"a", "b", "c"}
+	if len(got) != len(want) {
+		t.Fatalf("want stable alpha top-3 [a b c], got %v", got)
+	}
+	for i := range want {
+		if got[i] != want[i] {
+			t.Fatalf("want stable alpha top-3 [a b c], got %v", got)
+		}
+	}
+}
+
+// TestSuggestTagsFor_UnknownPatternIgnored verifies that an ID with no entry
+// in the lookup map yields no suggestions.
+func TestSuggestTagsFor_UnknownPatternIgnored(t *testing.T) {
+	noPatterns := map[string]iace.HazardPattern{}
+	got := suggestTagsFor([]string{"missing"}, noPatterns)
+	if len(got) > 0 {
+		t.Fatalf("want empty for unknown patterns, got %v", got)
+	}
+}