Files
breakpilot-compliance/ai-compliance-sdk/internal/iace/audit/vocabulary_proposer_test.go
T
Benjamin Admin c13aa9183a feat(ai-sdk): vocab->tag proposer (P2 slice 5, type 3)
Extends Method C: for each unknown narrative token that pattern text names, suggest
the keyword_dictionary tag = the RequiredComponentTags shared by the naming
patterns (ranked by frequency, kept only when shared by >=40% of them, top 3).
Surfaces real dictionary gaps like "zwischenkreis" -> stored_energy and
"updates" -> has_software, which close coverage without hand-editing the dict.

Two precision fixes to Method C while here:
- patternsMentioning now matches WHOLE WORDS, not substrings — substring matching
  flagged fragments like "stehen" inside "entstehen" and produced nonsensical
  tag suggestions.
- a token is only proposed with a tag if one is shared by >=40% of its naming
  patterns, so diffuse common verbs (spread across categories) drop out.

Wired into iace-audit propose -> audit-reports/vocab.{md,json}. Residual
common-verb noise is left to the human/LLM filter rather than a hand-grown
stopword list. Type 4 (coverage blind spots) + P3 (pin accepted proposals into a
GT case) remain for slice 6.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-26 10:27:01 +02:00

37 lines
1.2 KiB
Go

package audit
import (
"testing"
"github.com/breakpilot/ai-compliance-sdk/internal/iace"
)
// TestSuggestTagsFor_RanksSharedRequiredTags verifies that the tag required by
// the largest number of the given patterns comes first in the suggestions.
// Here "backflow_risk" is required by P1 and P2, while every other tag is
// required by a single pattern only.
func TestSuggestTagsFor_RanksSharedRequiredTags(t *testing.T) {
	patterns := map[string]iace.HazardPattern{
		"P1": {ID: "P1", RequiredComponentTags: []string{"backflow_risk", "dom_warewashing"}},
		"P2": {ID: "P2", RequiredComponentTags: []string{"backflow_risk"}},
		"P3": {ID: "P3", RequiredComponentTags: []string{"sharp_edge"}},
	}
	ids := []string{"P1", "P2", "P3"}

	got := suggestTagsFor(ids, patterns)

	// Only the head of the list is pinned; whatever follows is not asserted.
	if len(got) > 0 && got[0] == "backflow_risk" {
		return
	}
	t.Fatalf("want backflow_risk ranked first (2 patterns), got %v", got)
}
// TestSuggestTagsFor_TopThreeStableAlpha feeds a single pattern whose four
// tags are listed out of alphabetical order and expects exactly three
// suggestions back, in alphabetical order: a, b, c.
func TestSuggestTagsFor_TopThreeStableAlpha(t *testing.T) {
	patterns := map[string]iace.HazardPattern{
		"P1": {ID: "P1", RequiredComponentTags: []string{"d", "b", "a", "c"}},
	}

	got := suggestTagsFor([]string{"P1"}, patterns)

	want := [...]string{"a", "b", "c"}
	// The length must match before any element is compared, so indexing got
	// inside the loop can never go out of range.
	matches := len(got) == len(want)
	for i := 0; matches && i < len(want); i++ {
		matches = got[i] == want[i]
	}
	if !matches {
		t.Fatalf("want stable alpha top-3 [a b c], got %v", got)
	}
}
// TestSuggestTagsFor_UnknownPatternIgnored looks up a pattern ID that is not
// present in the (empty) pattern index and expects no suggestions at all.
func TestSuggestTagsFor_UnknownPatternIgnored(t *testing.T) {
	noPatterns := map[string]iace.HazardPattern{}
	ids := []string{"missing"}

	got := suggestTagsFor(ids, noPatterns)
	if len(got) > 0 {
		t.Fatalf("want empty for unknown patterns, got %v", got)
	}
}