feat(ai-sdk): Advisor Reasoning Stack — Clarity+G1+Concept-Injector+Context-Scope+Term-Resolution+E4-Curation+Intent-Signal

This commit is contained in:
Claude
2026-07-01 15:27:23 +02:00
parent a606000a20
commit e901447096
12 changed files with 902 additions and 10 deletions
+135
View File
@@ -0,0 +1,135 @@
package ucca
import (
	"sort"
	"strings"
	"unicode"
)
// Clarity is the READ-ONLY, INSTRUMENTED clarity-gate signal emitted alongside a
// retrieve response. It does NOT change retrieval or advisor behaviour yet — the
// advisor still answers normally. Once ~30-50 real questions are collected the
// thresholds get finalised and the gate is activated in the advisor flow.
//
// Ambiguity has two independent sources (empirically measured, 12-question set):
// - retrieval scatter: hits spread across many knowledge spaces (low
// concentration / high domain_count) — the retriever itself can't localise.
// - conceptual generality: a general term the corpus OVER-localises (e.g. "PDCA"
// concentrates on datenschutz but is cross-domain) — only an LLM knows this.
// The middle band is where the LLM-intent classifier must decide.
//
// G1 (explicit scope): when the query NAMES a regulation ("... nach TRGS", "CRA
// ...", "MaschinenVO ..."), that explicit context beats the embedding scatter —
// the gate scopes to the named regulation's knowledge space regardless of
// concentration. This is regulation detection, NOT a broad-term list.
type Clarity struct {
// Mode is the gate decision. ClassifyClarity sets "clarify" for the reasons
// low_concentration, many_domains and no_domain_signal, and "answer" for
// high_confidence_scope, middle_band_llm_needed and explicit_scope.
Mode string `json:"mode"` // "answer" | "clarify"
// Reason names the rule that produced Mode.
Reason string `json:"reason"` // low_concentration | many_domains | high_confidence_scope | middle_band_llm_needed | explicit_scope | no_domain_signal
// Concentration is the hit count of the top-ranked knowledge space divided by
// the number of tagged hits; it stays 0 when no hit is tagged. It is computed
// before the explicit-scope override, so with Reason "explicit_scope" it still
// describes the top retrieval space, which may differ from DominantContext.
Concentration float64 `json:"concentration"` // fraction of tagged hits in the dominant knowledge space
// DomainCount is the number of distinct knowledge spaces among the tagged hits.
DomainCount int `json:"domain_count"` // distinct knowledge spaces in the hits
// DominantContext is the top-ranked knowledge space, replaced by the space of
// the regulation named in the query whenever one is detected.
DominantContext string `json:"dominant_context"` // knowledge-space id (explicit scope wins if the query names a regulation)
// CandidateContexts lists every knowledge space present in the hits, ordered by
// hit count (descending) and then by id. ClassifyClarity initialises it to an
// empty, non-nil slice, so it is never nil in a value it returns.
CandidateContexts []ClarityContext `json:"candidate_contexts"` // corpus-grounded chips (spaces actually present)
}
// ClarityContext is one corpus-grounded context chip: a knowledge space that
// actually occurs in the retrieval hits, together with how many hits fell into it.
type ClarityContext struct {
// ID is the knowledge-space id.
ID string `json:"id"`
// Label is the value ClassifyClarity looks up in KnowledgeSpaceLabel for ID.
Label string `json:"label"`
// Hits is the number of tagged results that belong to this knowledge space.
Hits int `json:"hits"`
}
// Tiered thresholds — INSTRUMENTED DEFAULTS, calibrate on 30-50 real questions.
//
// ClassifyClarity evaluates them in the order listed here and stops at the first
// rule that applies; a result that matches none of them is reported as
// "middle_band_llm_needed".
const (
// Concentration at or below this value yields "clarify" / "low_concentration".
clarityMaxConcentration = 0.45 // <= this => clarify (retrieval scatter)
// A domain count at or above this value yields "clarify" / "many_domains".
clarityMinDomains = 4 // >= this => clarify (broad spread)
// Concentration at or above this value yields "answer" / "high_confidence_scope".
clarityAnswerConc = 0.75 // >= this => answer (confident scope)
)
// QueryKnowledgeSpace reports the knowledge space of a regulation that the query
// names explicitly, or "" when no known regulation is named.
//
// This is regulation detection (authority), not a broad-term list: it only fires
// when the user names a concrete regelwerk. Matching is case-insensitive. Long,
// unambiguous names ("gefahrstoff", "dsgvo") match as substrings; short acronyms
// are listed with surrounding spaces (" cra ", " asr ", " mdr ") and therefore
// have to appear as whole words.
//
// Before matching, every rune that is not a letter, a digit or a hyphen is
// replaced by a space, so an acronym next to punctuation ("CRA?", "(MDR)",
// "DORA,") still counts as a whole word. Hyphens are kept because several
// keywords contain one ("nis-2", "ki-vo").
//
// The cases are evaluated top to bottom; the first knowledge space with a
// matching keyword wins.
func QueryKnowledgeSpace(query string) string {
	// Turn punctuation and any non-space whitespace into plain spaces so the
	// space-delimited acronym keywords below match at word boundaries.
	normalized := strings.Map(func(r rune) rune {
		if r == '-' || unicode.IsLetter(r) || unicode.IsDigit(r) {
			return r
		}
		return ' '
	}, strings.ToLower(query))

	// Pad both ends so a keyword at the very start or end is space-delimited too.
	q := " " + normalized + " "

	has := func(subs ...string) bool {
		for _, s := range subs {
			if strings.Contains(q, s) {
				return true
			}
		}
		return false
	}

	switch {
	case has("trgs", "trbs", " asr ", "gefahrstoff", "arbeitsplatzgrenzwert", "arbeitsschutz"):
		return "arbeitsschutz"
	case has("dsgvo", "gdpr", "bdsg", "tdddg", "ttdsg", " dsk ", "edpb", "datenschutz", " dsfa "):
		return "datenschutz"
	case has(" cra ", "cyber resilience", "nis2", "nis-2", " dora ", "enisa", "bsig", "kritis"):
		return "cyber"
	case has("ai act", "ki-vo", "ki-verordnung", "ki-system"):
		return "ki"
	case has("maschinenverordnung", "maschinenvo", "maschvo", "maschinenrichtlinie", " gpsr ", "produktsicherheit"):
		return "produktsicherheit"
	case has(" mdr ", "medizinprodukt", "medical device"):
		// Medical-device regulation is filed under the product-safety space.
		return "produktsicherheit"
	default:
		return ""
	}
}
// ClassifyClarity derives the read-only clarity signal for one retrieve response.
//
// Every result whose regulation code maps to a knowledge space counts as a tagged
// hit. The spaces are ranked by hit count (ties broken by id) and the share of the
// top space decides the tier via the clarity* thresholds. On top of that sits the
// G1 explicit-scope override: if the query itself names a regulation, the result
// is always "answer" / "explicit_scope" with that regulation's knowledge space as
// the dominant context, whatever the retrieval scatter looks like.
//
// When no result is tagged, the signal is "clarify" / "no_domain_signal" unless
// the explicit-scope override applies.
func ClassifyClarity(query string, results []LegalSearchResult) Clarity {
	// Tally tagged hits per knowledge space; untagged results are ignored.
	hitsBySpace := make(map[string]int)
	tagged := 0
	for i := range results {
		space := KnowledgeSpaceOf(results[i].RegulationCode)
		if space == "" {
			continue
		}
		hitsBySpace[space]++
		tagged++
	}

	// Resolved once up front; it is needed on both exit paths.
	explicit := QueryKnowledgeSpace(query)

	if tagged == 0 {
		out := Clarity{
			Mode:              "clarify",
			Reason:            "no_domain_signal",
			CandidateContexts: []ClarityContext{},
		}
		if explicit != "" {
			out.Mode = "answer"
			out.Reason = "explicit_scope"
			out.DominantContext = explicit
		}
		return out
	}

	// One chip per knowledge space, most hits first; the id tie-break keeps the
	// order deterministic despite random map iteration.
	chips := make([]ClarityContext, 0, len(hitsBySpace))
	for space, hits := range hitsBySpace {
		chips = append(chips, ClarityContext{
			ID:    space,
			Label: KnowledgeSpaceLabel[space],
			Hits:  hits,
		})
	}
	sort.Slice(chips, func(a, b int) bool {
		if chips[a].Hits == chips[b].Hits {
			return chips[a].ID < chips[b].ID
		}
		return chips[a].Hits > chips[b].Hits
	})

	out := Clarity{
		DominantContext:   chips[0].ID,
		Concentration:     float64(chips[0].Hits) / float64(tagged),
		DomainCount:       len(hitsBySpace),
		CandidateContexts: chips,
	}

	// Tiers are checked in priority order; the first one that applies decides.
	if out.Concentration <= clarityMaxConcentration {
		out.Mode, out.Reason = "clarify", "low_concentration"
	} else if out.DomainCount >= clarityMinDomains {
		out.Mode, out.Reason = "clarify", "many_domains"
	} else if out.Concentration >= clarityAnswerConc {
		out.Mode, out.Reason = "answer", "high_confidence_scope"
	} else {
		out.Mode, out.Reason = "answer", "middle_band_llm_needed"
	}

	// G1: an explicitly named regulation beats the embedding scatter.
	if explicit != "" {
		out.Mode = "answer"
		out.Reason = "explicit_scope"
		out.DominantContext = explicit
	}
	return out
}