feat(ai-sdk): Advisor Reasoning Stack — Clarity+G1+Concept-Injector+Context-Scope+Term-Resolution+E4-Curation+Intent-Signal
This commit is contained in:
@@ -0,0 +1,65 @@
|
||||
package ucca
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// TermResolution is the E2 (Term Resolution) signal in the Advisor Reasoning Stack.
|
||||
// Expanded drives retrieval internally (unambiguous abbreviations are spelled out so
|
||||
// the embedding/concept layer finds them). Ambiguous is surfaced to the FE, which
|
||||
// resolves it via chat context (E1) or asks the user ("Meinst du X oder Y?"). The
|
||||
// lexicon NEVER auto-maps an ambiguous abbreviation (e.g. DSE) — real-life discipline.
|
||||
type TermResolution struct {
|
||||
Expanded string `json:"-"`
|
||||
Ambiguous []TermAmbiguity `json:"ambiguous,omitempty"`
|
||||
}
|
||||
|
||||
// TermAmbiguity flags one abbreviation the SDK could not resolve deterministically.
type TermAmbiguity struct {
	// Abbreviation is the abbreviation that was flagged.
	Abbreviation string `json:"abbreviation"`

	// Candidates are the canonical terms the abbreviation may stand for.
	Candidates []string `json:"candidates"`
}
|
||||
|
||||
// abbreviationLexicon maps a lowercased abbreviation to its canonical term(s).
// An entry with more than one candidate is ambiguous: it is flagged, not
// expanded. Start small (User-Spec).
//
// The map is shared package state; treat it and its slices as read-only.
var abbreviationLexicon = map[string][]string{
	"dse":  {"Datenschutzerklärung", "Datenschutz-Folgenabschätzung"}, // ambiguous — context wins, else ask
	"dsfa": {"Datenschutz-Folgenabschätzung"},
	"tom":  {"technische und organisatorische Maßnahmen"},
	"vvt":  {"Verzeichnis von Verarbeitungstätigkeiten"},
	"avv":  {"Auftragsverarbeitungsvertrag"},
	"dsb":  {"Datenschutzbeauftragter"},
	"dpa":  {"Data Processing Agreement", "Datenschutzaufsichtsbehörde"}, // ambiguous
}
|
||||
|
||||
// ResolveAbbreviations expands unambiguous abbreviations into the query and flags
|
||||
// ambiguous ones. Deterministic: iterates query tokens in order (no map-order
|
||||
// dependence). Whole-word match (case-insensitive) so "TOM" hits but "atom" does not.
|
||||
func ResolveAbbreviations(query string) TermResolution {
|
||||
tr := TermResolution{Expanded: query}
|
||||
words := strings.FieldsFunc(query, func(r rune) bool {
|
||||
return !unicode.IsLetter(r) && !unicode.IsNumber(r)
|
||||
})
|
||||
seen := map[string]bool{}
|
||||
var expansions []string
|
||||
for _, w := range words {
|
||||
lw := strings.ToLower(w)
|
||||
cands, ok := abbreviationLexicon[lw]
|
||||
if !ok || seen[lw] {
|
||||
continue
|
||||
}
|
||||
seen[lw] = true
|
||||
if len(cands) == 1 {
|
||||
expansions = append(expansions, cands[0])
|
||||
} else {
|
||||
tr.Ambiguous = append(tr.Ambiguous, TermAmbiguity{
|
||||
Abbreviation: strings.ToUpper(lw), Candidates: cands,
|
||||
})
|
||||
}
|
||||
}
|
||||
if len(expansions) > 0 {
|
||||
tr.Expanded = query + " " + strings.Join(expansions, " ")
|
||||
}
|
||||
return tr
|
||||
}
|
||||
Reference in New Issue
Block a user