66 lines
2.4 KiB
Go
66 lines
2.4 KiB
Go
package ucca
|
|
|
|
import (
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// TermResolution is the E2 (Term Resolution) signal in the Advisor Reasoning Stack.
|
|
// Expanded drives retrieval internally (unambiguous abbreviations are spelled out so
|
|
// the embedding/concept layer finds them). Ambiguous is surfaced to the FE, which
|
|
// resolves it via chat context (E1) or asks the user ("Meinst du X oder Y?"). The
|
|
// lexicon NEVER auto-maps an ambiguous abbreviation (e.g. DSE) — real-life discipline.
|
|
type TermResolution struct {
|
|
Expanded string `json:"-"`
|
|
Ambiguous []TermAmbiguity `json:"ambiguous,omitempty"`
|
|
}
|
|
|
|
// TermAmbiguity describes a single abbreviation for which the SDK found no
// deterministic resolution.
type TermAmbiguity struct {
	// Abbreviation is the abbreviation that could not be resolved.
	Abbreviation string `json:"abbreviation"`
	// Candidates holds the candidate terms the abbreviation may stand for.
	Candidates []string `json:"candidates"`
}
|
|
|
|
// abbreviationLexicon is the lookup table from an abbreviation (lowercase key)
// to the canonical term or terms it can stand for. A key with more than one
// term is ambiguous and is therefore flagged rather than expanded. The table
// is deliberately small to start with (per the user spec).
var abbreviationLexicon = map[string][]string{
	// Unambiguous: a single canonical term each.
	"avv":  {"Auftragsverarbeitungsvertrag"},
	"dsb":  {"Datenschutzbeauftragter"},
	"dsfa": {"Datenschutz-Folgenabschätzung"},
	"tom":  {"technische und organisatorische Maßnahmen"},
	"vvt":  {"Verzeichnis von Verarbeitungstätigkeiten"},

	// Ambiguous: several candidates — context wins, otherwise ask.
	"dpa": {"Data Processing Agreement", "Datenschutzaufsichtsbehörde"},
	"dse": {"Datenschutzerklärung", "Datenschutz-Folgenabschätzung"},
}
|
|
|
|
// ResolveAbbreviations expands unambiguous abbreviations into the query and flags
|
|
// ambiguous ones. Deterministic: iterates query tokens in order (no map-order
|
|
// dependence). Whole-word match (case-insensitive) so "TOM" hits but "atom" does not.
|
|
func ResolveAbbreviations(query string) TermResolution {
|
|
tr := TermResolution{Expanded: query}
|
|
words := strings.FieldsFunc(query, func(r rune) bool {
|
|
return !unicode.IsLetter(r) && !unicode.IsNumber(r)
|
|
})
|
|
seen := map[string]bool{}
|
|
var expansions []string
|
|
for _, w := range words {
|
|
lw := strings.ToLower(w)
|
|
cands, ok := abbreviationLexicon[lw]
|
|
if !ok || seen[lw] {
|
|
continue
|
|
}
|
|
seen[lw] = true
|
|
if len(cands) == 1 {
|
|
expansions = append(expansions, cands[0])
|
|
} else {
|
|
tr.Ambiguous = append(tr.Ambiguous, TermAmbiguity{
|
|
Abbreviation: strings.ToUpper(lw), Candidates: cands,
|
|
})
|
|
}
|
|
}
|
|
if len(expansions) > 0 {
|
|
tr.Expanded = query + " " + strings.Join(expansions, " ")
|
|
}
|
|
return tr
|
|
}
|