feat(ai-sdk): Advisor Reasoning Stack — Clarity+G1+Concept-Injector+Context-Scope+Term-Resolution+E4-Curation+Intent-Signal

2026-07-01 15:27:23 +02:00
parent a606000a20
commit e901447096
12 changed files with 902 additions and 10 deletions
@@ -1,6 +1,9 @@
 package handlers

 import (
+	"encoding/json"
+	"fmt"
+	"log"
 	"net/http"
 	"strconv"

@@ -87,6 +90,7 @@ func (h *RAGHandlers) Search(c *gin.Context) {
 type RetrieveRequest struct {
 	Query string `json:"query" binding:"required"`
 	TopK  int    `json:"top_k,omitempty"`
+	// Context is the optional knowledge-space id the user explicitly chose (context
+	// chip); when non-empty, Retrieve filters the evidence to that knowledge space.
+	Context string `json:"context,omitempty"`
 }

 // Retrieve is the Authority Router endpoint. The Advisor calls this with ONLY a query and stays
@@ -105,6 +109,13 @@ func (h *RAGHandlers) Retrieve(c *gin.Context) {
 		req.TopK = 8
 	}

+	// E2 Term Resolution: expand unambiguous abbreviations (TOM/VVT/AVV/DSB/DSFA) into the
+	// query so retrieval finds them; ambiguous ones (DSE/DPA) are surfaced to the FE — NOT
+	// auto-mapped (chat context E1 wins, else the FE asks).
+	intent := ucca.DetectIntent(req.Query)
+	termRes := ucca.ResolveAbbreviations(req.Query)
+	req.Query = termRes.Expanded
+
 	results, err := h.ragClient.Retrieve(c.Request.Context(), req.Query, req.TopK)
 	if err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "RAG retrieve failed: " + err.Error()})
@@ -114,7 +125,42 @@ func (h *RAGHandlers) Retrieve(c *gin.Context) {
 	// Evidence-Type-Schicht: die autoritative typisierte Evidence (Fußnoten/Tabellen/Figuren) aus
 	// dem KB-Wissensraum SEPARAT surfacen, statt sie im Breit-Basis-Text-Merge zu verlieren.
 	// results[] bleibt der Text-Kontext fürs LLM + die Quellen-Liste.
+	// Context scoping (E5): the user explicitly chose a knowledge space (chip), so scope
+	// the evidence HARD to it (wider re-retrieve + domain filter) — no off-domain regelwerke
+	// (MDR/UStG/eIDAS) after a context decision.
+	if req.Context != "" {
+		if wide, werr := h.ragClient.Retrieve(c.Request.Context(), req.Query, 30); werr == nil && len(wide) > 0 {
+			results = ucca.FilterByKnowledgeSpace(wide, req.Context, req.TopK)
+		} else {
+			results = ucca.FilterByKnowledgeSpace(results, req.Context, req.TopK)
+		}
+	}
+
+	// G1 scope-gating: a named regulation scopes the evidence to its knowledge space.
+	// Re-retrieve wider and lead with the named regulation's domain so the L2 answer +
+	// [n] citations are built on scoped evidence, not the embedding-majority domain.
+	if scope := ucca.QueryKnowledgeSpace(req.Query); scope != "" {
+		if wide, werr := h.ragClient.Retrieve(c.Request.Context(), req.Query, 30); werr == nil && len(wide) > 0 {
+			results = ucca.ScopeResults(wide, scope, req.TopK)
+		} else {
+			results = ucca.ScopeResults(results, scope, req.TopK)
+		}
+	}
+
 	ev := h.ragClient.RetrieveEvidence(c.Request.Context(), req.Query)
+	// Concept->Norm recall injector: if the query names a legal concept, fetch its
+	// load-bearing norms (Datenschutzerklärung -> Art. 12/13/14 DSGVO, ...) and inject
+	// them into the evidence set so they surface (embedding similarity misses them).
+	if norms := ucca.ConceptNorms(req.Query); len(norms) > 0 {
+		top := 0.9
+		if len(results) > 0 {
+			top = results[0].Score
+		}
+		injected := h.ragClient.FetchByNormIDs(c.Request.Context(), norms, top-0.001)
+		results = ucca.InjectConceptNorms(results, injected, req.TopK)
+	}
+	clarity := ucca.ClassifyClarity(req.Query, results)
+	traceClarity(req.Query, clarity, results)

 	c.JSON(http.StatusOK, gin.H{
 		"query":      req.Query,
@@ -123,7 +169,11 @@ func (h *RAGHandlers) Retrieve(c *gin.Context) {
 		"assessment": ucca.Assess(results),
 		"footnotes":  footnotesFromEvidence(ev[ucca.EvidenceFootnote]),
 		"tables":     tablesFromEvidence(ev[ucca.EvidenceTable]),
-		"figures":    figuresFromEvidence(ev[ucca.EvidenceFigure]),
+		"evidence":        evidenceFromResults(results),
+		"visual_evidence": visualEvidenceFromEvidence(ev[ucca.EvidenceFigure]),
+		"clarity":   clarity,
+		"term_resolution": termRes.Ambiguous,
+		"interaction_intent": intent,
 	})
 }

@@ -163,23 +213,67 @@ func tablesFromEvidence(rs []ucca.LegalSearchResult) []gin.H {
 	return out
 }

-// figuresFromEvidence maps FIGURE evidence (C8). Empty until C8 populates figure units; image_url/
-// caption/vision_summary get added here when C8 lands — same path, no router change.
-func figuresFromEvidence(rs []ucca.LegalSearchResult) []gin.H {
+// visualEvidenceFromEvidence maps FIGURE evidence hits to the Visual Evidence contract
+// shape (C8), one entry per hit. For now visual_type is always "figure" and image_ref /
+// vision_summary are emitted as empty strings; they are meant to be populated once C8
+// lands, without changing the shape.
+func visualEvidenceFromEvidence(rs []ucca.LegalSearchResult) []gin.H {
 	out := make([]gin.H, 0, len(rs))
 	for _, r := range rs {
 		out = append(out, gin.H{
-			"figure_id":        r.CitationUnit,
-			"caption":          r.ArticleLabel,
-			"regulation_code":  r.RegulationCode,
-			"regulation_short": r.RegulationShort,
-			"regulation_name":  r.RegulationName,
-			"section":          r.RefCitationUnit,
+			"visual_id":       r.CitationUnit,
+			"visual_type":     "figure",
+			"caption":         r.ArticleLabel,
+			"document":        evidenceDocName(r),
+			"context":         ucca.KnowledgeSpaceOf(r.RegulationCode),
+			"regulation_code": r.RegulationCode,
+			"section":         r.RefCitationUnit,
+			"image_ref":       "",
+			"vision_summary":  "",
 		})
 	}
 	return out
 }

+// evidenceFromResults converts retrieval hits into the Evidence contract entries the
+// Advisor Evidence Workspace renders, one entry per hit and in the same order.
+// evidence_id is the hit's citation unit, falling back to its article label when the
+// citation unit is empty; snippet is evidenceSnippet(text, 280). The citations[] that
+// reference evidence_id come from the answer-generation step, not from here.
+func evidenceFromResults(rs []ucca.LegalSearchResult) []gin.H {
+	entries := make([]gin.H, len(rs))
+	for i := range rs {
+		hit := rs[i]
+		evidenceID := hit.ArticleLabel
+		if hit.CitationUnit != "" {
+			evidenceID = hit.CitationUnit
+		}
+		entries[i] = gin.H{
+			"evidence_id":     evidenceID,
+			"document":        evidenceDocName(hit),
+			"section":         hit.ArticleLabel,
+			"paragraph":       hit.Paragraph,
+			"snippet":         evidenceSnippet(hit.Text, 280),
+			"url":             hit.SourceURL,
+			"regulation_code": hit.RegulationCode,
+			"context":         ucca.KnowledgeSpaceOf(hit.RegulationCode),
+		}
+	}
+	return entries
+}
+
+// evidenceDocName picks the human-facing source name of a hit: RegulationShort when it
+// is set, otherwise RegulationName.
+func evidenceDocName(r ucca.LegalSearchResult) string {
+	name := r.RegulationName
+	if short := r.RegulationShort; short != "" {
+		name = short
+	}
+	return name
+}
+
+// evidenceSnippet returns s unchanged when it holds at most n runes; otherwise it
+// returns the first n runes followed by an ellipsis ("…"), so a shortened result is
+// n+1 runes long. No whitespace trimming is performed. A negative n is treated as 0
+// instead of panicking on the slice bound.
+func evidenceSnippet(s string, n int) string {
+	if n < 0 {
+		n = 0
+	}
+	// Ranging over a string yields the byte offset of each rune, so the cut point is
+	// found without copying the whole text into a []rune.
+	count := 0
+	for offset := range s {
+		if count == n {
+			return s[:offset] + "…"
+		}
+		count++
+	}
+	return s
+}
+
 // ListRegulations returns the list of available regulations in the corpus.
 // GET /sdk/v1/rag/regulations
 func (h *RAGHandlers) ListRegulations(c *gin.Context) {
@@ -334,3 +428,29 @@ func (h *RAGHandlers) LegalCorpusStructure(c *gin.Context) {
 		},
 	})
 }
+
+// traceClarity emits one structured CLARITY_TRACE log line per retrieve for the macmini
+// test session, so qualitative user ratings can be correlated with the gate decision.
+// The line carries the query, the clarity mode/reason/concentration/dominant context,
+// the candidate chips as "id:hits", and the RegulationShort of up to the first three
+// results. If the payload cannot be encoded, the failure is logged instead of emitting
+// an empty trace line.
+func traceClarity(query string, cl ucca.Clarity, results []ucca.LegalSearchResult) {
+	top := make([]string, 0, 3)
+	for i, r := range results {
+		if i >= 3 {
+			break
+		}
+		top = append(top, r.RegulationShort)
+	}
+	chips := make([]string, 0, len(cl.CandidateContexts))
+	for _, c := range cl.CandidateContexts {
+		chips = append(chips, fmt.Sprintf("%s:%d", c.ID, c.Hits))
+	}
+	b, err := json.Marshal(map[string]interface{}{
+		"query":         query,
+		"mode":          cl.Mode,
+		"reason":        cl.Reason,
+		"concentration": cl.Concentration,
+		"dominant":      cl.DominantContext,
+		"chips":         chips,
+		"top_evidence":  top,
+	})
+	if err != nil {
+		// Tracing is best-effort: report the encoding problem and keep serving.
+		log.Printf("CLARITY_TRACE marshal failed: %v", err)
+		return
+	}
+	log.Printf("CLARITY_TRACE %s", string(b))
+}
@@ -0,0 +1,33 @@
+package ucca
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestResolveAbbreviations pins the E2 contract: unambiguous abbreviations are spelled
+// out, ambiguous ones are only flagged, and matching is whole-word.
+func TestResolveAbbreviations(t *testing.T) {
+	// Unambiguous abbreviation: expanded, nothing flagged.
+	tom := ResolveAbbreviations("Was ist eine TOM?")
+	if !strings.Contains(tom.Expanded, "technische und organisatorische") {
+		t.Errorf("TOM must be expanded, got %q", tom.Expanded)
+	}
+	if len(tom.Ambiguous) != 0 {
+		t.Errorf("TOM must not be ambiguous, got %v", tom.Ambiguous)
+	}
+
+	// Ambiguous DSE: flagged, NOT auto-expanded (chat context must win, else FE asks).
+	dse := ResolveAbbreviations("welche Infos in eine DSE?")
+	if dse.Expanded != "welche Infos in eine DSE?" {
+		t.Errorf("DSE must NOT be auto-mapped, got %q", dse.Expanded)
+	}
+	flaggedOnce := len(dse.Ambiguous) == 1 &&
+		dse.Ambiguous[0].Abbreviation == "DSE" &&
+		len(dse.Ambiguous[0].Candidates) == 2
+	if !flaggedOnce {
+		t.Errorf("DSE must be flagged ambiguous with 2 candidates, got %v", dse.Ambiguous)
+	}
+
+	// No abbreviation at all: the query passes through unchanged.
+	if plain := ResolveAbbreviations("Wie ist das Wetter?"); plain.Expanded != "Wie ist das Wetter?" {
+		t.Errorf("query without abbreviation must be unchanged")
+	}
+
+	// Substrings must NOT match ("atom" contains "tom" but is not the word TOM).
+	atom := ResolveAbbreviations("Was ist ein Atom?")
+	if strings.Contains(atom.Expanded, "organisatorische") {
+		t.Errorf("substring 'tom' in 'Atom' must not trigger expansion")
+	}
+}
@@ -0,0 +1,65 @@
+package ucca
+
+import (
+	"strings"
+	"unicode"
+)
+
+// TermResolution is the E2 (Term Resolution) signal in the Advisor Reasoning Stack.
+// Expanded drives retrieval internally (unambiguous abbreviations are spelled out so
+// the embedding/concept layer finds them). Ambiguous is surfaced to the FE, which
+// resolves it via chat context (E1) or asks the user ("Meinst du X oder Y?"). The
+// lexicon NEVER auto-maps an ambiguous abbreviation (e.g. DSE) — real-life discipline.
+type TermResolution struct {
+	// Expanded is the original query, with the spelled-out terms appended when at least
+	// one unambiguous abbreviation was found. Excluded from JSON output (tag "-").
+	Expanded  string          `json:"-"`
+	// Ambiguous lists the abbreviations that have more than one candidate meaning, in
+	// the order they first appear in the query.
+	Ambiguous []TermAmbiguity `json:"ambiguous,omitempty"`
+}
+
+// TermAmbiguity flags one abbreviation the SDK could not resolve deterministically.
+type TermAmbiguity struct {
+	// Abbreviation is the abbreviation in upper case (e.g. "DSE").
+	Abbreviation string   `json:"abbreviation"`
+	// Candidates are the possible canonical terms taken from the lexicon.
+	Candidates   []string `json:"candidates"`
+}
+
+// abbreviationLexicon maps a lowercased abbreviation to its canonical term(s). Keys
+// must be lowercase because ResolveAbbreviations lowercases each query word before the
+// lookup. Exactly one candidate means the abbreviation is expanded into the query; more
+// than one candidate means it is ambiguous and only flagged, never expanded. Start
+// small (User-Spec).
+var abbreviationLexicon = map[string][]string{
+	"dse":  {"Datenschutzerklärung", "Datenschutz-Folgenabschätzung"}, // ambiguous — context wins, else ask
+	"dsfa": {"Datenschutz-Folgenabschätzung"},
+	"tom":  {"technische und organisatorische Maßnahmen"},
+	"vvt":  {"Verzeichnis von Verarbeitungstätigkeiten"},
+	"avv":  {"Auftragsverarbeitungsvertrag"},
+	"dsb":  {"Datenschutzbeauftragter"},
+	"dpa":  {"Data Processing Agreement", "Datenschutzaufsichtsbehörde"}, // ambiguous
+}
+
+// ResolveAbbreviations scans the query word by word (words are maximal runs of letters
+// and numbers) and looks each lowercased word up in abbreviationLexicon. A word with
+// exactly one candidate has that term appended to the query in Expanded; any other
+// lexicon hit is reported in Ambiguous with its upper-cased abbreviation. Each
+// abbreviation is handled once, in order of first appearance, so the result does not
+// depend on map iteration order. Matching is whole-word and case-insensitive: "TOM"
+// hits, "atom" does not.
+func ResolveAbbreviations(query string) TermResolution {
+	isSeparator := func(r rune) bool {
+		return !(unicode.IsLetter(r) || unicode.IsNumber(r))
+	}
+
+	var (
+		spelledOut []string
+		flagged    []TermAmbiguity
+	)
+	handled := make(map[string]struct{})
+	for _, token := range strings.FieldsFunc(query, isSeparator) {
+		key := strings.ToLower(token)
+		candidates, known := abbreviationLexicon[key]
+		if !known {
+			continue
+		}
+		if _, dup := handled[key]; dup {
+			continue
+		}
+		handled[key] = struct{}{}
+
+		switch len(candidates) {
+		case 1:
+			spelledOut = append(spelledOut, candidates[0])
+		default:
+			flagged = append(flagged, TermAmbiguity{
+				Abbreviation: strings.ToUpper(key),
+				Candidates:   candidates,
+			})
+		}
+	}
+
+	resolved := TermResolution{Expanded: query, Ambiguous: flagged}
+	if len(spelledOut) > 0 {
+		resolved.Expanded = query + " " + strings.Join(spelledOut, " ")
+	}
+	return resolved
+}
@@ -0,0 +1,135 @@
+package ucca
+
+import (
+	"sort"
+	"strings"
+)
+
+// Clarity is the READ-ONLY, INSTRUMENTED clarity-gate signal emitted alongside a
+// retrieve response. It does NOT change retrieval or advisor behaviour yet — the
+// advisor still answers normally. Once ~30-50 real questions are collected the
+// thresholds get finalised and the gate is activated in the advisor flow.
+//
+// Ambiguity has two independent sources (empirically measured, 12-question set):
+//   - retrieval scatter: hits spread across many knowledge spaces (low
+//     concentration / high domain_count) — the retriever itself can't localise.
+//   - conceptual generality: a general term the corpus OVER-localises (e.g. "PDCA"
+//     concentrates on datenschutz but is cross-domain) — only an LLM knows this.
+//     The middle band is where the LLM-intent classifier must decide.
+//
+// G1 (explicit scope): when the query NAMES a regulation ("... nach TRGS", "CRA
+// ...", "MaschinenVO ..."), that explicit context beats the embedding scatter —
+// the gate scopes to the named regulation's knowledge space regardless of
+// concentration. This is regulation detection, NOT a broad-term list.
+//
+// Concentration and DomainCount stay zero when no hit maps to a knowledge space. When
+// G1 overrides DominantContext, Concentration and CandidateContexts still describe the
+// retrieved hits, not the named regulation.
+type Clarity struct {
+	Mode              string           `json:"mode"`               // "answer" | "clarify"
+	Reason            string           `json:"reason"`             // low_concentration | many_domains | high_confidence_scope | middle_band_llm_needed | explicit_scope | no_domain_signal
+	Concentration     float64          `json:"concentration"`      // fraction of tagged hits in the dominant knowledge space
+	DomainCount       int              `json:"domain_count"`       // distinct knowledge spaces in the hits
+	DominantContext   string           `json:"dominant_context"`   // knowledge-space id (explicit scope wins if the query names a regulation)
+	CandidateContexts []ClarityContext `json:"candidate_contexts"` // corpus-grounded chips (spaces actually present), most hits first
+}
+
+// ClarityContext is one corpus-grounded context chip: a knowledge space that actually
+// occurs in the retrieved hits.
+type ClarityContext struct {
+	ID    string `json:"id"`    // knowledge-space id
+	Label string `json:"label"` // display label looked up in KnowledgeSpaceLabel
+	Hits  int    `json:"hits"`  // number of hits that fall into this knowledge space
+}
+
+// Tiered thresholds — INSTRUMENTED DEFAULTS, calibrate on 30-50 real questions.
+// ClassifyClarity evaluates them in the order listed; the first matching tier wins, and
+// a concentration between the two concentration bounds (with fewer than
+// clarityMinDomains spaces) falls into the "middle band".
+const (
+	clarityMaxConcentration = 0.45 // <= this => clarify (retrieval scatter)
+	clarityMinDomains       = 4    // >= this => clarify (broad spread)
+	clarityAnswerConc       = 0.75 // >= this => answer (confident scope)
+)
+
+// QueryKnowledgeSpace detects an EXPLICIT regulation mention in the query and maps
+// it to a knowledge space. Regulation detection (authority), not a broad-term list:
+// only fires when the user names a concrete regelwerk. Returns "" if none is named.
+//
+// The query is lowercased and every punctuation character (anything that is not a
+// letter, digit or hyphen) is turned into a space before matching, so the
+// space-delimited whole-word tokens (" cra ", " dsfa ", " mdr ", ...) also match next
+// to punctuation ("Was ist eine DSFA?"). Hyphens are kept because some patterns
+// contain them ("nis-2", "ki-vo"). The cases are checked top to bottom; the first
+// match wins.
+//
+// DORA maps to "finanz", the knowledge space KnowledgeSpaceOf assigns to the DORA
+// regulation code, so scoping by the returned id selects the named regulation itself.
+func QueryKnowledgeSpace(query string) string {
+	toBoundary := func(r rune) rune {
+		switch {
+		case r == '-', r >= '0' && r <= '9', r >= 'a' && r <= 'z':
+			return r
+		case r < 0x80:
+			// Remaining ASCII: whitespace and punctuation.
+			return ' '
+		}
+		switch r {
+		case '„', '“', '”', '‚', '‘', '’', '«', '»', '–', '—', '…':
+			// Common typographic punctuation.
+			return ' '
+		}
+		// Other non-ASCII runes (umlauts, ß, ...) are part of words.
+		return r
+	}
+	q := " " + strings.Map(toBoundary, strings.ToLower(query)) + " "
+	has := func(subs ...string) bool {
+		for _, s := range subs {
+			if strings.Contains(q, s) {
+				return true
+			}
+		}
+		return false
+	}
+	switch {
+	case has("trgs", "trbs", " asr ", "gefahrstoff", "arbeitsplatzgrenzwert", "arbeitsschutz"):
+		return "arbeitsschutz"
+	case has("dsgvo", "gdpr", "bdsg", "tdddg", "ttdsg", " dsk ", "edpb", "datenschutz", " dsfa "):
+		return "datenschutz"
+	case has(" cra ", "cyber resilience", "nis2", "nis-2", "enisa", "bsig", "kritis"):
+		return "cyber"
+	case has(" dora "):
+		return "finanz"
+	case has("ai act", "ki-vo", "ki-verordnung", "ki-system"):
+		return "ki"
+	case has("maschinenverordnung", "maschinenvo", "maschvo", "maschinenrichtlinie", " gpsr ", "produktsicherheit"):
+		return "produktsicherheit"
+	case has(" mdr ", "medizinprodukt", "medical device"):
+		return "produktsicherheit"
+	default:
+		return ""
+	}
+}
+
+// ClassifyClarity computes the read-only clarity signal for a query and its hits.
+//
+// Hits are counted per knowledge space (hits whose regulation code maps to no space
+// are ignored). With no tagged hit at all the result is "clarify"/"no_domain_signal".
+// Otherwise the spaces are ranked by hit count (ties broken by id) and the tiers are
+// applied in order: low concentration, many domains, high concentration, middle band.
+// G1 override: if the query names a regulation (QueryKnowledgeSpace), the result is
+// always "answer"/"explicit_scope" with that space as DominantContext.
+func ClassifyClarity(query string, results []LegalSearchResult) Clarity {
+	explicit := QueryKnowledgeSpace(query)
+
+	hitsBySpace := make(map[string]int)
+	tagged := 0
+	for i := range results {
+		space := KnowledgeSpaceOf(results[i].RegulationCode)
+		if space == "" {
+			continue
+		}
+		hitsBySpace[space]++
+		tagged++
+	}
+
+	if tagged == 0 {
+		signal := Clarity{Mode: "clarify", Reason: "no_domain_signal", CandidateContexts: []ClarityContext{}}
+		if explicit != "" {
+			signal.Mode, signal.Reason, signal.DominantContext = "answer", "explicit_scope", explicit
+		}
+		return signal
+	}
+
+	// Rank the spaces: most hits first, id as deterministic tie-breaker.
+	ranked := make([]string, 0, len(hitsBySpace))
+	for id := range hitsBySpace {
+		ranked = append(ranked, id)
+	}
+	sort.Slice(ranked, func(a, b int) bool {
+		na, nb := hitsBySpace[ranked[a]], hitsBySpace[ranked[b]]
+		if na == nb {
+			return ranked[a] < ranked[b]
+		}
+		return na > nb
+	})
+
+	chips := make([]ClarityContext, 0, len(ranked))
+	for _, id := range ranked {
+		chips = append(chips, ClarityContext{ID: id, Label: KnowledgeSpaceLabel[id], Hits: hitsBySpace[id]})
+	}
+	signal := Clarity{
+		Concentration:     float64(hitsBySpace[ranked[0]]) / float64(tagged),
+		DomainCount:       len(hitsBySpace),
+		DominantContext:   ranked[0],
+		CandidateContexts: chips,
+	}
+
+	// G1: an explicitly named regulation beats the embedding scatter.
+	if explicit != "" {
+		signal.Mode, signal.Reason, signal.DominantContext = "answer", "explicit_scope", explicit
+		return signal
+	}
+
+	switch {
+	case signal.Concentration <= clarityMaxConcentration:
+		signal.Mode, signal.Reason = "clarify", "low_concentration"
+	case signal.DomainCount >= clarityMinDomains:
+		signal.Mode, signal.Reason = "clarify", "many_domains"
+	case signal.Concentration >= clarityAnswerConc:
+		signal.Mode, signal.Reason = "answer", "high_confidence_scope"
+	default:
+		signal.Mode, signal.Reason = "answer", "middle_band_llm_needed"
+	}
+	return signal
+}
@@ -0,0 +1,64 @@
+package ucca
+
+import "testing"
+
+// TestKnowledgeSpaceOf pins the regulation-code -> knowledge-space mapping for a
+// representative code per space, plus the empty code.
+func TestKnowledgeSpaceOf(t *testing.T) {
+	table := []struct {
+		code string
+		want string
+	}{
+		{"DSGVO", "datenschutz"},
+		{"BDSG", "datenschutz"},
+		{"DSK SDM B51 ZUGRIFFE", "datenschutz"},
+		{"EDPS DIGITAL ETHICS", "datenschutz"},
+		{"TRGS 900", "arbeitsschutz"},
+		{"OSHA 1910 SUBPART O", "arbeitsschutz"},
+		{"HGB", "wirtschaftsrecht"},
+		{"BGB", "wirtschaftsrecht"},
+		{"MASCHINENVO", "produktsicherheit"},
+		{"MVO", "produktsicherheit"},
+		{"CRA", "cyber"},
+		{"NIST SP800 53R5", "cyber"},
+		{"AI ACT", "ki"},
+		{"KI-VO", "ki"},
+		{"DORA", "finanz"},
+		{"ARG", "arbeitsrecht"},
+		{"", ""},
+	}
+	for _, row := range table {
+		got := KnowledgeSpaceOf(row.code)
+		if got == row.want {
+			continue
+		}
+		t.Errorf("KnowledgeSpaceOf(%q)=%q want %q", row.code, got, row.want)
+	}
+}
+
+// TestClassifyClarity checks the two basic tiers: hits scattered over many knowledge
+// spaces must ask for clarification, hits concentrated in one space must answer.
+func TestClassifyClarity(t *testing.T) {
+	hits := func(codes ...string) []LegalSearchResult {
+		rs := make([]LegalSearchResult, 0, len(codes))
+		for _, code := range codes {
+			rs = append(rs, LegalSearchResult{RegulationCode: code})
+		}
+		return rs
+	}
+
+	spread := ClassifyClarity("Welche Risiken gibt es?",
+		hits("CRA", "MASCHINENVO", "EU MDR", "KI-VO", "TRBS 1111", "OWASP TOP10"))
+	if spread.Mode != "clarify" {
+		t.Errorf("scattered: mode=%q reason=%q want clarify", spread.Mode, spread.Reason)
+	}
+
+	focused := ClassifyClarity("Was ist eine DSFA?",
+		hits("DSGVO", "BDSG", "DSK SDM", "EDPB WP243", "TDDDG"))
+	if focused.Mode != "answer" || focused.DominantContext != "datenschutz" {
+		t.Errorf("concentrated: mode=%q dominant=%q want answer/datenschutz", focused.Mode, focused.DominantContext)
+	}
+}
+
+// TestClassifyClarity_ExplicitScope covers the G1 override: a regulation named in the
+// query decides the scope even when most hits belong to another knowledge space.
+func TestClassifyClarity_ExplicitScope(t *testing.T) {
+	codes := []string{
+		"DSK SDM METHODE", "DSK SDM V31", "DSK SDM B41 PLANEN",
+		"DSGVO", "DSK SDM", "TRGS 900", "TRGS 554",
+	}
+	mostlyDatenschutz := make([]LegalSearchResult, len(codes))
+	for i, code := range codes {
+		mostlyDatenschutz[i] = LegalSearchResult{RegulationCode: code}
+	}
+
+	// Query names TRGS -> arbeitsschutz wins although retrieval leans to datenschutz.
+	named := ClassifyClarity("Schwellwertanalyse nach TRGS", mostlyDatenschutz)
+	if named.Mode != "answer" || named.Reason != "explicit_scope" || named.DominantContext != "arbeitsschutz" {
+		t.Errorf("explicit TRGS: mode=%q reason=%q dominant=%q want answer/explicit_scope/arbeitsschutz", named.Mode, named.Reason, named.DominantContext)
+	}
+
+	// No regulation named -> falls through to the tiered logic.
+	unnamed := ClassifyClarity("Welche Risiken gibt es?", mostlyDatenschutz)
+	if unnamed.Reason == "explicit_scope" {
+		t.Errorf("no reg named should not be explicit_scope, got %q", unnamed.Reason)
+	}
+}
@@ -0,0 +1,88 @@
+package ucca
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+)
+
+// FetchByNormIDs is the fetch side of the Concept->Norm recall injector. It scrolls the
+// KB slice collection for points whose norm_id payload matches any of normIDs
+// (requesting at most 3*len(normIDs) points) and keeps the first point returned per
+// norm_id as its representative unit. No similarity query is involved: every returned
+// LegalSearchResult carries the caller-provided score, which the injector uses for
+// placement. Returns nil when no KB slice collection is configured, normIDs is empty,
+// or any step fails (graceful degradation).
+func (c *LegalRAGClient) FetchByNormIDs(ctx context.Context, normIDs []string, score float64) []LegalSearchResult {
+	if c.kbSliceCollection == "" || len(normIDs) == 0 {
+		return nil
+	}
+
+	// One match condition per norm id, OR-combined via "should".
+	anyOf := make([]map[string]interface{}, len(normIDs))
+	for i, id := range normIDs {
+		anyOf[i] = map[string]interface{}{
+			"key":   "norm_id",
+			"match": map[string]interface{}{"value": id},
+		}
+	}
+	payload, err := json.Marshal(map[string]interface{}{
+		"limit":        len(normIDs) * 3,
+		"with_payload": true,
+		"with_vectors": false,
+		"filter":       map[string]interface{}{"should": anyOf},
+	})
+	if err != nil {
+		return nil
+	}
+
+	endpoint := fmt.Sprintf("%s/collections/%s/points/scroll", c.qdrantURL, c.kbSliceCollection)
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
+	if err != nil {
+		return nil
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	if key := c.qdrantAPIKey; key != "" {
+		httpReq.Header.Set("api-key", key)
+	}
+
+	resp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return nil
+	}
+	defer func() { _ = resp.Body.Close() }()
+	if resp.StatusCode != http.StatusOK {
+		return nil
+	}
+
+	var page qdrantScrollResponse
+	if err := json.NewDecoder(resp.Body).Decode(&page); err != nil {
+		return nil
+	}
+
+	// Keep only the first point seen for each norm id.
+	picked := make(map[string]struct{}, len(normIDs))
+	units := make([]LegalSearchResult, 0, len(normIDs))
+	for _, point := range page.Result.Points {
+		id := getString(point.Payload, "norm_id")
+		if id == "" {
+			continue
+		}
+		if _, dup := picked[id]; dup {
+			continue
+		}
+		picked[id] = struct{}{}
+		units = append(units, scrollPointToResult(point.Payload, score))
+	}
+	return units
+}
+
+// scrollPointToResult builds a LegalSearchResult from a scroll-point payload, using the
+// same payload keys as hitsToResults. The regulation code is read from
+// "regulation_code" and falls back to "regulation_id" when that is empty. The score is
+// not taken from the payload; the caller supplies it (concept rank).
+func scrollPointToResult(payload map[string]interface{}, score float64) LegalSearchResult {
+	field := func(key string) string { return getString(payload, key) }
+
+	code := field("regulation_code")
+	if code == "" {
+		code = field("regulation_id")
+	}
+
+	res := LegalSearchResult{RegulationCode: code, Score: score}
+	res.Text = field("chunk_text")
+	res.RegulationName = field("regulation_name_de")
+	res.RegulationShort = field("regulation_short")
+	res.Category = field("category")
+	res.Article = field("article")
+	res.ArticleLabel = field("article_label")
+	res.Paragraph = field("paragraph")
+	res.SourceURL = field("source_url")
+	res.CitationUnit = field("citation_unit")
+	return res
+}
@@ -0,0 +1,97 @@
+package ucca
+
+import (
+	"sort"
+	"strings"
+)
+
+// Legal Concept Ontology — the domain-knowledge bridge for the Concept->Norm recall
+// injector. The words users type ("Datenschutzerklärung", "Cookie Banner") are
+// rarely identical to the article titles that actually govern them (Art. 12/13/14
+// DSGVO, § 25 TDDDG). Embedding similarity misses this leap, so these bridges are
+// curated: concept keyword -> load-bearing norm_ids. This is NOT a fallback to
+// hardcoding — it is domain knowledge that surfaces the normatively load-bearing
+// units within the (already correctly retrieved) documents.
+//
+// conceptNorm is one such bridge: a set of keywords and the norm ids they pull in.
+type conceptNorm struct {
+	keywords []string // lowercase substrings; any one of them found in the query triggers the entry
+	normIDs  []string // norm_ids contributed when the entry triggers
+}
+
+// legalConceptOntology is the curated concept -> norm table consulted by ConceptNorms.
+// Keywords are written in lowercase (including ae/oe/ue/ss spellings) because the query
+// is lowercased and substring-matched against them; entries are evaluated in the order
+// listed, which determines the order of the returned norm ids.
+var legalConceptOntology = []conceptNorm{
+	{[]string{"datenschutzerklärung", "datenschutzerklaerung", "privacy policy", "datenschutzhinweise", "datenschutzinformation"},
+		[]string{"EU-DSGVO-Art12", "EU-DSGVO-Art13", "EU-DSGVO-Art14"}},
+	{[]string{"cookie banner", "cookie-banner", "cookies", "cookie", "tracking"},
+		[]string{"DE-TDDDG-§25", "EU-DSGVO-Art6", "EU-DSGVO-Art7"}},
+	{[]string{"dsfa", "folgenabschätzung", "folgenabschaetzung", "datenschutz-folgenabschätzung"},
+		[]string{"EU-DSGVO-Art35", "EU-DSGVO-Art36"}},
+	{[]string{"auskunft", "auskunftsrecht", "auskunftsersuchen"},
+		[]string{"EU-DSGVO-Art15"}},
+	{[]string{"löschung", "loeschung", "vergessenwerden", "recht auf vergessen"},
+		[]string{"EU-DSGVO-Art17"}},
+	{[]string{"datenübertragbarkeit", "datenuebertragbarkeit", "portabilität", "portabilitaet"},
+		[]string{"EU-DSGVO-Art20"}},
+	{[]string{"widerspruch", "widerspruchsrecht"},
+		[]string{"EU-DSGVO-Art21"}},
+	{[]string{"datenpanne", "datenschutzverletzung", "data breach", "verletzung des schutzes"},
+		[]string{"EU-DSGVO-Art33", "EU-DSGVO-Art34"}},
+	// E4-Quick-Curation (2026-07-01): resolved abbreviations (E2) pull their core norms.
+	{[]string{"technische und organisatorische maßnahmen", "technische und organisatorische massnahmen"},
+		[]string{"EU-DSGVO-Art32", "EU-DSGVO-Art25", "EU-DSGVO-Art5"}},
+	{[]string{"verzeichnis von verarbeitungstätigkeiten", "verzeichnis von verarbeitungstaetigkeiten", "verarbeitungsverzeichnis"},
+		[]string{"EU-DSGVO-Art30"}},
+	{[]string{"auftragsverarbeitungsvertrag", "auftragsverarbeitung", "auftragsverarbeiter"},
+		[]string{"EU-DSGVO-Art28"}},
+	// Stem only, so it matches "datenschutzbeauftragter", "datenschutzbeauftragte", ...
+	{[]string{"datenschutzbeauftragt"},
+		[]string{"EU-DSGVO-Art37", "EU-DSGVO-Art38", "EU-DSGVO-Art39"}},
+}
+
+// ConceptNorms collects the load-bearing norm_ids of every ontology entry that has at
+// least one keyword occurring (as a substring) in the lowercased query. The ids are
+// returned without duplicates, in ontology order. The result is empty (not nil) when
+// the query names no concept.
+func ConceptNorms(query string) []string {
+	lowered := strings.ToLower(query)
+	mentions := func(entry conceptNorm) bool {
+		for _, kw := range entry.keywords {
+			if strings.Contains(lowered, kw) {
+				return true
+			}
+		}
+		return false
+	}
+
+	norms := []string{}
+	added := make(map[string]struct{})
+	for _, entry := range legalConceptOntology {
+		if !mentions(entry) {
+			continue
+		}
+		for _, id := range entry.normIDs {
+			if _, dup := added[id]; dup {
+				continue
+			}
+			added[id] = struct{}{}
+			norms = append(norms, id)
+		}
+	}
+	return norms
+}
+
+// InjectConceptNorms merges concept-injected norm units into the results so the
+// load-bearing norms are VISIBLE in the evidence set. With nothing to inject, results
+// is returned as is. Otherwise injected units are appended unless their citation unit
+// is empty or already present, the merged list is stably re-sorted by descending
+// score, and it is cut to topK entries when topK > 0. The injected units are expected
+// to carry a just-below-top score so they surface high WITHOUT displacing the top
+// document hit (inject, don't blindly dominate).
+func InjectConceptNorms(results, injected []LegalSearchResult, topK int) []LegalSearchResult {
+	if len(injected) == 0 {
+		return results
+	}
+
+	known := make(map[string]struct{}, len(results)+len(injected))
+	combined := make([]LegalSearchResult, 0, len(results)+len(injected))
+	for _, hit := range results {
+		combined = append(combined, hit)
+		if hit.CitationUnit != "" {
+			known[hit.CitationUnit] = struct{}{}
+		}
+	}
+	for _, candidate := range injected {
+		unit := candidate.CitationUnit
+		if unit == "" {
+			continue
+		}
+		if _, dup := known[unit]; dup {
+			continue
+		}
+		known[unit] = struct{}{}
+		combined = append(combined, candidate)
+	}
+
+	sort.SliceStable(combined, func(a, b int) bool {
+		return combined[a].Score > combined[b].Score
+	})
+	if topK > 0 && topK < len(combined) {
+		combined = combined[:topK]
+	}
+	return combined
+}
@@ -0,0 +1,48 @@
+package ucca
+
+import "testing"
+
+// TestConceptNorms checks that a query naming two concepts yields the norms of both,
+// and that a query naming none yields nothing.
+func TestConceptNorms(t *testing.T) {
+	query := "Was muss ich beachten wenn ich meine Datenschutzerklärung schreibe für meine Website mit Cookie Banner?"
+	got := ConceptNorms(query)
+
+	returned := make(map[string]bool, len(got))
+	for _, id := range got {
+		returned[id] = true
+	}
+	missing := map[string]bool{}
+	for _, id := range []string{
+		"EU-DSGVO-Art12", "EU-DSGVO-Art13", "EU-DSGVO-Art14",
+		"DE-TDDDG-§25", "EU-DSGVO-Art6", "EU-DSGVO-Art7",
+	} {
+		if !returned[id] {
+			missing[id] = true
+		}
+	}
+	if len(missing) > 0 {
+		t.Errorf("ConceptNorms missing %v; got %v", missing, got)
+	}
+
+	if none := ConceptNorms("Wie ist das Wetter heute?"); len(none) != 0 {
+		t.Errorf("no concept named should yield no norms")
+	}
+}
+
+// TestInjectConceptNorms checks the injector contract: the top document hit stays
+// first, an already-present citation unit is not duplicated, and a new norm unit
+// becomes visible in the merged result.
+func TestInjectConceptNorms(t *testing.T) {
+	results := []LegalSearchResult{
+		{CitationUnit: "DSK OH Telemedien", Score: 0.98},
+		{CitationUnit: "Art. 25 DSGVO", Score: 0.95},
+	}
+	injected := []LegalSearchResult{
+		{CitationUnit: "Art. 13 DSGVO", Score: 0.979},
+		{CitationUnit: "Art. 25 DSGVO", Score: 0.979}, // already present -> must not double
+	}
+	out := InjectConceptNorms(results, injected, 10)
+	// Guard the out[0] access below: fail cleanly instead of panicking on an empty result.
+	if len(out) == 0 {
+		t.Fatalf("InjectConceptNorms returned no results")
+	}
+	if out[0].CitationUnit != "DSK OH Telemedien" {
+		t.Errorf("top document hit must stay #1 (not dominated), got %s", out[0].CitationUnit)
+	}
+	if len(out) != 3 {
+		t.Errorf("expected 3 (Art.25 not duplicated), got %d", len(out))
+	}
+	found := false
+	for _, r := range out {
+		if r.CitationUnit == "Art. 13 DSGVO" {
+			found = true
+		}
+	}
+	if !found {
+		t.Errorf("Art. 13 DSGVO must be injected + visible")
+	}
+}
@@ -0,0 +1,26 @@
+package ucca
+
+import "testing"
+
+// TestFilterByKnowledgeSpace checks that scoping keeps only hits of the chosen
+// knowledge space and falls back to the full input when the space has no hits.
+func TestFilterByKnowledgeSpace(t *testing.T) {
+	const space = "datenschutz"
+	mixed := []LegalSearchResult{
+		{CitationUnit: "Art. 13 DSGVO", RegulationCode: "DSGVO"},
+		{CitationUnit: "EU Mdr", RegulationCode: "EU MDR"},
+		{CitationUnit: "UStG § 14", RegulationCode: "USTG"},
+		{CitationUnit: "DSK OH Telemedien", RegulationCode: "DSK OH TELEMEDIEN"},
+		{CitationUnit: "eIDAS", RegulationCode: "EIDAS"},
+	}
+
+	scoped := FilterByKnowledgeSpace(mixed, space, 10)
+	for i := range scoped {
+		hit := scoped[i]
+		if KnowledgeSpaceOf(hit.RegulationCode) != space {
+			t.Errorf("off-domain leaked into scoped result: %s (%s)", hit.CitationUnit, hit.RegulationCode)
+		}
+	}
+	// Expected survivors: Art. 13 DSGVO + DSK OH Telemedien.
+	if n := len(scoped); n != 2 {
+		t.Errorf("expected 2 datenschutz hits, got %d", n)
+	}
+
+	// A domain with no hits falls back to the input (never strand the answer).
+	fallback := FilterByKnowledgeSpace(mixed, "maschinen", 10)
+	if len(fallback) != len(mixed) {
+		t.Errorf("no-hit domain should fall back to full input")
+	}
+}
@@ -0,0 +1,46 @@
+package ucca
+
+import "strings"
+
+// DetectIntent classifies the INTERACTION INTENT of a query (Advisor Reasoning Stack
+// E3). The same norms answer very differently depending on the TASK the user wants:
+// "Was ist X?" (definition) vs "Wie schreibe ich X?" (anleitung) vs "Prüfe X" (review).
+// The SDK detects the intent deterministically and emits it; the FE picks the answer
+// FORM, so the LLM gets a precise assignment instead of guessing the format.
+//
+// Detection is a substring search of cue phrases in the lowercased, space-padded
+// query. The tiers are checked in the order listed and the first tier with a matching
+// cue wins. Returns "" (neutral) when no cue matches. First tier of ~20-30 intent types.
+func DetectIntent(query string) string {
+	tiers := []struct {
+		intent string
+		cues   []string
+	}{
+		{"review", []string{
+			"prüfe", "prüf mein", "überprüfe", "überprüf", "review", "checke mein",
+			"ist mein", "ist meine", "ist unser", "ist unsere", "konform", "stimmt mein",
+			"bewerte mein", "analysiere mein",
+		}},
+		{"checkliste", []string{
+			"checkliste", "was muss ich alles", "was gehört alles", "was gehört in",
+			"welche punkte muss", "was brauche ich alles",
+		}},
+		{"vergleich", []string{
+			"vergleich", "unterschied", "worin unterscheid", " vs ", " versus ",
+			"gegenüber", "im gegensatz",
+		}},
+		{"anleitung", []string{
+			"wie schreibe", "wie erstelle", "wie erstell", "wie mache", "wie baue",
+			"wie setze ich", "wie gehe ich vor", "wie formuliere", "wie richte ich",
+			"anleitung", "schritt für schritt", "schritt-für-schritt", "erstelle mir",
+			"erstell mir", "generiere", "was muss ich beachten", "worauf muss ich achten",
+		}},
+		{"risikoanalyse", []string{
+			"welche risiken", "welche gefahren", "risikoanalyse", "welche bedrohungen",
+		}},
+		{"definition", []string{
+			"was ist", "was bedeutet", "was versteht man", "was sind", "definition",
+			"erkläre mir", "erklär mir", "was heißt", "was genau ist",
+		}},
+	}
+
+	padded := " " + strings.ToLower(query) + " "
+	for _, tier := range tiers {
+		for _, cue := range tier.cues {
+			if strings.Contains(padded, cue) {
+				return tier.intent
+			}
+		}
+	}
+	return ""
+}
@@ -0,0 +1,22 @@
+package ucca
+
+import "testing"
+
+func TestDetectIntent(t *testing.T) {
+	cases := map[string]string{
+		"Was ist eine Datenschutzerklärung?":                 "definition",
+		"Wie schreibe ich eine Datenschutzerklärung?":        "anleitung",
+		"Was muss ich beachten wenn ich eine DSE schreibe?":  "anleitung",
+		"Prüfe meine Datenschutzerklärung.":                  "review",
+		"Ist meine Datenschutzerklärung konform?":            "review",
+		"Vergleiche DSGVO und BDSG.":                         "vergleich",
+		"Welche Risiken gibt es?":                            "risikoanalyse",
+		"Erstelle mir eine Checkliste für die DSFA.":         "checkliste",
+		"Wie ist das Wetter?":                                "",
+	}
+	for q, want := range cases {
+		if got := DetectIntent(q); got != want {
+			t.Errorf("DetectIntent(%q)=%q want %q", q, got, want)
+		}
+	}
+}
@@ -0,0 +1,148 @@
+package ucca
+
+import "strings"
+
+// KnowledgeSpace is the CHIP-level knowledge domain used by the clarity gate's
+// concentration signal + the user-facing context chips. It is deliberately RICHER
+// than the 4 authority domains in authority.go (data_protection/cyber/ai/
+// product_safety), which drive the EU-primary/subsidiarity rerank. The clarity
+// gate must reflect the FULL corpus breadth (arbeitsschutz, arbeitsrecht,
+// wirtschaftsrecht, finanz, ...) so a broad query surfaces as broad. Kept separate
+// + additive so the tuned authority rerank stays untouched. Corpus-grounded from
+// the 463 real regulation codes (0.3% fall through to "sonstiges").
+
+// knowledgeSpaceExact maps regulation codes to their knowledge space by EXACT
+// string comparison. Short or ambiguous codes live here because substring matching
+// would misfire on 2-3 character codes such as "OR", "AO" or "BGB". Keys are
+// upper-case; the table is assembled once at package initialisation from the
+// per-domain code lists below.
+var knowledgeSpaceExact = func() map[string]string {
+	groups := []struct {
+		space string
+		codes []string
+	}{
+		{space: "wirtschaftsrecht", codes: []string{
+			"HGB", "BGB", "AO", "OR", "ABGB", "UGB", "IFRS", "BAO",
+			"GMBHG", "AKTG", "INSO", "USTG", "GOBD", "EGBGB", "GEWO", "URHG",
+		}},
+		{space: "datenschutz", codes: []string{
+			"DPF", "TKG", "TMG", "DDG", "DSG", "DSV", "DSM", "SCC", "EPRIVACY",
+			"SCHREMS II", "CH_REVDSG", "PLANET49", "GOOGLE FONTS",
+		}},
+		{space: "digitale_dienste", codes: []string{
+			"DSA", "DMA", "DGA", "EHDS", "EIDAS", "EIDAS 2.0", "DATA ACT",
+			"DATAACT", "DIGITAL CONTENT",
+		}},
+		{space: "produktsicherheit", codes: []string{
+			"MVO", "MACHINERY", "MASCHVO", "MASCHINENVO", "GPSR", "PID", "EAA",
+			"BFSG", "ELEKTROG", "VERPACKG", "BATTVO", "BATTDG", "EU MDR",
+		}},
+		{space: "finanz", codes: []string{
+			"DORA", "PSD2", "MICA", "AMLR", "VAIT", "BAIT", "GWG",
+		}},
+		{space: "verbraucherschutz", codes: []string{
+			"UWG", "UCPD", "VSBG", "PANGV", "DL-INFOV", "OMNIBUS", "UWG AT",
+			"PRODHAFTG", "PRODUKTHAFTUNGS-RL",
+		}},
+		{space: "arbeitsrecht", codes: []string{
+			"ARG",
+		}},
+	}
+	table := make(map[string]string)
+	for _, g := range groups {
+		for _, code := range g.codes {
+			table[code] = g.space
+		}
+	}
+	return table
+}()
+
+// KnowledgeSpaceLabel holds the user-facing chip label for every knowledge-space
+// id, including the "sonstiges" catch-all.
+var KnowledgeSpaceLabel = map[string]string{
+	"datenschutz":       "Datenschutz",
+	"cyber":             "Cybersecurity",
+	"ki":                "KI",
+	"produktsicherheit": "Produktsicherheit",
+	"arbeitsschutz":     "Arbeitsschutz",
+	"arbeitsrecht":      "Arbeitsrecht",
+	"wirtschaftsrecht":  "Wirtschaftsrecht",
+	"finanz":            "Finanzregulierung",
+	"digitale_dienste":  "Digitale Dienste",
+	"verbraucherschutz": "Verbraucherschutz",
+	"lieferkette":       "Lieferkette/Nachhaltigkeit",
+	"hinweisgeber":      "Hinweisgeberschutz",
+	"sonstiges":         "Sonstiges",
+}
+
+// KnowledgeSpaceOf maps a regulation_code to a knowledge space. Robust to code
+// variants (MVO/MASCHVO/MASCHINENVO -> produktsicherheit; DSK SDM / SDM B51 ->
+// datenschutz).
+//
+// The code is trimmed and upper-cased first. Resolution order:
+//  1. empty or "NONE" -> "" (untagged, not a knowledge space);
+//  2. exact hit in knowledgeSpaceExact;
+//  3. the prefix/substring rules below, tried top to bottom, first match wins;
+//  4. otherwise "sonstiges".
+func KnowledgeSpaceOf(code string) string {
+	c := strings.ToUpper(strings.TrimSpace(code))
+	if c == "" || c == "NONE" {
+		return ""
+	}
+	if d, ok := knowledgeSpaceExact[c]; ok {
+		return d
+	}
+	// has reports whether the normalised code contains any of subs.
+	has := func(subs ...string) bool {
+		for _, s := range subs {
+			if strings.Contains(c, s) {
+				return true
+			}
+		}
+		return false
+	}
+	// pre reports whether the normalised code starts with any of subs.
+	pre := func(subs ...string) bool {
+		for _, s := range subs {
+			if strings.HasPrefix(c, s) {
+				return true
+			}
+		}
+		return false
+	}
+	switch {
+	case pre("TRGS", "TRBS", "ASR", "OSHA") || has("ARBSCHG", "GEFAHRSTOFF"):
+		return "arbeitsschutz"
+	case has("AI ACT", "KI-VO", "KI VERORDNUNG", "GPAI", "AI RMF", "HLEG AI", "GENAI", "OECD AI", "AI PRINCIPLES", "OH KI", "KI BEHOERDEN", "KI SICHERHEIT", "POS KI"):
+		return "ki"
+	// "SDM" covers standalone Standard-Datenschutzmodell codes such as "SDM B51";
+	// "DSK SDM ..." is already caught by the "DSK" prefix.
+	case pre("DSGVO", "BDSG", "TDDDG", "DSK", "SDM", "EDPB", "WP24", "WP25", "WP26", "DSFA", "BFDI", "BAYLDA", "BAYLFB", "EDPS") || has("DATENSCHUTZ", "LOESCHKONZEPT", "LOESCHUNG", "VVT", "TELEMEDIEN", "EU US DPF", "BESCHAEFTIGTENDATEN"):
+		return "datenschutz"
+	case has("CRA", "NIS2", "NISG", "BSIG", "BSI-TR", "BSI_KRITIS", "KRITIS", "ENISA", "NIST", "OWASP", "EUCSA", "EUCC", "CISA", "CYCLONEDX", "SPDX", "SLSA", "OPENTELEMETRY", "CVSS", "SECURE BY DESIGN"):
+		return "cyber"
+	case has("MACHINERY", "MASCH", "BLUE GUIDE", "FDA HFE"):
+		return "produktsicherheit"
+	// Any code containing "TAXONOMY" (including "EU TAXONOMY") is routed here.
+	case has("LKSG", "CSDDD", "CSRD", "TAXONOMY"):
+		return "lieferkette"
+	case has("HINSCHG", "GESCHGEHG"):
+		return "hinweisgeber"
+	case pre("BAG ", "BAG_") || has("ARBVG", "AZG", "ARBZG", "BETRVG", "KSCHG", "MUSCHG", "AGG", "MILOG", "TZBFG", "NACHWG", "BURLG", "611A", "PAY TRANSPARENCY", "ANGG", "MUTTERSCHUTZ"):
+		return "arbeitsrecht"
+	case has("ECOMMERCE", "ECG", "MEDIENG", "VERBRAUCHERRECHTE", "DIGITAL CONTENT"):
+		return "verbraucherschutz"
+	// Court decisions. A former has("EU TAXONOMY") alternative was dropped here: it
+	// could never fire because the lieferkette case above matches "TAXONOMY" first.
+	case pre("EUGH", "BVERFG", "BVGE", "BGH", "OGH"):
+		return "wirtschaftsrecht"
+	default:
+		return "sonstiges"
+	}
+}
+
+// ScopeResults implements G1 scope-gating: when the query names a regulation, its
+// knowledge space's hits LEAD the result set (the L2 answer + [n] citations are
+// built on this order, so scoped answers cite the named regulation instead of the
+// embedding-majority domain). Non-scoped hits backfill to keep topK. Stable within
+// each partition. Returns results unchanged when scope is "".
+func ScopeResults(results []LegalSearchResult, scope string, topK int) []LegalSearchResult {
+	if scope == "" {
+		return results
+	}
+	scoped := make([]LegalSearchResult, 0, len(results))
+	rest := make([]LegalSearchResult, 0, len(results))
+	for _, r := range results {
+		if KnowledgeSpaceOf(r.RegulationCode) == scope {
+			scoped = append(scoped, r)
+		} else {
+			rest = append(rest, r)
+		}
+	}
+	out := append(scoped, rest...)
+	if topK > 0 && len(out) > topK {
+		out = out[:topK]
+	}
+	return out
+}
+
+// FilterByKnowledgeSpace keeps ONLY the results whose regulation code maps to the
+// given knowledge space: a HARD scope without off-domain backfill. It is used by
+// E5 context scoping: once the user explicitly chose a domain chip, off-domain
+// regulations (MDR/UStG/eIDAS) must not reappear in the evidence.
+//
+// The in-scope hits keep their input order and are capped at topK when topK > 0.
+// If scope is empty, or no result falls into the scope, the input slice is
+// returned as-is and uncapped, so the answer is never left without evidence.
+func FilterByKnowledgeSpace(results []LegalSearchResult, scope string, topK int) []LegalSearchResult {
+	if scope == "" {
+		return results
+	}
+	kept := make([]LegalSearchResult, 0, len(results))
+	for i := range results {
+		if hit := results[i]; KnowledgeSpaceOf(hit.RegulationCode) == scope {
+			kept = append(kept, hit)
+		}
+	}
+	switch {
+	case len(kept) == 0:
+		return results
+	case topK > 0 && len(kept) > topK:
+		return kept[:topK]
+	default:
+		return kept
+	}
+}