diff --git a/admin-compliance/lib/sdk/agents/__tests__/advisor-rag.test.ts b/admin-compliance/lib/sdk/agents/__tests__/advisor-rag.test.ts index 54fb8fdb..007bfb80 100644 --- a/admin-compliance/lib/sdk/agents/__tests__/advisor-rag.test.ts +++ b/admin-compliance/lib/sdk/agents/__tests__/advisor-rag.test.ts @@ -51,8 +51,8 @@ describe('advisor-rag', () => { }) }) - describe('queryAdvisorRAG', () => { - it('fragt alle 6 Collections ab und formatiert die Treffer', async () => { + describe('queryAdvisorRAG (Authority Router)', () => { + it('ruft den Router EINMAL auf und formatiert die Treffer', async () => { mockFetch.mockResolvedValue({ ok: true, json: async () => ({ results: [{ text: 'Inhalt A', regulation_short: 'DSGVO', score: 0.9 }] }), @@ -60,19 +60,19 @@ describe('advisor-rag', () => { const result = await mod.queryAdvisorRAG('Was ist eine DSFA?') expect(result).toContain('[Quelle 1: DSGVO]') expect(result).toContain('Inhalt A') - expect(mockFetch).toHaveBeenCalledTimes(mod.COMPLIANCE_COLLECTIONS.length) + expect(mockFetch).toHaveBeenCalledTimes(1) }) - it('ruft die ai-sdk /sdk/v1/rag/search mit collection + top_k auf', async () => { + it('ruft /sdk/v1/rag/retrieve mit query + top_k (ohne collection) auf', async () => { mockFetch.mockResolvedValue({ ok: true, json: async () => ({ results: [] }) }) await mod.queryAdvisorRAG('test') expect(mockFetch).toHaveBeenCalledWith( - expect.stringContaining('/sdk/v1/rag/search'), + expect.stringContaining('/sdk/v1/rag/retrieve'), expect.objectContaining({ method: 'POST' }), ) const body = JSON.parse(mockFetch.mock.calls[0][1].body) - expect(body).toMatchObject({ query: 'test', top_k: 3 }) - expect(mod.COMPLIANCE_COLLECTIONS).toContain(body.collection) + expect(body).toMatchObject({ query: 'test', top_k: 8 }) + expect(body.collection).toBeUndefined() }) it('liefert leeren String wenn das RAG-Backend nicht erreichbar ist (graceful)', async () => { @@ -80,10 +80,5 @@ describe('advisor-rag', () => { const result = await mod.queryAdvisorRAG('test') expect(result).toBe('') }) - - it('umfasst genau die 6 Compliance-Collections', () => { - expect(mod.COMPLIANCE_COLLECTIONS).toHaveLength(6) - expect(mod.COMPLIANCE_COLLECTIONS).toContain('bp_compliance_recht') - }) }) }) diff --git a/admin-compliance/lib/sdk/agents/advisor-rag.ts b/admin-compliance/lib/sdk/agents/advisor-rag.ts index 1eef3648..b42d1cd2 100644 --- a/admin-compliance/lib/sdk/agents/advisor-rag.ts +++ b/admin-compliance/lib/sdk/agents/advisor-rag.ts @@ -1,12 +1,13 @@ /** * Compliance-Advisor RAG-Suche. * - * Fragt die ai-compliance-sdk (`/sdk/v1/rag/search`) ab statt des frueheren - * `rag-service:8097` (auf prod nicht erreichbar). Die ai-sdk embeddet die Query - * mit bge-m3 (prod: ollama-embed) und sucht in den Qdrant-Compliance-Collections - * — damit profitiert der Advisor vom reicheren Embedding. + * Fragt den Authority Router der ai-compliance-sdk (`/sdk/v1/rag/retrieve`) mit NUR der + * Query ab — der Router waehlt selbst die Collections (Broad-Authority-Base + KB-2026.1-Slice + * bei in-scope), embeddet mit bge-m3 (prod: ollama-embed), merged + authority-ranked. Der + * Advisor bleibt damit collection-agnostisch (Vertrag: Compiler -> Collections -> Retriever + * -> Advisor); die fruehere Multi-Collection-Logik liegt jetzt im Retriever. * - * Fehler je Collection werden geschluckt (graceful: Antwort ohne diesen Treffer). + * Fehler werden geschluckt (graceful: Antwort ohne RAG-Kontext). * Fundstellen via article_label sind live ab dem Prod-Re-Ingest 2026-06. */ @@ -17,16 +18,6 @@ const DEFAULT_USER = '00000000-0000-0000-0000-000000000001' const DEFAULT_TENANT = process.env.DEFAULT_TENANT_ID || '9282a473-5c95-4b3a-bf78-0ecc0ec71d3e' -// Compliance-relevante Collections (ai-sdk-Whitelist `AllowedCollections`). -export const COMPLIANCE_COLLECTIONS = [ - 'bp_compliance_gesetze', - 'bp_compliance_ce', - 'bp_compliance_datenschutz', - 'bp_dsfa_corpus', - 'bp_compliance_recht', - 'bp_legal_templates', -] as const - interface SdkRagResult { text?: string regulation_code?: string @@ -68,39 +59,36 @@ export function mapSdkResults(results: SdkRagResult[] | undefined): ScoredPassag .filter((p) => p.content) } -async function searchCollection(collection: string, query: string): Promise { +/** + * Authority Router: EIN collection-agnostischer Aufruf an die ai-sdk (`/sdk/v1/rag/retrieve`). + * Der Router waehlt die Collections (Broad-Authority-Base + KB-2026.1-Slice bei in-scope), + * merged + authority-ranked sie und liefert die Top-Passagen. Der Advisor weiss damit nichts + * mehr ueber einzelne Collections — die fruehere Multi-Collection-Logik liegt jetzt im Retriever. + * Fehler werden geschluckt (graceful: Antwort ohne RAG-Kontext). + */ +export async function queryAdvisorRAG(query: string): Promise { + let passages: ScoredPassage[] = [] try { - const res = await fetch(`${SDK_URL}/sdk/v1/rag/search`, { + const res = await fetch(`${SDK_URL}/sdk/v1/rag/retrieve`, { method: 'POST', headers: { 'Content-Type': 'application/json', 'X-User-ID': DEFAULT_USER, 'X-Tenant-ID': DEFAULT_TENANT, }, - body: JSON.stringify({ query, collection, top_k: 3 }), - signal: AbortSignal.timeout(10000), + body: JSON.stringify({ query, top_k: 8 }), + signal: AbortSignal.timeout(15000), }) - if (!res.ok) return [] - const data = await res.json() - return mapSdkResults(data.results) + if (res.ok) { + const data = await res.json() + passages = mapSdkResults(data.results) + } } catch { - return [] + // graceful: keine Verbindung -> Antwort ohne RAG-Kontext } -} - -/** - * Fragt alle Compliance-Collections parallel ab und liefert die Top-8-Passagen - * als formatierten Kontextblock (oder '' wenn nichts erreichbar/gefunden). - */ -export async function queryAdvisorRAG(query: string): Promise { - const settled = await Promise.all( - COMPLIANCE_COLLECTIONS.map((c) => searchCollection(c, query)), - ) - const all = settled.flat() - if (all.length === 0) return '' - all.sort((a, b) => b.score - a.score) - return all - .slice(0, 8) + // Der Router liefert bereits authority-geordnete Top-K; Reihenfolge bewahren. + if (passages.length === 0) return '' + return passages .map((r, i) => `[Quelle ${i + 1}: ${r.source}]\n${r.content}`) .join('\n\n---\n\n') } diff --git a/ai-compliance-sdk/internal/api/handlers/rag_handlers.go b/ai-compliance-sdk/internal/api/handlers/rag_handlers.go index eac9e17a..1842c43b 100644 --- a/ai-compliance-sdk/internal/api/handlers/rag_handlers.go +++ b/ai-compliance-sdk/internal/api/handlers/rag_handlers.go @@ -82,6 +82,43 @@ func (h *RAGHandlers) Search(c *gin.Context) { }) } +// RetrieveRequest is the Authority Router request: a query only, no collection — the router decides +// which collections to query (broad authority base + the in-scope KB-2026.1 slice). +type RetrieveRequest struct { + Query string `json:"query" binding:"required"` + TopK int `json:"top_k,omitempty"` +} + +// Retrieve is the Authority Router endpoint. The Advisor calls this with ONLY a query and stays +// collection-agnostic; the router fans out over the authority base + the in-scope slice, merges by +// authority score, and returns the unified top-K. Response shape matches Search (query/results/ +// count/assessment) so existing consumers parse it unchanged. +// POST /sdk/v1/rag/retrieve +func (h *RAGHandlers) Retrieve(c *gin.Context) { + var req RetrieveRequest + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + + if req.TopK <= 0 || req.TopK > 20 { + req.TopK = 8 + } + + results, err := h.ragClient.Retrieve(c.Request.Context(), req.Query, req.TopK) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "RAG retrieve failed: " + err.Error()}) + return + } + + c.JSON(http.StatusOK, gin.H{ + "query": req.Query, + "results": results, + "count": len(results), + "assessment": ucca.Assess(results), + }) +} + // ListRegulations returns the list of available regulations in the corpus. // GET /sdk/v1/rag/regulations func (h *RAGHandlers) ListRegulations(c *gin.Context) { diff --git a/ai-compliance-sdk/internal/app/routes.go b/ai-compliance-sdk/internal/app/routes.go index 562f956b..1f8d6fb0 100644 --- a/ai-compliance-sdk/internal/app/routes.go +++ b/ai-compliance-sdk/internal/app/routes.go @@ -159,6 +159,7 @@ func registerRAGRoutes(v1 *gin.RouterGroup, h *handlers.RAGHandlers) { ragRoutes := v1.Group("/rag") { ragRoutes.POST("/search", h.Search) + ragRoutes.POST("/retrieve", h.Retrieve) ragRoutes.GET("/regulations", h.ListRegulations) ragRoutes.GET("/corpus-status", h.CorpusStatus) ragRoutes.GET("/corpus-versions/:collection", h.CorpusVersionHistory) @@ -358,7 +359,6 @@ func registerWhistleblowerRoutes(v1 *gin.RouterGroup, h *handlers.WhistleblowerH } } - func registerMaximizerRoutes(v1 *gin.RouterGroup, h *handlers.MaximizerHandlers) { m := v1.Group("/maximizer") { diff --git a/ai-compliance-sdk/internal/ucca/authority_router.go b/ai-compliance-sdk/internal/ucca/authority_router.go new file mode 100644 index 00000000..edd7c0d6 --- /dev/null +++ b/ai-compliance-sdk/internal/ucca/authority_router.go @@ -0,0 +1,114 @@ +package ucca + +import ( + "context" + "os" + "sort" + "strings" + "sync" +) + +// routerBaseCollections is the broad authority base the Authority Router fans out over. It mirrors +// the Advisor's historical multi-collection set; the KB-2026.1 slice is added separately when the +// query is in scope. Override via RAG_ROUTER_COLLECTIONS (comma-separated) per environment. +func (c *LegalRAGClient) routerBaseCollections() []string { + if v := strings.TrimSpace(os.Getenv("RAG_ROUTER_COLLECTIONS")); v != "" { + var out []string + for _, p := range strings.Split(v, ",") { + if s := strings.TrimSpace(p); s != "" { + out = append(out, s) + } + } + if len(out) > 0 { + return out + } + } + return []string{ + "bp_compliance_gesetze", + "bp_compliance_ce", + "bp_compliance_datenschutz", + "bp_dsfa_corpus", + "bp_compliance_recht", + "bp_legal_templates", + } +} + +const routerPerCollectionTopK = 3 + +// Retrieve is the Authority Router entry point: callers (the Advisor) pass ONLY a query and stay +// collection-agnostic. The router fans out over the broad authority base and ADDS the KB-2026.1 +// slice when the query is in scope (inKBScope), then merges all hits, deduplicates, and returns the +// top-K by authority score. This moves the former Advisor-side collection fan-out into the retrieval +// layer (the "Retriever" tier of the quality pyramid), so the proven KB-2026.1 slice gain reaches +// the product path without the Advisor knowing about individual collections. +// +// The merged set is ordered by the per-collection authority score that rerankByAuthority already +// produced inside searchInternal — i.e. binding-vs-guidance ordering is preserved across the merge. +// Per-collection failures (e.g. a collection absent on an environment) degrade gracefully. +func (c *LegalRAGClient) Retrieve(ctx context.Context, query string, topK int) ([]LegalSearchResult, error) { + if topK <= 0 { + topK = 8 + } + + collections := c.routerBaseCollections() + if c.kbScopeRoutingEnabled && c.kbSliceCollection != "" && inKBScope(query) { + collections = append(collections, c.kbSliceCollection) + } + + // Warm the full-text indexes sequentially first so the concurrent fan-out below only READS the + // shared textIndexEnsured map (the writes happen here, serialized) — closes the cold-start map + // race deterministically. Best-effort: a missing collection just stays un-indexed (hybrid then + // falls back to dense, or the per-collection search degrades to nothing). + if c.hybridEnabled { + for _, coll := range collections { + _ = c.ensureTextIndex(ctx, coll) + } + } + + out := make([][]LegalSearchResult, len(collections)) + var wg sync.WaitGroup + for i, coll := range collections { + wg.Add(1) + go func(i int, coll string) { + defer wg.Done() + if res, err := c.searchInternal(ctx, coll, query, nil, routerPerCollectionTopK); err == nil { + out[i] = res + } + }(i, coll) + } + wg.Wait() + + merged := make([]LegalSearchResult, 0, len(collections)*routerPerCollectionTopK) + for _, r := range out { + merged = append(merged, r...) + } + merged = dedupResults(merged) + sort.SliceStable(merged, func(a, b int) bool { return merged[a].Score > merged[b].Score }) + if len(merged) > topK { + merged = merged[:topK] + } + return merged, nil +} + +// dedupResults removes duplicate passages that can appear when collections overlap, keeping the +// highest-scoring occurrence. Identity = regulation_code + article_label + a text prefix. +func dedupResults(in []LegalSearchResult) []LegalSearchResult { + pos := make(map[string]int, len(in)) + out := make([]LegalSearchResult, 0, len(in)) + for _, r := range in { + text := r.Text + if len(text) > 80 { + text = text[:80] + } + key := r.RegulationCode + "|" + r.ArticleLabel + "|" + text + if idx, ok := pos[key]; ok { + if r.Score > out[idx].Score { + out[idx] = r + } + continue + } + pos[key] = len(out) + out = append(out, r) + } + return out +} diff --git a/ai-compliance-sdk/internal/ucca/authority_router_e2e_test.go b/ai-compliance-sdk/internal/ucca/authority_router_e2e_test.go new file mode 100644 index 00000000..893378fc --- /dev/null +++ b/ai-compliance-sdk/internal/ucca/authority_router_e2e_test.go @@ -0,0 +1,134 @@ +package ucca + +import ( + "context" + "encoding/json" + "os" + "strconv" + "strings" + "testing" +) + +type benchQ struct { + ID string `json:"id"` + Document string `json:"document"` + Question string `json:"question"` +} + +// docTokens maps a bench question's expected document to acceptable regulation_code/label substrings. +func docTokens(document string) []string { + d := strings.ToUpper(document) + var t []string + for _, wp := range []string{"WP243", "WP248", "WP260"} { + if strings.Contains(d, wp) { + t = append(t, wp) + } + } + dns := strings.ReplaceAll(d, " ", "") + for _, gl := range []struct{ key, tok string }{{"07/2020", "GL07"}, {"05/2020", "GL05"}, {"09/2022", "GL09"}} { + if strings.Contains(dns, gl.key) { + t = append(t, gl.tok) + } + } + if strings.Contains(d, "TDDDG") { + t = append(t, "TDDDG") + } + if strings.Contains(d, "DSGVO") || strings.Contains(d, "ART. 13") || strings.Contains(d, "ART. 14") { + t = append(t, "DSGVO") + } + if strings.Contains(d, "BDSG") { + t = append(t, "BDSG") + } + if strings.Contains(d, "CRA") { + t = append(t, "CRA") + } + if strings.Contains(d, "MASCH") { + t = append(t, "MASCH", "MACHINERY", "MVO") + } + return t +} + +func hitDoc(results []LegalSearchResult, toks []string) bool { + for _, r := range results { + s := strings.ReplaceAll(strings.ToUpper(r.RegulationCode+" "+r.ArticleLabel), " ", "") + for _, tk := range toks { + if strings.Contains(s, strings.ReplaceAll(tk, " ", "")) { + return true + } + } + } + return false +} + +// TestAuthorityRouterCB100 (RUN_E2E=1) drives the REAL Retrieve() over the ComplianceBench-100 against +// the live collections: NEW (scope routing on → slice added for in-scope queries) vs OLD (routing off +// → broad base only). It is the regression gate that the router actually delivers the proven slice +// gain (+28/0-regr in the offline simulation) through the production Go code path. +func TestAuthorityRouterCB100(t *testing.T) { + if os.Getenv("RUN_E2E") != "1" { + t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL/QDRANT_API_KEY + BENCH_PATH") + } + path := os.Getenv("BENCH_PATH") + if path == "" { + path = "/tmp/compliance_bench.json" + } + raw, err := os.ReadFile(path) + if err != nil { + t.Fatalf("bench read: %v", err) + } + var doc struct { + Questions []benchQ `json:"questions"` + } + if err := json.Unmarshal(raw, &doc); err != nil { + t.Fatalf("bench parse: %v", err) + } + + // BENCH_STRIDE samples every Kth question (stratified across DS/CRA/MaschVO) so the gate stays + // tractable against the remote dev Qdrant; default 1 = full CB-100. + stride := 1 + if s := os.Getenv("BENCH_STRIDE"); s != "" { + if n, err := strconv.Atoi(s); err == nil && n > 0 { + stride = n + } + } + + c := NewLegalRAGClient() + ctx := context.Background() + var n, oldHit, newHit, gain, regr int + for i, q := range doc.Questions { + if i%stride != 0 { + continue + } + n++ + toks := docTokens(q.Document) + c.kbScopeRoutingEnabled = false + oldRes, _ := c.Retrieve(ctx, q.Question, 8) + c.kbScopeRoutingEnabled = true + newRes, _ := c.Retrieve(ctx, q.Question, 8) + oh, nh := hitDoc(oldRes, toks), hitDoc(newRes, toks) + if oh { + oldHit++ + } + if nh { + newHit++ + } + flip := "=" + switch { + case !oh && nh: + gain++ + flip = "GAIN" + case oh && !nh: + regr++ + flip = "REGR" + } + t.Logf("%-9s [%-14s] OLD=%-5v NEW=%-5v %s", q.ID, q.Document, oh, nh, flip) + } + t.Logf("CB-100 sample (stride=%d) via Retrieve(): N=%d | OLD-hit %d | NEW-hit %d | GAIN %d | REGR %d", + stride, n, oldHit, newHit, gain, regr) + if newHit <= oldHit || gain < 3 { + t.Errorf("router must add slice gains: NEW(%d) must exceed OLD(%d), gain=%d", newHit, oldHit, gain) + } + if regr > 2 { + t.Errorf("too many regressions through the router: %d", regr) + } +} diff --git a/ai-compliance-sdk/internal/ucca/authority_router_test.go b/ai-compliance-sdk/internal/ucca/authority_router_test.go new file mode 100644 index 00000000..c2c25a55 --- /dev/null +++ b/ai-compliance-sdk/internal/ucca/authority_router_test.go @@ -0,0 +1,67 @@ +package ucca + +import ( + "os" + "testing" +) + +func TestRouterBaseCollections(t *testing.T) { + c := &LegalRAGClient{} + os.Unsetenv("RAG_ROUTER_COLLECTIONS") + def := c.routerBaseCollections() + if len(def) != 6 || def[1] != "bp_compliance_ce" { + t.Fatalf("default base collections unexpected: %v", def) + } + + os.Setenv("RAG_ROUTER_COLLECTIONS", " bp_compliance_ce , kb_2026_1_build ,, ") + defer os.Unsetenv("RAG_ROUTER_COLLECTIONS") + got := c.routerBaseCollections() + if len(got) != 2 || got[0] != "bp_compliance_ce" || got[1] != "kb_2026_1_build" { + t.Fatalf("env override parse failed (trim/empty): %v", got) + } +} + +func TestRouterSliceSelection(t *testing.T) { + // The router appends the slice exactly when the query is in scope (inKBScope) and routing is on. + // Mirror the selection logic so a regression in either is caught without a live Qdrant. + c := &LegalRAGClient{kbSliceCollection: "kb_2026_1_build", kbScopeRoutingEnabled: true} + sel := func(q string) bool { + colls := c.routerBaseCollections() + if c.kbScopeRoutingEnabled && c.kbSliceCollection != "" && inKBScope(q) { + colls = append(colls, c.kbSliceCollection) + } + for _, x := range colls { + if x == c.kbSliceCollection { + return true + } + } + return false + } + if !sel("Welche neun Kriterien nennt WP248 fuer ein hohes Risiko?") { + t.Error("in-scope guidance query must include the slice") + } + if sel("Was sagt NIST SP 800-53 zu Access Control?") { + t.Error("out-of-scope query must NOT include the slice") + } + c.kbScopeRoutingEnabled = false + if sel("Welche Kriterien nennt WP248?") { + t.Error("routing disabled => slice never included") + } +} + +func TestDedupResults(t *testing.T) { + in := []LegalSearchResult{ + {RegulationCode: "EDPB WP248", ArticleLabel: "III.B", Text: "lorem", Score: 0.7}, + {RegulationCode: "EDPB WP248", ArticleLabel: "III.B", Text: "lorem", Score: 0.9}, // dup, higher score + {RegulationCode: "DSGVO", ArticleLabel: "Art. 35", Text: "ipsum", Score: 0.8}, + } + out := dedupResults(in) + if len(out) != 2 { + t.Fatalf("expected 2 deduped, got %d", len(out)) + } + for _, r := range out { + if r.RegulationCode == "EDPB WP248" && r.Score != 0.9 { + t.Errorf("dedup must keep highest score, got %v", r.Score) + } + } +}