From e2c74fd243945bdd1c4dd283f3a07dd955118616 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 30 Jun 2026 11:49:34 +0200 Subject: [PATCH] =?UTF-8?q?feat(ucca):=20Blue-Green=20=E2=80=9Eauthoritati?= =?UTF-8?q?ve=20slice=20promotion"=20=E2=80=94=20KB-2026.1=20Scope-Routing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Additiv (KEIN CE-Ersatz): faellt eine Query in den KB-2026.1-Scope (DP/CRA/MaschVO/ NIS2/DataAct/DORA/AIAct + EDPB/DSK-Guidance), wird die hochwertige Slice-Collection `kb_2026_1_build` abgefragt; sonst bleibt der breite Default `bp_compliance_ce`. Damit werden die Guidance-Intent- + Multi-Reg-Fixes (PR #42/#43) fuer den Slice LIVE, Broad-Corpus (OWASP/NIST/ENISA/IFRS/ISO) unangetastet -> 0 Regressionen by construction. - resolveCollection(query, requested): explizit angefragte Collection unveraendert; Default-Request -> Slice bei inKBScope, sonst CE. Env RAG_KB_SCOPE_ROUTING=false = Rollback ohne Redeploy; RAG_KB_SLICE_COLLECTION ueberschreibt den Slice-Namen. - inKBScope: detectRegulations (in-Slice-Regelwerke) + DP-Guidance-Marker (edpb/dsk/wp/gl) + DP/Compliance-Topics. Bewusst NICHT die generischen Verben aus guidanceIntentSignals (sagt/laut) und NICHT enisa/bsi/nist/owasp (die liegen in CE) -> konservativ, in-scope->Slice. Validierung: Unit (Scoping + resolveCollection); dev-e2e (RUN_E2E, geroutetes Search() gegen dev): WP248/MaschVO/CRA+MaschVO -> Slice (Treffer da, fehlen in dev-ce); NIST -> CE (NIST-Treffer). 
Co-Authored-By: Claude Opus 4.7 --- .../internal/ucca/kb_scope_routing.go | 52 +++++++++ .../internal/ucca/kb_scope_routing_test.go | 101 ++++++++++++++++++ .../internal/ucca/legal_rag_client.go | 42 +++++--- 3 files changed, 181 insertions(+), 14 deletions(-) create mode 100644 ai-compliance-sdk/internal/ucca/kb_scope_routing.go create mode 100644 ai-compliance-sdk/internal/ucca/kb_scope_routing_test.go diff --git a/ai-compliance-sdk/internal/ucca/kb_scope_routing.go b/ai-compliance-sdk/internal/ucca/kb_scope_routing.go new file mode 100644 index 00000000..5f765f81 --- /dev/null +++ b/ai-compliance-sdk/internal/ucca/kb_scope_routing.go @@ -0,0 +1,52 @@ +package ucca + +import "strings" + +// kbScopeTopics are high-precision data-protection / compliance topic markers that place a query in +// the KB-2026.1 authoritative slice even when it does NOT name a regulation. Conservative by design: +// an unmatched query falls back to the broad CE default (no regression) — the slice is only used when +// the query is confidently in-scope. +var kbScopeTopics = []string{ + // DP-Guidance-Marker, die IN der Slice liegen (EDPB/DSK/WP/GL) — bewusst NICHT die generischen + // Verben aus guidanceIntentSignals (sagt/laut/empfiehlt/auslegung) und NICHT enisa/bsi/nist/owasp + // (die liegen im breiten CE-Pool, nicht in der Slice). 
+ "edpb", "dsk", "datenschutzausschuss", "orientierungshilfe", + "wp2", "wp 2", "wp29", "working paper", "gl 0", + "datenschutz", "dsgvo", "gdpr", "dsfa", "folgenabschätzung", "folgenabschaetzung", + "einwilligung", "auftragsverarbeit", "betroffenenrecht", "auskunftsrecht", + "verarbeitungsverzeichnis", "datenschutzbeauftragt", "verzeichnis von verarbeitung", + "cookie", "tracking", "transparenzpflicht", "datenpanne", "meldepflicht", + "technische und organisatorische maßnahmen", + "cyber resilience", "schwachstelle", "vulnerability", "sicherheitsupdate", + "maschinensicherheit", "wesentliche veränderung", "wesentliche veraenderung", + "konformitätsbewertung", "konformitaetsbewertung", "ce-kennzeichnung", +} + +// inKBScope reports whether the query belongs to the KB-2026.1 authoritative slice. True when it +// names an in-slice regulation (detectRegulations), asks for guidance (EDPB/DSK/WP/GL), or hits a +// data-protection / compliance topic marker. +func inKBScope(query string) bool { + if len(detectRegulations(query)) > 0 { + return true + } + q := strings.ToLower(query) + for _, t := range kbScopeTopics { + if strings.Contains(q, t) { + return true + } + } + return false +} + +// resolveCollection applies the Blue-Green „authoritative slice promotion" routing. An explicitly +// requested collection is honoured unchanged; the DEFAULT (empty) request is routed to the KB-2026.1 +// slice when the query is in-scope, else to the broad CE default. Disable via RAG_KB_SCOPE_ROUTING=false. 
+func (c *LegalRAGClient) resolveCollection(query, requested string) string { + if requested != "" { + return requested + } + if c.kbScopeRoutingEnabled && c.kbSliceCollection != "" && inKBScope(query) { + return c.kbSliceCollection + } + return c.collection +} diff --git a/ai-compliance-sdk/internal/ucca/kb_scope_routing_test.go b/ai-compliance-sdk/internal/ucca/kb_scope_routing_test.go new file mode 100644 index 00000000..2a2823f8 --- /dev/null +++ b/ai-compliance-sdk/internal/ucca/kb_scope_routing_test.go @@ -0,0 +1,101 @@ +package ucca + +import ( + "context" + "fmt" + "os" + "strings" + "testing" +) + +func TestInKBScope(t *testing.T) { + inScope := []string{ + "Welche neun Kriterien nennt WP248 fuer ein hohes Risiko?", + "Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", + "Wann ist eine Datenschutz-Folgenabschaetzung erforderlich?", + "Welche Anforderungen stellt die DSGVO an die Einwilligung?", + "Brauche ich einen Datenschutzbeauftragten?", + "Wann muss eine aktiv ausgenutzte Schwachstelle gemeldet werden?", + } + outScope := []string{ + "Welche OWASP-Kontrollen gibt es fuer Authentifizierung?", + "Was sagt NIST SP 800-53 zu Access Control?", + "Wie funktioniert ISO 27001 Zertifizierung?", + "Welche IFRS-Standards gelten fuer Leasing?", + } + for _, q := range inScope { + if !inKBScope(q) { + t.Errorf("inKBScope(%q) = false, want true", q) + } + } + for _, q := range outScope { + if inKBScope(q) { + t.Errorf("inKBScope(%q) = true, want false", q) + } + } +} + +func TestResolveCollection(t *testing.T) { + c := &LegalRAGClient{collection: "bp_compliance_ce", kbSliceCollection: "kb_2026_1_build", kbScopeRoutingEnabled: true} + if got := c.resolveCollection("Welche Kriterien nennt WP248?", ""); got != "kb_2026_1_build" { + t.Errorf("in-scope default -> %s, want kb_2026_1_build", got) + } + if got := c.resolveCollection("Was sagt NIST SP 800-53?", ""); got != "bp_compliance_ce" { + t.Errorf("out-of-scope default -> %s, want 
bp_compliance_ce", got) + } + if got := c.resolveCollection("Welche Kriterien nennt WP248?", "explicit_coll"); got != "explicit_coll" { + t.Errorf("explicit request must be honoured -> %s", got) + } + c.kbScopeRoutingEnabled = false + if got := c.resolveCollection("Welche Kriterien nennt WP248?", ""); got != "bp_compliance_ce" { + t.Errorf("disabled routing -> %s, want bp_compliance_ce", got) + } +} + +// TestKBScopeRoutingE2E (RUN_E2E=1) verifies the routing against the REAL collections: a default +// Search() of an in-scope query must hit the KB-2026.1 slice (WP248/MaschVO live there but NOT in +// the broad CE pool = clean discriminator); an out-of-scope query stays on CE. +func TestKBScopeRoutingE2E(t *testing.T) { + if os.Getenv("RUN_E2E") != "1" { + t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL/QDRANT_API_KEY") + } + c := NewLegalRAGClient() + cases := []struct { + q string + wantToken string // expected in top-8 when routed to the slice + wantInKB bool + }{ + {"Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", "WP248", true}, + {"Welche grundlegenden Sicherheits- und Gesundheitsschutzanforderungen enthaelt Anhang III der Maschinenverordnung?", "MASCH", true}, + {"Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", "MASCH", true}, + {"Was sagt NIST SP 800-53 zu Access Control?", "", false}, + } + for _, tc := range cases { + routed := c.resolveCollection(tc.q, "") + res, err := c.Search(context.Background(), tc.q, nil, 8) + if err != nil { + t.Fatalf("%q: %v", tc.q, err) + } + codes := map[string]bool{} + for _, r := range res { + codes[strings.ToUpper(r.RegulationCode)] = true + } + hit := false + if tc.wantToken != "" { + for cd := range codes { + if strings.Contains(cd, tc.wantToken) { + hit = true + break + } + } + } + col := make([]string, 0, len(codes)) + for cd := range codes { + col = append(col, cd) + } + fmt.Printf("inKB=%-5v routed=%-16s wantTok=%-6s found=%-5v | %v\n", tc.wantInKB, routed, 
tc.wantToken, hit, col) + if tc.wantInKB && tc.wantToken != "" && !hit { + t.Errorf("%q routed to %s but %s not in top-8 (slice not active?)", tc.q, routed, tc.wantToken) + } + } +} diff --git a/ai-compliance-sdk/internal/ucca/legal_rag_client.go b/ai-compliance-sdk/internal/ucca/legal_rag_client.go index c8c0c3c7..db4993b7 100644 --- a/ai-compliance-sdk/internal/ucca/legal_rag_client.go +++ b/ai-compliance-sdk/internal/ucca/legal_rag_client.go @@ -21,6 +21,12 @@ type LegalRAGClient struct { textIndexEnsured map[string]bool hybridEnabled bool graphEnabled bool + + // Blue-Green „authoritative slice promotion" (additiv, KEIN CE-Ersatz): faellt eine Query + // in den KB-2026.1-Scope (DP/CRA/MaschVO/NIS2/DataAct/DORA/AIAct + EDPB/DSK-Guidance), wird + // die hochwertige Slice-Collection abgefragt; sonst bleibt der breite Default (bp_compliance_ce). + kbSliceCollection string + kbScopeRoutingEnabled bool } // NewLegalRAGClient creates a new Legal RAG client using Ollama bge-m3 embeddings. @@ -45,15 +51,25 @@ func NewLegalRAGClient() *LegalRAGClient { // zur Begruendung/Vollstaendigkeit genutzt, nicht zur Pool-Expansion (Default). graphEnabled := os.Getenv("RAG_GRAPH_EXPANSION") == "true" + // KB-2026.1 authoritative slice (Blue-Green, additiv). Routing default AN; Rollback ohne + // Redeploy ueber RAG_KB_SCOPE_ROUTING=false (dann faellt alles auf den CE-Default zurueck). 
+ kbSlice := os.Getenv("RAG_KB_SLICE_COLLECTION") + if kbSlice == "" { + kbSlice = "kb_2026_1_build" + } + kbScopeRouting := os.Getenv("RAG_KB_SCOPE_ROUTING") != "false" + return &LegalRAGClient{ - qdrantURL: qdrantURL, - qdrantAPIKey: qdrantAPIKey, - ollamaURL: ollamaURL, - embeddingModel: "bge-m3", - collection: "bp_compliance_ce", - textIndexEnsured: make(map[string]bool), - hybridEnabled: hybridEnabled, - graphEnabled: graphEnabled, + qdrantURL: qdrantURL, + qdrantAPIKey: qdrantAPIKey, + ollamaURL: ollamaURL, + embeddingModel: "bge-m3", + collection: "bp_compliance_ce", + textIndexEnsured: make(map[string]bool), + hybridEnabled: hybridEnabled, + graphEnabled: graphEnabled, + kbSliceCollection: kbSlice, + kbScopeRoutingEnabled: kbScopeRouting, httpClient: &http.Client{ Timeout: 60 * time.Second, }, @@ -63,15 +79,13 @@ func NewLegalRAGClient() *LegalRAGClient { // SearchCollection queries a specific Qdrant collection for relevant passages. -// If collection is empty, it falls back to the default collection (bp_compliance_ce). +// If collection is empty, resolveCollection picks the target: the KB-2026.1 slice for in-scope queries, else bp_compliance_ce. func (c *LegalRAGClient) SearchCollection(ctx context.Context, collection string, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) { - if collection == "" { - collection = c.collection - } - return c.searchInternal(ctx, collection, query, regulationIDs, topK) + return c.searchInternal(ctx, c.resolveCollection(query, collection), query, regulationIDs, topK) } -// Search queries the compliance CE corpus for relevant passages. +// Search queries the compliance corpus for relevant passages. The target collection is resolved by +// the Blue-Green slice routing: the KB-2026.1 slice for in-scope queries, else the broad CE default. 
func (c *LegalRAGClient) Search(ctx context.Context, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) { - return c.searchInternal(ctx, c.collection, query, regulationIDs, topK) + return c.searchInternal(ctx, c.resolveCollection(query, ""), query, regulationIDs, topK) } // searchInternal performs the actual search against a given collection.