Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e2c74fd243 | |||
| 8ed99c255d |
@@ -0,0 +1,52 @@
|
||||
package ucca
|
||||
|
||||
import "strings"
|
||||
|
||||
// kbScopeTopics are high-precision data-protection / compliance topic markers that place a query in
// the KB-2026.1 authoritative slice even when it does NOT name a regulation. Conservative by design:
// an unmatched query falls back to the broad CE default (no regression) — the slice is only used when
// the query is confidently in-scope.
//
// Markers are matched as plain substrings of the lowercased query, so every entry must be lowercase.
// German terms containing umlauts or "ß" are listed in both their native and their ASCII-transliterated
// spelling, because queries are frequently typed without special characters.
var kbScopeTopics = []string{
	// Data-protection guidance markers that live IN the slice (EDPB/DSK/WP/GL) — deliberately NOT the
	// generic verbs from guidanceIntentSignals (sagt/laut/empfiehlt/auslegung) and NOT
	// enisa/bsi/nist/owasp (those live in the broad CE pool, not in the slice).
	"edpb", "dsk", "datenschutzausschuss", "orientierungshilfe",
	"wp2", "wp 2", "wp29", "working paper", "gl 0",
	"datenschutz", "dsgvo", "gdpr", "dsfa", "folgenabschätzung", "folgenabschaetzung",
	"einwilligung", "auftragsverarbeit", "betroffenenrecht", "auskunftsrecht",
	"verarbeitungsverzeichnis", "datenschutzbeauftragt", "verzeichnis von verarbeitung",
	"cookie", "tracking", "transparenzpflicht", "datenpanne", "meldepflicht",
	// Both spellings, consistent with the other umlaut/ß markers in this list.
	"technische und organisatorische maßnahmen", "technische und organisatorische massnahmen",
	"cyber resilience", "schwachstelle", "vulnerability", "sicherheitsupdate",
	"maschinensicherheit", "wesentliche veränderung", "wesentliche veraenderung",
	"konformitätsbewertung", "konformitaetsbewertung", "ce-kennzeichnung",
}
|
||||
|
||||
// inKBScope reports whether the query belongs to the KB-2026.1 authoritative slice. True when it
|
||||
// names an in-slice regulation (detectRegulations), asks for guidance (EDPB/DSK/WP/GL), or hits a
|
||||
// data-protection / compliance topic marker.
|
||||
func inKBScope(query string) bool {
|
||||
if len(detectRegulations(query)) > 0 {
|
||||
return true
|
||||
}
|
||||
q := strings.ToLower(query)
|
||||
for _, t := range kbScopeTopics {
|
||||
if strings.Contains(q, t) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// resolveCollection applies the Blue-Green „authoritative slice promotion" routing. An explicitly
|
||||
// requested collection is honoured unchanged; the DEFAULT (empty) request is routed to the KB-2026.1
|
||||
// slice when the query is in-scope, else to the broad CE default. Disable via RAG_KB_SCOPE_ROUTING=false.
|
||||
func (c *LegalRAGClient) resolveCollection(query, requested string) string {
|
||||
if requested != "" {
|
||||
return requested
|
||||
}
|
||||
if c.kbScopeRoutingEnabled && c.kbSliceCollection != "" && inKBScope(query) {
|
||||
return c.kbSliceCollection
|
||||
}
|
||||
return c.collection
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
package ucca
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestInKBScope(t *testing.T) {
|
||||
inScope := []string{
|
||||
"Welche neun Kriterien nennt WP248 fuer ein hohes Risiko?",
|
||||
"Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?",
|
||||
"Wann ist eine Datenschutz-Folgenabschaetzung erforderlich?",
|
||||
"Welche Anforderungen stellt die DSGVO an die Einwilligung?",
|
||||
"Brauche ich einen Datenschutzbeauftragten?",
|
||||
"Wann muss eine aktiv ausgenutzte Schwachstelle gemeldet werden?",
|
||||
}
|
||||
outScope := []string{
|
||||
"Welche OWASP-Kontrollen gibt es fuer Authentifizierung?",
|
||||
"Was sagt NIST SP 800-53 zu Access Control?",
|
||||
"Wie funktioniert ISO 27001 Zertifizierung?",
|
||||
"Welche IFRS-Standards gelten fuer Leasing?",
|
||||
}
|
||||
for _, q := range inScope {
|
||||
if !inKBScope(q) {
|
||||
t.Errorf("inKBScope(%q) = false, want true", q)
|
||||
}
|
||||
}
|
||||
for _, q := range outScope {
|
||||
if inKBScope(q) {
|
||||
t.Errorf("inKBScope(%q) = true, want false", q)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveCollection(t *testing.T) {
|
||||
c := &LegalRAGClient{collection: "bp_compliance_ce", kbSliceCollection: "kb_2026_1_build", kbScopeRoutingEnabled: true}
|
||||
if got := c.resolveCollection("Welche Kriterien nennt WP248?", ""); got != "kb_2026_1_build" {
|
||||
t.Errorf("in-scope default -> %s, want kb_2026_1_build", got)
|
||||
}
|
||||
if got := c.resolveCollection("Was sagt NIST SP 800-53?", ""); got != "bp_compliance_ce" {
|
||||
t.Errorf("out-of-scope default -> %s, want bp_compliance_ce", got)
|
||||
}
|
||||
if got := c.resolveCollection("Welche Kriterien nennt WP248?", "explicit_coll"); got != "explicit_coll" {
|
||||
t.Errorf("explicit request must be honoured -> %s", got)
|
||||
}
|
||||
c.kbScopeRoutingEnabled = false
|
||||
if got := c.resolveCollection("Welche Kriterien nennt WP248?", ""); got != "bp_compliance_ce" {
|
||||
t.Errorf("disabled routing -> %s, want bp_compliance_ce", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestKBScopeRoutingE2E (RUN_E2E=1) verifies the routing against the REAL collections: a default
|
||||
// Search() of an in-scope query must hit the KB-2026.1 slice (WP248/MaschVO live there but NOT in
|
||||
// the broad CE pool = clean discriminator); an out-of-scope query stays on CE.
|
||||
func TestKBScopeRoutingE2E(t *testing.T) {
|
||||
if os.Getenv("RUN_E2E") != "1" {
|
||||
t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL/QDRANT_API_KEY")
|
||||
}
|
||||
c := NewLegalRAGClient()
|
||||
cases := []struct {
|
||||
q string
|
||||
wantToken string // expected in top-8 when routed to the slice
|
||||
wantInKB bool
|
||||
}{
|
||||
{"Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", "WP248", true},
|
||||
{"Welche grundlegenden Sicherheits- und Gesundheitsschutzanforderungen enthaelt Anhang III der Maschinenverordnung?", "MASCH", true},
|
||||
{"Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", "MASCH", true},
|
||||
{"Was sagt NIST SP 800-53 zu Access Control?", "", false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
routed := c.resolveCollection(tc.q, "")
|
||||
res, err := c.Search(context.Background(), tc.q, nil, 8)
|
||||
if err != nil {
|
||||
t.Fatalf("%q: %v", tc.q, err)
|
||||
}
|
||||
codes := map[string]bool{}
|
||||
for _, r := range res {
|
||||
codes[strings.ToUpper(r.RegulationCode)] = true
|
||||
}
|
||||
hit := false
|
||||
if tc.wantToken != "" {
|
||||
for cd := range codes {
|
||||
if strings.Contains(cd, tc.wantToken) {
|
||||
hit = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
col := make([]string, 0, len(codes))
|
||||
for cd := range codes {
|
||||
col = append(col, cd)
|
||||
}
|
||||
fmt.Printf("inKB=%-5v routed=%-16s wantTok=%-6s found=%-5v | %v\n", tc.wantInKB, routed, tc.wantToken, hit, col)
|
||||
if tc.wantInKB && tc.wantToken != "" && !hit {
|
||||
t.Errorf("%q routed to %s but %s not in top-8 (slice not active?)", tc.q, routed, tc.wantToken)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -21,6 +21,12 @@ type LegalRAGClient struct {
|
||||
textIndexEnsured map[string]bool
|
||||
hybridEnabled bool
|
||||
graphEnabled bool
|
||||
|
||||
// Blue-Green „authoritative slice promotion" (additiv, KEIN CE-Ersatz): faellt eine Query
|
||||
// in den KB-2026.1-Scope (DP/CRA/MaschVO/NIS2/DataAct/DORA/AIAct + EDPB/DSK-Guidance), wird
|
||||
// die hochwertige Slice-Collection abgefragt; sonst bleibt der breite Default (bp_compliance_ce).
|
||||
kbSliceCollection string
|
||||
kbScopeRoutingEnabled bool
|
||||
}
|
||||
|
||||
// NewLegalRAGClient creates a new Legal RAG client using Ollama bge-m3 embeddings.
|
||||
@@ -45,15 +51,25 @@ func NewLegalRAGClient() *LegalRAGClient {
|
||||
// zur Begruendung/Vollstaendigkeit genutzt, nicht zur Pool-Expansion (Default).
|
||||
graphEnabled := os.Getenv("RAG_GRAPH_EXPANSION") == "true"
|
||||
|
||||
// KB-2026.1 authoritative slice (Blue-Green, additiv). Routing default AN; Rollback ohne
|
||||
// Redeploy ueber RAG_KB_SCOPE_ROUTING=false (dann faellt alles auf den CE-Default zurueck).
|
||||
kbSlice := os.Getenv("RAG_KB_SLICE_COLLECTION")
|
||||
if kbSlice == "" {
|
||||
kbSlice = "kb_2026_1_build"
|
||||
}
|
||||
kbScopeRouting := os.Getenv("RAG_KB_SCOPE_ROUTING") != "false"
|
||||
|
||||
return &LegalRAGClient{
|
||||
qdrantURL: qdrantURL,
|
||||
qdrantAPIKey: qdrantAPIKey,
|
||||
ollamaURL: ollamaURL,
|
||||
embeddingModel: "bge-m3",
|
||||
collection: "bp_compliance_ce",
|
||||
textIndexEnsured: make(map[string]bool),
|
||||
hybridEnabled: hybridEnabled,
|
||||
graphEnabled: graphEnabled,
|
||||
qdrantURL: qdrantURL,
|
||||
qdrantAPIKey: qdrantAPIKey,
|
||||
ollamaURL: ollamaURL,
|
||||
embeddingModel: "bge-m3",
|
||||
collection: "bp_compliance_ce",
|
||||
textIndexEnsured: make(map[string]bool),
|
||||
hybridEnabled: hybridEnabled,
|
||||
graphEnabled: graphEnabled,
|
||||
kbSliceCollection: kbSlice,
|
||||
kbScopeRoutingEnabled: kbScopeRouting,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 60 * time.Second,
|
||||
},
|
||||
@@ -63,15 +79,13 @@ func NewLegalRAGClient() *LegalRAGClient {
|
||||
// SearchCollection queries a specific Qdrant collection for relevant passages.
|
||||
// If collection is empty, it falls back to the default collection (bp_compliance_ce).
|
||||
func (c *LegalRAGClient) SearchCollection(ctx context.Context, collection string, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) {
|
||||
if collection == "" {
|
||||
collection = c.collection
|
||||
}
|
||||
return c.searchInternal(ctx, collection, query, regulationIDs, topK)
|
||||
return c.searchInternal(ctx, c.resolveCollection(query, collection), query, regulationIDs, topK)
|
||||
}
|
||||
|
||||
// Search queries the compliance CE corpus for relevant passages.
|
||||
// Search queries the compliance corpus for relevant passages. The target collection is resolved by
|
||||
// the Blue-Green slice routing: the KB-2026.1 slice for in-scope queries, else the broad CE default.
|
||||
func (c *LegalRAGClient) Search(ctx context.Context, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) {
|
||||
return c.searchInternal(ctx, c.collection, query, regulationIDs, topK)
|
||||
return c.searchInternal(ctx, c.resolveCollection(query, ""), query, regulationIDs, topK)
|
||||
}
|
||||
|
||||
// searchInternal performs the actual search against a given collection.
|
||||
|
||||
Reference in New Issue
Block a user