e2c74fd243
CI / detect-changes (pull_request) Successful in 12s
CI / branch-name (pull_request) Successful in 1s
CI / guardrail-integrity (pull_request) Successful in 9s
CI / secret-scan (pull_request) Successful in 10s
CI / dep-audit (pull_request) Failing after 56s
CI / sbom-scan (pull_request) Failing after 1m1s
CI / build-sha-integrity (pull_request) Successful in 6s
CI / validate-canonical-controls (pull_request) Successful in 3s
CI / loc-budget (pull_request) Successful in 18s
CI / go-lint (pull_request) Successful in 52s
CI / python-lint (pull_request) Failing after 15s
CI / nodejs-lint (pull_request) Failing after 1m12s
CI / nodejs-build (pull_request) Successful in 3m4s
CI / test-go (pull_request) Successful in 1m2s
CI / iace-gt-coverage (pull_request) Successful in 19s
CI / test-python-backend (pull_request) Successful in 27s
CI / test-python-document-crawler (pull_request) Successful in 19s
CI / test-python-dsms-gateway (pull_request) Successful in 15s
Additiv (KEIN CE-Ersatz): faellt eine Query in den KB-2026.1-Scope (DP/CRA/MaschVO/NIS2/DataAct/DORA/AIAct + EDPB/DSK-Guidance), wird die hochwertige Slice-Collection `kb_2026_1_build` abgefragt; sonst bleibt der breite Default `bp_compliance_ce`. Damit werden die Guidance-Intent- + Multi-Reg-Fixes (PR #42/#43) fuer den Slice LIVE, Broad-Corpus (OWASP/NIST/ENISA/IFRS/ISO) unangetastet -> 0 Regressionen by construction. - resolveCollection(query, requested): explizit angefragte Collection unveraendert; Default-Request -> Slice bei inKBScope, sonst CE. Env RAG_KB_SCOPE_ROUTING=false = Rollback ohne Redeploy; RAG_KB_SLICE_COLLECTION ueberschreibt den Slice-Namen. - inKBScope: detectRegulations (in-Slice-Regelwerke) + DP-Guidance-Marker (edpb/dsk/wp/gl) + DP/Compliance-Topics. Bewusst NICHT die generischen Verben aus guidanceIntentSignals (sagt/laut) und NICHT enisa/bsi/nist/owasp (die liegen in CE) -> konservativ, in-scope->Slice. Validierung: Unit (Scoping + resolveCollection); dev-e2e (RUN_E2E, geroutetes Search() gegen dev): WP248/MaschVO/CRA+MaschVO -> Slice (Treffer da, fehlen in dev-ce); NIST -> CE (NIST-Treffer). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
102 lines
3.5 KiB
Go
102 lines
3.5 KiB
Go
package ucca
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
func TestInKBScope(t *testing.T) {
|
|
inScope := []string{
|
|
"Welche neun Kriterien nennt WP248 fuer ein hohes Risiko?",
|
|
"Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?",
|
|
"Wann ist eine Datenschutz-Folgenabschaetzung erforderlich?",
|
|
"Welche Anforderungen stellt die DSGVO an die Einwilligung?",
|
|
"Brauche ich einen Datenschutzbeauftragten?",
|
|
"Wann muss eine aktiv ausgenutzte Schwachstelle gemeldet werden?",
|
|
}
|
|
outScope := []string{
|
|
"Welche OWASP-Kontrollen gibt es fuer Authentifizierung?",
|
|
"Was sagt NIST SP 800-53 zu Access Control?",
|
|
"Wie funktioniert ISO 27001 Zertifizierung?",
|
|
"Welche IFRS-Standards gelten fuer Leasing?",
|
|
}
|
|
for _, q := range inScope {
|
|
if !inKBScope(q) {
|
|
t.Errorf("inKBScope(%q) = false, want true", q)
|
|
}
|
|
}
|
|
for _, q := range outScope {
|
|
if inKBScope(q) {
|
|
t.Errorf("inKBScope(%q) = true, want false", q)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestResolveCollection(t *testing.T) {
|
|
c := &LegalRAGClient{collection: "bp_compliance_ce", kbSliceCollection: "kb_2026_1_build", kbScopeRoutingEnabled: true}
|
|
if got := c.resolveCollection("Welche Kriterien nennt WP248?", ""); got != "kb_2026_1_build" {
|
|
t.Errorf("in-scope default -> %s, want kb_2026_1_build", got)
|
|
}
|
|
if got := c.resolveCollection("Was sagt NIST SP 800-53?", ""); got != "bp_compliance_ce" {
|
|
t.Errorf("out-of-scope default -> %s, want bp_compliance_ce", got)
|
|
}
|
|
if got := c.resolveCollection("Welche Kriterien nennt WP248?", "explicit_coll"); got != "explicit_coll" {
|
|
t.Errorf("explicit request must be honoured -> %s", got)
|
|
}
|
|
c.kbScopeRoutingEnabled = false
|
|
if got := c.resolveCollection("Welche Kriterien nennt WP248?", ""); got != "bp_compliance_ce" {
|
|
t.Errorf("disabled routing -> %s, want bp_compliance_ce", got)
|
|
}
|
|
}
|
|
|
|
// TestKBScopeRoutingE2E (RUN_E2E=1) verifies the routing against the REAL collections: a default
|
|
// Search() of an in-scope query must hit the KB-2026.1 slice (WP248/MaschVO live there but NOT in
|
|
// the broad CE pool = clean discriminator); an out-of-scope query stays on CE.
|
|
func TestKBScopeRoutingE2E(t *testing.T) {
|
|
if os.Getenv("RUN_E2E") != "1" {
|
|
t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL/QDRANT_API_KEY")
|
|
}
|
|
c := NewLegalRAGClient()
|
|
cases := []struct {
|
|
q string
|
|
wantToken string // expected in top-8 when routed to the slice
|
|
wantInKB bool
|
|
}{
|
|
{"Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", "WP248", true},
|
|
{"Welche grundlegenden Sicherheits- und Gesundheitsschutzanforderungen enthaelt Anhang III der Maschinenverordnung?", "MASCH", true},
|
|
{"Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", "MASCH", true},
|
|
{"Was sagt NIST SP 800-53 zu Access Control?", "", false},
|
|
}
|
|
for _, tc := range cases {
|
|
routed := c.resolveCollection(tc.q, "")
|
|
res, err := c.Search(context.Background(), tc.q, nil, 8)
|
|
if err != nil {
|
|
t.Fatalf("%q: %v", tc.q, err)
|
|
}
|
|
codes := map[string]bool{}
|
|
for _, r := range res {
|
|
codes[strings.ToUpper(r.RegulationCode)] = true
|
|
}
|
|
hit := false
|
|
if tc.wantToken != "" {
|
|
for cd := range codes {
|
|
if strings.Contains(cd, tc.wantToken) {
|
|
hit = true
|
|
break
|
|
}
|
|
}
|
|
}
|
|
col := make([]string, 0, len(codes))
|
|
for cd := range codes {
|
|
col = append(col, cd)
|
|
}
|
|
fmt.Printf("inKB=%-5v routed=%-16s wantTok=%-6s found=%-5v | %v\n", tc.wantInKB, routed, tc.wantToken, hit, col)
|
|
if tc.wantInKB && tc.wantToken != "" && !hit {
|
|
t.Errorf("%q routed to %s but %s not in top-8 (slice not active?)", tc.q, routed, tc.wantToken)
|
|
}
|
|
}
|
|
}
|