Merge pull request 'fix(ucca): Guidance-Intent für direkt benannte WP/GL-Dokumente' (#42) from fix/legal-rag-guidance-intent into main
CI / branch-name (push) Has been skipped
CI / detect-changes (push) Successful in 7s
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 6s
CI / validate-canonical-controls (push) Successful in 5s
CI / loc-budget (push) Successful in 20s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Successful in 1m0s
CI / iace-gt-coverage (push) Successful in 17s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped

This commit was merged in pull request #42.
This commit is contained in:
2026-06-29 18:42:27 +00:00
2 changed files with 109 additions and 0 deletions
@@ -28,6 +28,10 @@ var guidanceIntentSignals = []string{
"edpb", "europäischer datenschutzausschuss", "europaeischer datenschutzausschuss", "edpb", "europäischer datenschutzausschuss", "europaeischer datenschutzausschuss",
"dsk", "enisa", "bsi", "leitlinie", "guideline", "orientierungshilfe", "dsk", "enisa", "bsi", "leitlinie", "guideline", "orientierungshilfe",
"auslegung", "empfiehlt", "empfehlung", "sagt", "laut", "auslegung", "empfiehlt", "empfehlung", "sagt", "laut",
// Guidance-Dokumente direkt benannt (WP29-Working-Papers WP2xx + EDPB-Guidelines "GL 0x/20xx"):
// "Welche Kriterien nennt WP248 ..." / "Was sagt GL 07/2020 ..." tragen Guidance-Intent ohne
// die Verben oben. Fix: queryWantsGuidance verfehlte rein-doc-namige Formulierungen.
"wp2", "wp 2", "wp29", "working paper", "gl 0",
} }
// controlIntentSignals mark a query that asks HOW to implement / which controls or // controlIntentSignals mark a query that asks HOW to implement / which controls or
@@ -0,0 +1,105 @@
package ucca
import (
"context"
"encoding/json"
"fmt"
"os"
"strings"
"testing"
)
// TestGuidanceFixE2E sends ten hard benchmark questions through the client returned by
// NewLegalRAGClient and prints one report line per question: the 1-based rank of the first
// hit whose upper-cased "RegulationCode ArticleLabel" contains the expected marker (-1 when
// no hit matches), an OK/FAIL status, and the regulation code and source class of the top
// hit.
//
// The test is skipped unless RUN_E2E=1. E2E_COLLECTION overrides the default collection
// name, and RAG_HYBRID_SEARCH is echoed in the header line so runs in both modes can be
// compared. A search error aborts the test; a missing expected document is only reported
// as FAIL in the output and does not fail the test.
func TestGuidanceFixE2E(t *testing.T) {
	if os.Getenv("RUN_E2E") != "1" {
		t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL to run")
	}

	client := NewLegalRAGClient()

	collection := os.Getenv("E2E_COLLECTION")
	if collection == "" {
		collection = "bp_compliance_kb_2026_1_build"
	}

	// id is the benchmark question ID, q the query text, expect the marker that must
	// appear in the label of at least one returned hit.
	cases := []struct {
		id     string
		q      string
		expect string
	}{
		{id: "GQ-0012", q: "Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", expect: "WP248"},
		{id: "GQ-0013", q: "Ab wie vielen der WP248-Kriterien ist in der Regel eine Datenschutz-Folgenabschaetzung erforderlich?", expect: "WP248"},
		{id: "GQ-0023", q: "Welche Anforderungen stellt WP260 an eine klare und einfache Sprache?", expect: "WP260"},
		{id: "GQ-0024", q: "Was versteht WP260 unter Layered Privacy Notices?", expect: "WP260"},
		{id: "GQ-0054", q: "Welche grundlegenden Cybersecurity-Anforderungen enthaelt Annex I Part I?", expect: "CRA"},
		{id: "GQ-0060", q: "Wann muss eine aktiv ausgenutzte Schwachstelle gemeldet werden?", expect: "CRA"},
		{id: "GQ-0074", q: "Benoetigt eine SPS ohne Netzwerkanschluss eine CRA-Bewertung?", expect: "CRA"},
		{id: "GQ-0079", q: "Welche grundlegenden Sicherheits- und Gesundheitsschutzanforderungen enthaelt Anhang III?", expect: "MASCHVO"},
		{id: "GQ-0091", q: "Welche Anforderungen gelten fuer wesentliche Veraenderungen einer Maschine?", expect: "MASCHVO"},
		{id: "GQ-0070", q: "Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", expect: "CRA"},
	}

	// Anything other than the literal "false" (including unset) is reported as hybrid=true.
	hybrid := os.Getenv("RAG_HYBRID_SEARCH") != "false"
	fmt.Printf("\n### hybrid=%v collection=%s\n", hybrid, collection)

	for _, tc := range cases {
		// The trailing 8 is presumably the result limit (the report calls it top8) — confirm
		// against SearchCollection's signature.
		hits, err := client.SearchCollection(context.Background(), collection, tc.q, nil, 8)
		if err != nil {
			t.Fatalf("%s: %v", tc.id, err)
		}

		// Locate the first hit whose label mentions the expected document.
		rank, status := -1, "FAIL"
		for idx := range hits {
			label := strings.ToUpper(hits[idx].RegulationCode + " " + hits[idx].ArticleLabel)
			if !strings.Contains(label, tc.expect) {
				continue
			}
			rank, status = idx+1, "OK"
			break
		}

		var top1 string
		if len(hits) != 0 {
			top1 = hits[0].RegulationCode + " (" + hits[0].SourceClass + ")"
		}

		fmt.Printf("%-9s expect=%-8s rank_in_top8=%-2d %-5s top1=%s\n", tc.id, tc.expect, rank, status, top1)
	}
}
// TestBenchE2E replays every question from the JSON benchmark file named by E2E_BENCH_FILE
// through the client returned by NewLegalRAGClient and prints one line per question of the
// form "BENCH|<id>|<code>;<code>;...", listing the regulation codes of the hits in the
// order they were returned. A ';' inside a code is replaced by ',' so the separator stays
// unambiguous.
//
// The output is meant to be diffed between a run before and a run after a retrieval change,
// as a regression guard showing which questions changed their results.
//
// The test is skipped unless RUN_E2E=1 and E2E_BENCH_FILE is set. E2E_COLLECTION overrides
// the default collection name. Read, decode and search errors abort the test.
func TestBenchE2E(t *testing.T) {
	if os.Getenv("RUN_E2E") != "1" {
		t.Skip("set RUN_E2E=1 + E2E_BENCH_FILE")
	}
	benchFile := os.Getenv("E2E_BENCH_FILE")
	if benchFile == "" {
		t.Skip("E2E_BENCH_FILE not set")
	}

	data, readErr := os.ReadFile(benchFile)
	if readErr != nil {
		t.Fatal(readErr)
	}

	// Only the fields needed for replay are decoded; other keys in the file are ignored.
	type benchQuestion struct {
		ID       string `json:"id"`
		Question string `json:"question"`
	}
	var suite struct {
		Questions []benchQuestion `json:"questions"`
	}
	if decodeErr := json.Unmarshal(data, &suite); decodeErr != nil {
		t.Fatal(decodeErr)
	}

	client := NewLegalRAGClient()

	collection := os.Getenv("E2E_COLLECTION")
	if collection == "" {
		collection = "bp_compliance_kb_2026_1_build"
	}

	// Anything other than the literal "false" (including unset) is reported as hybrid=true.
	hybrid := os.Getenv("RAG_HYBRID_SEARCH") != "false"
	fmt.Printf("### BENCH n=%d hybrid=%v\n", len(suite.Questions), hybrid)

	for _, item := range suite.Questions {
		hits, err := client.SearchCollection(context.Background(), collection, item.Question, nil, 8)
		if err != nil {
			t.Fatalf("%s: %v", item.ID, err)
		}

		codes := make([]string, len(hits))
		for idx := range hits {
			codes[idx] = strings.ReplaceAll(hits[idx].RegulationCode, ";", ",")
		}

		fmt.Printf("BENCH|%s|%s\n", item.ID, strings.Join(codes, ";"))
	}
}