Merge pull request 'fix(ucca): Guidance-Intent für direkt benannte WP/GL-Dokumente' (#42) from fix/legal-rag-guidance-intent into main
CI / branch-name (push) Has been skipped
CI / detect-changes (push) Successful in 7s
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 6s
CI / validate-canonical-controls (push) Successful in 5s
CI / loc-budget (push) Successful in 20s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Successful in 1m0s
CI / iace-gt-coverage (push) Successful in 17s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped

This commit was merged in pull request #42.
This commit is contained in:
2026-06-29 18:42:27 +00:00
2 changed files with 109 additions and 0 deletions
@@ -28,6 +28,10 @@ var guidanceIntentSignals = []string{
"edpb", "europäischer datenschutzausschuss", "europaeischer datenschutzausschuss",
"dsk", "enisa", "bsi", "leitlinie", "guideline", "orientierungshilfe",
"auslegung", "empfiehlt", "empfehlung", "sagt", "laut",
// Guidance documents named directly (WP29 working papers WP2xx + EDPB guidelines "GL 0x/20xx"):
// "Welche Kriterien nennt WP248 ..." / "Was sagt GL 07/2020 ..." carry guidance intent even without
// the verbs above. Fix: queryWantsGuidance missed phrasings that only name the document.
"wp2", "wp 2", "wp29", "working paper", "gl 0",
}
// controlIntentSignals mark a query that asks HOW to implement / which controls or
@@ -0,0 +1,105 @@
package ucca
import (
"context"
"encoding/json"
"fmt"
"os"
"strings"
"testing"
)
// TestGuidanceFixE2E is a reporting harness: it sends ten hard benchmark questions
// through the real LegalRAGClient and prints, for each one, the 1-based rank at which
// the expected document first shows up in the top-8 results (-1 if it is absent),
// together with the regulation code and source class of the first hit.
//
// The test only runs when RUN_E2E=1. E2E_COLLECTION overrides the default build
// collection. A search error aborts the run; a missing expected document is only
// reported as "FAIL" in the printed line and does not fail the test. Run it with
// RAG_HYBRID_SEARCH toggled to compare the guidanceIntentSignals fix across modes.
func TestGuidanceFixE2E(t *testing.T) {
	if os.Getenv("RUN_E2E") != "1" {
		t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL to run")
	}

	client := NewLegalRAGClient()

	collection := os.Getenv("E2E_COLLECTION")
	if collection == "" {
		collection = "bp_compliance_kb_2026_1_build"
	}

	// id = benchmark question ID, q = query text, expect = upper-case marker that
	// must occur in "<RegulationCode> <ArticleLabel>" of some hit.
	type hardCase struct{ id, q, expect string }
	hardCases := []hardCase{
		{"GQ-0012", "Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", "WP248"},
		{"GQ-0013", "Ab wie vielen der WP248-Kriterien ist in der Regel eine Datenschutz-Folgenabschaetzung erforderlich?", "WP248"},
		{"GQ-0023", "Welche Anforderungen stellt WP260 an eine klare und einfache Sprache?", "WP260"},
		{"GQ-0024", "Was versteht WP260 unter Layered Privacy Notices?", "WP260"},
		{"GQ-0054", "Welche grundlegenden Cybersecurity-Anforderungen enthaelt Annex I Part I?", "CRA"},
		{"GQ-0060", "Wann muss eine aktiv ausgenutzte Schwachstelle gemeldet werden?", "CRA"},
		{"GQ-0074", "Benoetigt eine SPS ohne Netzwerkanschluss eine CRA-Bewertung?", "CRA"},
		{"GQ-0079", "Welche grundlegenden Sicherheits- und Gesundheitsschutzanforderungen enthaelt Anhang III?", "MASCHVO"},
		{"GQ-0091", "Welche Anforderungen gelten fuer wesentliche Veraenderungen einer Maschine?", "MASCHVO"},
		{"GQ-0070", "Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", "CRA"},
	}

	// Hybrid search is reported as on unless the variable is exactly "false".
	hybrid := os.Getenv("RAG_HYBRID_SEARCH") != "false"
	fmt.Printf("\n### hybrid=%v collection=%s\n", hybrid, collection)

	for _, hc := range hardCases {
		hits, err := client.SearchCollection(context.Background(), collection, hc.q, nil, 8)
		if err != nil {
			t.Fatalf("%s: %v", hc.id, err)
		}

		// Rank and verdict move together: the first matching hit decides both.
		rank, status := -1, "FAIL"
		for pos, hit := range hits {
			label := strings.ToUpper(hit.RegulationCode + " " + hit.ArticleLabel)
			if !strings.Contains(label, hc.expect) {
				continue
			}
			rank, status = pos+1, "OK"
			break
		}

		var top1 string
		if len(hits) != 0 {
			top1 = hits[0].RegulationCode + " (" + hits[0].SourceClass + ")"
		}

		fmt.Printf("%-9s expect=%-8s rank_in_top8=%-2d %-5s top1=%s\n", hc.id, hc.expect, rank, status, top1)
	}
}
// TestBenchE2E runs every question of the ComplianceBench file named by E2E_BENCH_FILE
// through the real LegalRAGClient and prints one line per question of the form
//
//	BENCH|<id>|<code1>;<code2>;...
//
// listing the regulation codes of the top-8 results in order. The output is meant to be
// diffed between a BEFORE and an AFTER run, to show that the fix only perturbs
// guidance-intent queries (gated on queryWantsGuidance) and never the norm questions —
// the Knowledge-Freeze regression guard.
//
// The test is skipped unless RUN_E2E=1 and E2E_BENCH_FILE is set. E2E_COLLECTION
// overrides the default build collection. The bench file must be a JSON object with a
// non-empty "questions" array of {"id", "question"} objects; an unreadable, malformed
// or empty file fails the test instead of producing an empty (and trivially
// "unchanged") report.
func TestBenchE2E(t *testing.T) {
	if os.Getenv("RUN_E2E") != "1" {
		t.Skip("set RUN_E2E=1 + E2E_BENCH_FILE")
	}
	path := os.Getenv("E2E_BENCH_FILE")
	if path == "" {
		t.Skip("E2E_BENCH_FILE not set")
	}

	raw, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("reading bench file %q: %v", path, err)
	}
	var bench struct {
		Questions []struct {
			ID       string `json:"id"`
			Question string `json:"question"`
		} `json:"questions"`
	}
	if err := json.Unmarshal(raw, &bench); err != nil {
		t.Fatalf("parsing bench file %q: %v", path, err)
	}
	// json.Unmarshal silently ignores unknown keys, so a file with a different
	// top-level shape decodes to zero questions without an error. Treat that as a
	// failure: an empty run would make the BEFORE/AFTER diff vacuously clean.
	if len(bench.Questions) == 0 {
		t.Fatalf("bench file %q contains no questions (expected a non-empty \"questions\" array)", path)
	}

	c := NewLegalRAGClient()
	coll := os.Getenv("E2E_COLLECTION")
	if coll == "" {
		coll = "bp_compliance_kb_2026_1_build"
	}

	// Hybrid search is reported as on unless the variable is exactly "false".
	fmt.Printf("### BENCH n=%d hybrid=%v\n", len(bench.Questions), os.Getenv("RAG_HYBRID_SEARCH") != "false")
	for _, q := range bench.Questions {
		res, err := c.SearchCollection(context.Background(), coll, q.Question, nil, 8)
		if err != nil {
			t.Fatalf("%s: %v", q.ID, err)
		}
		codes := make([]string, 0, len(res))
		for _, r := range res {
			// ";" separates codes in the output line, so neutralise it inside a code.
			codes = append(codes, strings.ReplaceAll(r.RegulationCode, ";", ","))
		}
		fmt.Printf("BENCH|%s|%s\n", q.ID, strings.Join(codes, ";"))
	}
}