fix(ucca): guidance-intent erkennt direkt benannte Guidance-Dokumente
CI / detect-changes (pull_request) Successful in 7s
CI / branch-name (pull_request) Successful in 1s
CI / guardrail-integrity (pull_request) Successful in 5s
CI / secret-scan (pull_request) Successful in 9s
CI / dep-audit (pull_request) Failing after 56s
CI / sbom-scan (pull_request) Failing after 58s
CI / build-sha-integrity (pull_request) Successful in 7s
CI / validate-canonical-controls (pull_request) Successful in 7s
CI / loc-budget (pull_request) Successful in 21s
CI / go-lint (pull_request) Successful in 48s
CI / python-lint (pull_request) Failing after 17s
CI / nodejs-lint (pull_request) Failing after 1m9s
CI / nodejs-build (pull_request) Successful in 3m2s
CI / test-go (pull_request) Successful in 1m3s
CI / iace-gt-coverage (pull_request) Successful in 18s
CI / test-python-backend (pull_request) Successful in 28s
CI / test-python-document-crawler (pull_request) Successful in 14s
CI / test-python-dsms-gateway (pull_request) Successful in 11s
CI / detect-changes (pull_request) Successful in 7s
CI / branch-name (pull_request) Successful in 1s
CI / guardrail-integrity (pull_request) Successful in 5s
CI / secret-scan (pull_request) Successful in 9s
CI / dep-audit (pull_request) Failing after 56s
CI / sbom-scan (pull_request) Failing after 58s
CI / build-sha-integrity (pull_request) Successful in 7s
CI / validate-canonical-controls (pull_request) Successful in 7s
CI / loc-budget (pull_request) Successful in 21s
CI / go-lint (pull_request) Successful in 48s
CI / python-lint (pull_request) Failing after 17s
CI / nodejs-lint (pull_request) Failing after 1m9s
CI / nodejs-build (pull_request) Successful in 3m2s
CI / test-go (pull_request) Successful in 1m3s
CI / iace-gt-coverage (pull_request) Successful in 18s
CI / test-python-backend (pull_request) Successful in 28s
CI / test-python-document-crawler (pull_request) Successful in 14s
CI / test-python-dsms-gateway (pull_request) Successful in 11s
queryWantsGuidance verfehlte rein dokument-namige Fragen ("Welche Kriterien
nennt WP248 ...", "Was sagt GL 07/2020 ..."): guidanceIntentSignals enthielt
zwar Herausgeber (edpb/dsk/enisa) und Verben (empfiehlt/laut), aber keine
Working-Paper-/Guideline-Identifier. Dadurch loeste der Authority-Lift nicht
aus -> binding_law (bzw. im homogenen Korpus sogar off-domain MaschVO/CRA)
verdraengte die Guidance aus den Top-K.
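Fix: WP2xx / GL 0x / "working paper" als Guidance-Signal ergaenzt. Generisch
ueber die WP2xx- und GL-0x-Dokumente (Identifier ausserhalb dieser Muster, z.B.
WP1xx oder GL 1x/20xx, matchen die neuen Signale nicht), KEINE doc-spezifische
Regel (Query->Intent, nicht Query->konkretes Dokument).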
Validierung (homogener Build-Korpus, bge-m3 + Qdrant Cosine):
- 10 Hard Cases: 8/10 -> 10/10 (WP248/WP260 zurueck in Top-8)
- ComplianceBench-100: 0/100 Norm-Fragen veraendert (Freeze-Regression gruen),
18/18 Guidance-Intent-Fragen verbessert (binding -> korrekte Guidance-Klasse)
- Hybrid == Dense (Keyword-RRF war NICHT die Ursache, der Lift-Gate war es)
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -28,6 +28,10 @@ var guidanceIntentSignals = []string{
|
||||
"edpb", "europäischer datenschutzausschuss", "europaeischer datenschutzausschuss",
|
||||
"dsk", "enisa", "bsi", "leitlinie", "guideline", "orientierungshilfe",
|
||||
"auslegung", "empfiehlt", "empfehlung", "sagt", "laut",
|
||||
// Guidance-Dokumente direkt benannt (WP29-Working-Papers WP2xx + EDPB-Guidelines "GL 0x/20xx"):
|
||||
// "Welche Kriterien nennt WP248 ..." / "Was sagt GL 07/2020 ..." tragen Guidance-Intent ohne
|
||||
// die Verben oben. Fix: queryWantsGuidance verfehlte rein-doc-namige Formulierungen.
|
||||
"wp2", "wp 2", "wp29", "working paper", "gl 0",
|
||||
}
|
||||
|
||||
// controlIntentSignals mark a query that asks HOW to implement / which controls or
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
package ucca
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestGuidanceFixE2E runs the 10 hard cases through the REAL LegalRAGClient against the
|
||||
// homogeneous build collection. Guarded by RUN_E2E=1. Reports the rank of the expected
|
||||
// document within the returned top-K — proving whether the guidanceIntentSignals fix lifts
|
||||
// guidance (WP248/WP260) back into the prompt. Toggle RAG_HYBRID_SEARCH to compare modes.
|
||||
func TestGuidanceFixE2E(t *testing.T) {
|
||||
if os.Getenv("RUN_E2E") != "1" {
|
||||
t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL to run")
|
||||
}
|
||||
c := NewLegalRAGClient()
|
||||
coll := os.Getenv("E2E_COLLECTION")
|
||||
if coll == "" {
|
||||
coll = "bp_compliance_kb_2026_1_build"
|
||||
}
|
||||
cases := []struct{ id, q, expect string }{
|
||||
{"GQ-0012", "Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", "WP248"},
|
||||
{"GQ-0013", "Ab wie vielen der WP248-Kriterien ist in der Regel eine Datenschutz-Folgenabschaetzung erforderlich?", "WP248"},
|
||||
{"GQ-0023", "Welche Anforderungen stellt WP260 an eine klare und einfache Sprache?", "WP260"},
|
||||
{"GQ-0024", "Was versteht WP260 unter Layered Privacy Notices?", "WP260"},
|
||||
{"GQ-0054", "Welche grundlegenden Cybersecurity-Anforderungen enthaelt Annex I Part I?", "CRA"},
|
||||
{"GQ-0060", "Wann muss eine aktiv ausgenutzte Schwachstelle gemeldet werden?", "CRA"},
|
||||
{"GQ-0074", "Benoetigt eine SPS ohne Netzwerkanschluss eine CRA-Bewertung?", "CRA"},
|
||||
{"GQ-0079", "Welche grundlegenden Sicherheits- und Gesundheitsschutzanforderungen enthaelt Anhang III?", "MASCHVO"},
|
||||
{"GQ-0091", "Welche Anforderungen gelten fuer wesentliche Veraenderungen einer Maschine?", "MASCHVO"},
|
||||
{"GQ-0070", "Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", "CRA"},
|
||||
}
|
||||
fmt.Printf("\n### hybrid=%v collection=%s\n", os.Getenv("RAG_HYBRID_SEARCH") != "false", coll)
|
||||
for _, tc := range cases {
|
||||
res, err := c.SearchCollection(context.Background(), coll, tc.q, nil, 8)
|
||||
if err != nil {
|
||||
t.Fatalf("%s: %v", tc.id, err)
|
||||
}
|
||||
rank := -1
|
||||
for i, r := range res {
|
||||
lab := strings.ToUpper(r.RegulationCode + " " + r.ArticleLabel)
|
||||
if strings.Contains(lab, tc.expect) {
|
||||
rank = i + 1
|
||||
break
|
||||
}
|
||||
}
|
||||
top1 := ""
|
||||
if len(res) > 0 {
|
||||
top1 = res[0].RegulationCode + " (" + res[0].SourceClass + ")"
|
||||
}
|
||||
status := "FAIL"
|
||||
if rank > 0 {
|
||||
status = "OK"
|
||||
}
|
||||
fmt.Printf("%-9s expect=%-8s rank_in_top8=%-2d %-5s top1=%s\n", tc.id, tc.expect, rank, status, top1)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBenchE2E runs the FULL ComplianceBench (E2E_BENCH_FILE) through the real client and
|
||||
// prints, per question, the ordered top-8 regulation codes. Diffing BEFORE vs AFTER proves
|
||||
// the fix only perturbs guidance-intent queries (gated on queryWantsGuidance) and never the
|
||||
// norm questions — the Knowledge-Freeze regression guard.
|
||||
func TestBenchE2E(t *testing.T) {
|
||||
if os.Getenv("RUN_E2E") != "1" {
|
||||
t.Skip("set RUN_E2E=1 + E2E_BENCH_FILE")
|
||||
}
|
||||
path := os.Getenv("E2E_BENCH_FILE")
|
||||
if path == "" {
|
||||
t.Skip("E2E_BENCH_FILE not set")
|
||||
}
|
||||
raw, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
var bench struct {
|
||||
Questions []struct {
|
||||
ID string `json:"id"`
|
||||
Question string `json:"question"`
|
||||
} `json:"questions"`
|
||||
}
|
||||
if err := json.Unmarshal(raw, &bench); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
c := NewLegalRAGClient()
|
||||
coll := os.Getenv("E2E_COLLECTION")
|
||||
if coll == "" {
|
||||
coll = "bp_compliance_kb_2026_1_build"
|
||||
}
|
||||
fmt.Printf("### BENCH n=%d hybrid=%v\n", len(bench.Questions), os.Getenv("RAG_HYBRID_SEARCH") != "false")
|
||||
for _, q := range bench.Questions {
|
||||
res, err := c.SearchCollection(context.Background(), coll, q.Question, nil, 8)
|
||||
if err != nil {
|
||||
t.Fatalf("%s: %v", q.ID, err)
|
||||
}
|
||||
codes := make([]string, 0, len(res))
|
||||
for _, r := range res {
|
||||
codes = append(codes, strings.ReplaceAll(r.RegulationCode, ";", ","))
|
||||
}
|
||||
fmt.Printf("BENCH|%s|%s\n", q.ID, strings.Join(codes, ";"))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user