From 4818fc51c2b4749a5a9338128355a51d4533a833 Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBook-Pro.local>
Date: Mon, 29 Jun 2026 14:44:53 +0200
Subject: [PATCH] fix(ucca): guidance-intent erkennt direkt benannte
 Guidance-Dokumente

queryWantsGuidance verfehlte rein dokument-namige Fragen ("Welche Kriterien
nennt WP248 ...", "Was sagt GL 07/2020 ..."): guidanceIntentSignals enthielt
zwar Herausgeber (edpb/dsk/enisa) und Verben (empfiehlt/laut), aber keine
Working-Paper-/Guideline-Identifier. Dadurch loeste der Authority-Lift nicht
aus -> binding_law (bzw. im homogenen Korpus sogar off-domain MaschVO/CRA)
verdraengte die Guidance aus den Top-K.

Fix: WP2xx / GL 0x / "working paper" als Guidance-Signal ergaenzt. Generisch
ueber alle WP-/GL-Dokumente, KEINE doc-spezifische Regel (Query->Intent, nicht
Query->konkretes Dokument).

Validierung (homogener Build-Korpus, bge-m3 + Qdrant Cosine):
- 10 Hard Cases: 8/10 -> 10/10 (WP248/WP260 zurueck in Top-8)
- ComplianceBench-100: 0/100 Norm-Fragen veraendert (Freeze-Regression gruen),
  18/18 Guidance-Intent-Fragen verbessert (binding -> korrekte Guidance-Klasse)
- Hybrid == Dense (Keyword-RRF war NICHT die Ursache, der Lift-Gate war es)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../internal/ucca/authority_rerank.go         |   4 +
 .../internal/ucca/guidance_fix_e2e_test.go    | 105 ++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 ai-compliance-sdk/internal/ucca/guidance_fix_e2e_test.go

diff --git a/ai-compliance-sdk/internal/ucca/authority_rerank.go b/ai-compliance-sdk/internal/ucca/authority_rerank.go
index 79042937..e6a30b85 100644
--- a/ai-compliance-sdk/internal/ucca/authority_rerank.go
+++ b/ai-compliance-sdk/internal/ucca/authority_rerank.go
@@ -28,6 +28,10 @@ var guidanceIntentSignals = []string{
 	"edpb", "europäischer datenschutzausschuss", "europaeischer datenschutzausschuss",
 	"dsk", "enisa", "bsi", "leitlinie", "guideline", "orientierungshilfe",
 	"auslegung", "empfiehlt", "empfehlung", "sagt", "laut",
+	// Guidance-Dokumente direkt benannt (WP29-Working-Papers WP2xx + EDPB-Guidelines "GL 0x/20xx"):
+	// "Welche Kriterien nennt WP248 ..." / "Was sagt GL 07/2020 ..." tragen Guidance-Intent ohne
+	// die Verben oben. Fix: queryWantsGuidance verfehlte rein-doc-namige Formulierungen.
+	"wp2", "wp 2", "wp29", "working paper", "gl 0",
 }
 
 // controlIntentSignals mark a query that asks HOW to implement / which controls or
diff --git a/ai-compliance-sdk/internal/ucca/guidance_fix_e2e_test.go b/ai-compliance-sdk/internal/ucca/guidance_fix_e2e_test.go
new file mode 100644
index 00000000..d97c6e16
--- /dev/null
+++ b/ai-compliance-sdk/internal/ucca/guidance_fix_e2e_test.go
@@ -0,0 +1,105 @@
+package ucca
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"strings"
+	"testing"
+)
+
+// TestGuidanceFixE2E runs ten hard retrieval cases through the real LegalRAGClient against
+// the collection named by E2E_COLLECTION (default "bp_compliance_kb_2026_1_build"). It is
+// skipped unless RUN_E2E=1. Per case it prints the rank of the expected document within the
+// top 8 and fails the test when it is missing. Toggle RAG_HYBRID_SEARCH to compare modes.
+func TestGuidanceFixE2E(t *testing.T) {
+	if os.Getenv("RUN_E2E") != "1" {
+		t.Skip("set RUN_E2E=1 + QDRANT_URL/OLLAMA_URL to run")
+	}
+	c := NewLegalRAGClient()
+	coll := os.Getenv("E2E_COLLECTION")
+	if coll == "" {
+		coll = "bp_compliance_kb_2026_1_build"
+	}
+	cases := []struct{ id, q, expect string }{
+		{"GQ-0012", "Welche neun Kriterien nennt WP248 fuer ein voraussichtlich hohes Risiko?", "WP248"},
+		{"GQ-0013", "Ab wie vielen der WP248-Kriterien ist in der Regel eine Datenschutz-Folgenabschaetzung erforderlich?", "WP248"},
+		{"GQ-0023", "Welche Anforderungen stellt WP260 an eine klare und einfache Sprache?", "WP260"},
+		{"GQ-0024", "Was versteht WP260 unter Layered Privacy Notices?", "WP260"},
+		{"GQ-0054", "Welche grundlegenden Cybersecurity-Anforderungen enthaelt Annex I Part I?", "CRA"},
+		{"GQ-0060", "Wann muss eine aktiv ausgenutzte Schwachstelle gemeldet werden?", "CRA"},
+		{"GQ-0074", "Benoetigt eine SPS ohne Netzwerkanschluss eine CRA-Bewertung?", "CRA"},
+		{"GQ-0079", "Welche grundlegenden Sicherheits- und Gesundheitsschutzanforderungen enthaelt Anhang III?", "MASCHVO"},
+		{"GQ-0091", "Welche Anforderungen gelten fuer wesentliche Veraenderungen einer Maschine?", "MASCHVO"},
+		{"GQ-0070", "Wie greifen CRA und Maschinenverordnung bei einer vernetzten Maschine ineinander?", "CRA"},
+	}
+	fmt.Printf("\n### hybrid=%v collection=%s\n", os.Getenv("RAG_HYBRID_SEARCH") != "false", coll)
+	for _, tc := range cases {
+		res, err := c.SearchCollection(context.Background(), coll, tc.q, nil, 8)
+		if err != nil {
+			t.Fatalf("%s: %v", tc.id, err)
+		}
+		rank := -1 // 1-based position of the first matching hit; -1 means no hit matched
+		for i, r := range res {
+			if strings.Contains(strings.ToUpper(r.RegulationCode+" "+r.ArticleLabel), tc.expect) {
+				rank = i + 1
+				break
+			}
+		}
+		top1 := ""
+		if len(res) > 0 {
+			top1 = res[0].RegulationCode + " (" + res[0].SourceClass + ")"
+		}
+		status := "OK"
+		if rank < 0 {
+			status = "FAIL"
+			t.Errorf("%s: expected %s in top-8, top1=%s", tc.id, tc.expect, top1) // non-fatal: keep reporting
+		}
+		fmt.Printf("%-9s expect=%-8s rank_in_top8=%-2d  %-5s  top1=%s\n", tc.id, tc.expect, rank, status, top1)
+	}
+}
+
+// TestBenchE2E sends every question of the ComplianceBench JSON file (E2E_BENCH_FILE) through
+// the real client and prints one "BENCH|<id>|<code>;<code>..." line per question with up to 8
+// regulation codes in result order. Diff the output of a BEFORE and an AFTER run to check that
+// only guidance-intent queries moved. Skipped unless RUN_E2E=1 and E2E_BENCH_FILE are set.
+func TestBenchE2E(t *testing.T) {
+	path := os.Getenv("E2E_BENCH_FILE")
+	if os.Getenv("RUN_E2E") != "1" || path == "" {
+		t.Skip("set RUN_E2E=1 + E2E_BENCH_FILE")
+	}
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read bench file %q: %v", path, err)
+	}
+	var bench struct {
+		Questions []struct {
+			ID       string `json:"id"`
+			Question string `json:"question"`
+		} `json:"questions"`
+	}
+	if err := json.Unmarshal(raw, &bench); err != nil {
+		t.Fatalf("decode bench file %q: %v", path, err)
+	}
+	if len(bench.Questions) == 0 { // an empty run would make the before/after diff vacuously equal
+		t.Fatalf("bench file %q: no questions decoded", path)
+	}
+	c := NewLegalRAGClient()
+	coll := os.Getenv("E2E_COLLECTION")
+	if coll == "" {
+		coll = "bp_compliance_kb_2026_1_build"
+	}
+	fmt.Printf("### BENCH n=%d hybrid=%v\n", len(bench.Questions), os.Getenv("RAG_HYBRID_SEARCH") != "false")
+	for _, q := range bench.Questions {
+		res, err := c.SearchCollection(context.Background(), coll, q.Question, nil, 8)
+		if err != nil {
+			t.Fatalf("%s: %v", q.ID, err)
+		}
+		codes := make([]string, 0, len(res))
+		for _, r := range res {
+			codes = append(codes, strings.ReplaceAll(r.RegulationCode, ";", ",")) // keep ';' unambiguous as separator
+		}
+		fmt.Printf("BENCH|%s|%s\n", q.ID, strings.Join(codes, ";"))
+	}
+}