From f11b2e035f9d194fc6c6a98567205449a20f07aa Mon Sep 17 00:00:00 2001
From: Benjamin_Boenisch <benjamin.boenisch@gmx.de>
Date: Wed, 24 Jun 2026 09:01:25 +0000
Subject: [PATCH] feat(ai-sdk): controlled interpretation-intent guidance
 override (#34)

---
 .../internal/ucca/authority_rerank.go         | 71 +++++++++++++++---
 .../internal/ucca/legal_rag_intent_test.go    | 72 +++++++++++++++++++
 2 files changed, 133 insertions(+), 10 deletions(-)
 create mode 100644 ai-compliance-sdk/internal/ucca/legal_rag_intent_test.go

diff --git a/ai-compliance-sdk/internal/ucca/authority_rerank.go b/ai-compliance-sdk/internal/ucca/authority_rerank.go
index b4eaff03..a4b7d3d6 100644
--- a/ai-compliance-sdk/internal/ucca/authority_rerank.go
+++ b/ai-compliance-sdk/internal/ucca/authority_rerank.go
@@ -1,20 +1,61 @@
 package ucca
 
-import "sort"
+import (
+	"sort"
+	"strings"
+)
 
 // Re-ranking coefficients (validated in the offline golden harness; Phase A — conservative).
 const (
-	authorityCoef     = 0.40 // * weight/100
-	jurisdictionGain  = 0.05 // binding/guidance from DE or EU
-	foreignPenalty    = 0.60 // foreign law on a DE/EU question (demoted, not removed)
-	unknownPenalty    = 0.08
-	domainMatchGain   = 0.15
-	offDomainPenalty  = 0.10 // off-domain binding (demoted, not removed)
-	scopePenalty      = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
-	topicGain         = 0.18 // amplifier only
-	supersededPenalty = 0.50 // superseded Alt-Quelle (pre-eu-v1): demoted, nicht versteckt
+	authorityCoef        = 0.40 // * weight/100
+	jurisdictionGain     = 0.05 // binding/guidance from DE or EU
+	foreignPenalty       = 0.60 // foreign law on a DE/EU question (demoted, not removed)
+	unknownPenalty       = 0.08
+	domainMatchGain      = 0.15
+	offDomainPenalty     = 0.10 // off-domain binding (demoted, not removed)
+	scopePenalty         = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
+	topicGain            = 0.18 // amplifier only
+	supersededPenalty    = 0.50 // superseded legacy source (pre-eu-v1): demoted, not hidden
+	guidanceIntentGain   = 0.25 // added to supervisory_guidance on explicit interpretation intent
+	guidanceIntentMargin = 0.05 // ...only if its raw score >= best binding_law raw score - margin
 )
 
+// guidanceIntentSignals (lower-case) mark a query that EXPLICITLY asks a guidance body for an
+// interpretation/recommendation; only then may a competitive guideline outrank binding law.
+var guidanceIntentSignals = []string{
+	"edpb", "dsk", "enisa", "bsi", "sagt", "laut", // <= 5 bytes: must match as a whole word
+	"europäischer datenschutzausschuss", "europaeischer datenschutzausschuss", "leitlinie",
+	"guideline", "orientierungshilfe", "auslegung", "empfiehlt", "empfehlung",
+}
+
+// queryWantsGuidance reports whether the query explicitly asks for guidance/interpretation.
+func queryWantsGuidance(query string) bool {
+	isSep := func(r rune) bool { return (r < 'a' || r > 'z') && !strings.ContainsRune("äöüß", r) }
+	q := " " + strings.Join(strings.FieldsFunc(strings.ToLower(query), isSep), " ") + " " // space-delimited words
+	for _, sig := range guidanceIntentSignals {
+		if strings.Contains(q, " "+sig+" ") || (len(sig) > 5 && strings.Contains(q, sig)) { // "website" must not hit "bsi"
+			return true
+		}
+	}
+	return false
+}
+
+// bestBindingSemantic yields the guard threshold for the interpretation-intent boost:
+// the largest RAW score found on any "binding_law" entry of results. The result is 0
+// when wantsGuidance is false or no binding entry carries a positive score.
+func bestBindingSemantic(results []LegalSearchResult, wantsGuidance bool) float64 {
+	if !wantsGuidance {
+		return 0
+	}
+	top := 0.0
+	for i := range results {
+		if c := &results[i]; c.SourceClass == "binding_law" && c.Score > top {
+			top = c.Score
+		}
+	}
+	return top
+}
+
 // authorityScore computes the normative relevance of a result for a query. It augments the
 // semantic score with authority/jurisdiction/domain/scope/topic signals. Exposed for tests.
 func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign bool) float64 {
@@ -62,11 +103,21 @@ func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchR
 	}
 	qDomain := queryDomain(query)
 	qForeign := queryIsForeign(query)
+	wantsGuidance := queryWantsGuidance(query)
+	bestBindingSem := bestBindingSemantic(results, wantsGuidance)
 
 	out := make([]LegalSearchResult, len(results))
 	copy(out, results)
 	for i := range out {
 		out[i].Score = authorityScore(query, out[i], qDomain, qForeign)
+		// Interpretation intent (tightly bounded): ONLY when the query explicitly asks
+		// for guidance/interpretation AND the guideline is semantically competitive
+		// (>= best binding hit - margin) may supervisory_guidance overtake the binding
+		// norm. Otherwise binding > guidance holds (norm questions are unchanged).
+		if wantsGuidance && out[i].SourceClass == "supervisory_guidance" &&
+			results[i].Score >= bestBindingSem-guidanceIntentMargin {
+			out[i].Score += guidanceIntentGain
+		}
 	}
 	sort.SliceStable(out, func(a, b int) bool {
 		return out[a].Score > out[b].Score
diff --git a/ai-compliance-sdk/internal/ucca/legal_rag_intent_test.go b/ai-compliance-sdk/internal/ucca/legal_rag_intent_test.go
new file mode 100644
index 00000000..a24ec59c
--- /dev/null
+++ b/ai-compliance-sdk/internal/ucca/legal_rag_intent_test.go
@@ -0,0 +1,72 @@
+package ucca
+
+import "testing"
+
+// intentRes builds a fixture with Jurisdiction "EU"; sem becomes Score, weight AuthorityWeight.
+func intentRes(reg, sourceClass string, sem float64, weight int) LegalSearchResult {
+	res := LegalSearchResult{RegulationShort: reg, SourceClass: sourceClass, Jurisdiction: "EU"}
+	res.Score, res.AuthorityWeight = sem, weight
+	return res
+}
+
+// TestQueryWantsGuidance checks intent detection on guidance queries and plain norm questions.
+func TestQueryWantsGuidance(t *testing.T) {
+	cases := []struct {
+		query string
+		want  bool
+	}{
+		{"Was empfiehlt der EDPB zum DSB?", true},
+		{"Was sagt die ENISA zu Security Updates?", true},
+		{"laut DSK ...", true},
+		{"Orientierungshilfe zur DSFA", true},
+		{"Welche BSI-Empfehlung gilt?", true},
+		{"Auslegung der Aufsichtsbehörde", true},
+		{"Ab wann braucht man einen Datenschutzbeauftragten?", false},
+		{"Welche Anforderungen bestehen an Security Updates?", false},
+	}
+	for _, tc := range cases {
+		switch got := queryWantsGuidance(tc.query); {
+		case tc.want && !got:
+			t.Errorf("should detect interpretation intent: %q", tc.query)
+		case !tc.want && got:
+			t.Errorf("should NOT detect intent (norm question): %q", tc.query)
+		}
+	}
+}
+
+// TestRerank_NormQuestion_BindingStaysTop verifies that a query without an intent signal
+// keeps the binding norm on rank 1 even though the guideline has the higher input score.
+func TestRerank_NormQuestion_BindingStaysTop(t *testing.T) {
+	guidance := intentRes("EDPB DPO", "supervisory_guidance", 0.64, 70)
+	binding := intentRes("DSGVO", "binding_law", 0.58, 100)
+	query := "Ab wann braucht man einen Datenschutzbeauftragten?"
+	ranked := rerankByAuthority(query, []LegalSearchResult{guidance, binding})
+	if top := ranked[0].SourceClass; top != "binding_law" {
+		t.Errorf("norm question: binding must stay Top-1, got %s", top)
+	}
+}
+
+// TestRerank_InterpretationQuestion_GuidanceMayWin verifies that explicit intent plus a
+// semantically competitive guideline puts supervisory guidance on rank 1.
+func TestRerank_InterpretationQuestion_GuidanceMayWin(t *testing.T) {
+	guidance := intentRes("EDPB DPO", "supervisory_guidance", 0.64, 70)
+	binding := intentRes("DSGVO", "binding_law", 0.58, 100)
+	query := "Was empfiehlt der EDPB zum Datenschutzbeauftragten?"
+	ranked := rerankByAuthority(query, []LegalSearchResult{guidance, binding})
+	if top := ranked[0].SourceClass; top != "supervisory_guidance" {
+		t.Errorf("interpretation question: guidance should win Top-1, got %s", top)
+	}
+}
+
+// TestRerank_OffTopicGuidance_BlockedByGuard verifies the margin guard: intent is present,
+// but the guideline's input score lies far below the best binding hit, so binding law
+// must remain on rank 1 (no off-topic guideline override).
+func TestRerank_OffTopicGuidance_BlockedByGuard(t *testing.T) {
+	guidance := intentRes("EDPB DPO", "supervisory_guidance", 0.40, 70)
+	binding := intentRes("DSGVO", "binding_law", 0.58, 100)
+	query := "Was empfiehlt der EDPB zum Datenschutzbeauftragten?"
+	ranked := rerankByAuthority(query, []LegalSearchResult{guidance, binding})
+	if top := ranked[0].SourceClass; top != "binding_law" {
+		t.Errorf("off-topic guidance must not win even with intent, got %s", top)
+	}
+}