fix(ai-sdk): make interpretation-intent override reliably win

PR #34's fixed +0.25 guidance gain was too small live: for "Was empfiehlt der EDPB zum DSB?" the binding Art. 37 (1.381) still edged out the boosted EDPB guidance (1.348), because the live authority score gives the binding article a topic/domain bonus the (partly English) guidance chunk does not match. Replace the fixed gain with a deterministic lift: a semantically competitive guideline (raw semantic >= best_binding_semantic - 0.05) is lifted just ABOVE the best binding FINAL score (ordered by semantic), so authority/topic/domain bonuses can no longer edge it out. Obligation questions (no intent signal) are untouched — binding stays Top-1; off-topic guidance stays demoted. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
feat(ai-sdk): controlled interpretation-intent guidance override (#34)
2026-06-24 11:21:59 +02:00 · 2026-06-24 09:01:25 +00:00
2 changed files with 152 additions and 10 deletions
@@ -1,6 +1,9 @@
 package ucca
-import "sort"
+import (
 	"sort"
 	"strings"
 )
 // Re-ranking coefficients (validated in the offline golden harness; Phase A — conservative).
 const (
@@ -13,8 +16,46 @@ const (
 	scopePenalty         = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
 	topicGain            = 0.18 // amplifier only
 	supersededPenalty    = 0.50 // superseded Alt-Quelle (pre-eu-v1): demoted, nicht versteckt
 	guidanceIntentGain   = 0.10 // epsilon a qualifying guideline is lifted ABOVE the best binding hit
 	guidanceIntentMargin = 0.05 // ...only if the guideline is semantically competitive with binding
 )
 // guidanceIntentSignals mark a query that EXPLICITLY asks for an interpretation /
 // recommendation by a guidance body, rather than for the binding obligation. Only
 // then may a (semantically competitive) guideline outrank the binding norm.
 // All entries are lower-case; they are compared against the lower-cased query.
 var guidanceIntentSignals = []string{
 	"edpb", "europäischer datenschutzausschuss", "europaeischer datenschutzausschuss",
 	"dsk", "enisa", "bsi", "leitlinie", "guideline", "orientierungshilfe",
 	"auslegung", "empfiehlt", "empfehlung", "sagt", "laut",
 }
 // signalNeedsWholeWord reports whether sig is one of the short signals (acronyms
 // and short German words) that must equal a complete query token. As plain
 // substrings they fire inside unrelated words: "bsi" in "Website", "laut" in
 // "lautet", "sagt" in "untersagt". Longer stems keep substring matching so that
 // inflections and compounds ("Leitlinien", "BSI-Empfehlungen") still hit.
 func signalNeedsWholeWord(sig string) bool {
 	switch sig {
 	case "edpb", "dsk", "enisa", "bsi", "sagt", "laut":
 		return true
 	}
 	return false
 }
 // isSignalWordRune reports whether r counts as part of a word when the query is
 // tokenised: ASCII letters and digits plus the Latin-1 / Latin Extended letter
 // range (covers ä, ö, ü, ß and accented letters). Everything else — whitespace,
 // ASCII punctuation, hyphens, typographic quotes and dashes — separates tokens.
 // It is hand-rolled on purpose so the block needs no import beyond "strings".
 func isSignalWordRune(r rune) bool {
 	switch {
 	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
 		return true
 	case r == '×', r == '÷':
 		// The only two non-letters inside the Latin-1 letter range.
 		return false
 	}
 	return 0x00C0 <= r && r <= 0x024F
 }
 // queryWantsGuidance reports whether the query explicitly asks for guidance/interpretation.
 // Matching is case-insensitive. Short signals must appear as a whole word, long
 // stems may appear anywhere in the query (see signalNeedsWholeWord).
 func queryWantsGuidance(query string) bool {
 	q := strings.ToLower(query)
 	words := strings.FieldsFunc(q, func(r rune) bool { return !isSignalWordRune(r) })
 	for _, sig := range guidanceIntentSignals {
 		if !signalNeedsWholeWord(sig) {
 			if strings.Contains(q, sig) {
 				return true
 			}
 			continue
 		}
 		for _, w := range words {
 			if w == sig {
 				return true
 			}
 		}
 	}
 	return false
 }
 // bestBindingSemantic yields the guard threshold for the interpretation-intent
 // override: the largest RAW semantic score found on a "binding_law" result.
 // It returns 0 when wantsGuidance is false or when no binding result scores
 // above 0, so an off-topic guideline cannot ride the interpretation-intent boost.
 func bestBindingSemantic(results []LegalSearchResult, wantsGuidance bool) float64 {
 	var top float64
 	if wantsGuidance {
 		for i := range results {
 			if results[i].SourceClass != "binding_law" {
 				continue
 			}
 			if s := results[i].Score; s > top {
 				top = s
 			}
 		}
 	}
 	return top
 }
 // authorityScore computes the normative relevance of a result for a query. It augments the
 // semantic score with authority/jurisdiction/domain/scope/topic signals. Exposed for tests.
 func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign bool) float64 {
@@ -62,14 +103,43 @@ func rerankByAuthority(query string, results []LegalSearchResult) []LegalSearchR
 	}
 	qDomain := queryDomain(query)
 	qForeign := queryIsForeign(query)
 	wantsGuidance := queryWantsGuidance(query)
 	bestBindingSem := bestBindingSemantic(results, wantsGuidance)
 	out := make([]LegalSearchResult, len(results))
 	copy(out, results)
 	for i := range out {
 		out[i].Score = authorityScore(query, out[i], qDomain, qForeign)
 	}
 	if wantsGuidance {
 		applyGuidanceIntent(out, results, bestBindingSem)
 	}
 	sort.SliceStable(out, func(a, b int) bool {
 		return out[a].Score > out[b].Score
 	})
 	return out
 }
 // applyGuidanceIntent lifts semantically-competitive guidance just ABOVE the best
 // binding hit (ordered by semantic), so an EXPLICIT interpretation question can
 // return guidance Top-1. Obligation questions (no intent → not called) keep
 // binding on top. Guidance below the semantic margin is left untouched, so an
 // off-topic guideline can never ride the override — and the lift is computed from
 // the binding FINAL score, so authority/topic/domain bonuses cannot edge it out.
 //
 // out holds the re-scored results and is modified in place; raw holds the same
 // results, index-aligned, with their original semantic scores; bestBindingSem is
 // the highest raw semantic score among binding results. The function is a no-op
 // when out and raw are not parallel (instead of panicking on raw[i]) and when out
 // contains no binding result (there is nothing to lift guidance above).
 func applyGuidanceIntent(out, raw []LegalSearchResult, bestBindingSem float64) {
 	if len(raw) != len(out) {
 		return
 	}
 	// Track the true maximum rather than flooring at 0, so the lift stays
 	// anchored to a real binding score even if every binding score is negative.
 	bestBindingFinal, haveBinding := 0.0, false
 	for i := range out {
 		if out[i].SourceClass != "binding_law" {
 			continue
 		}
 		if !haveBinding || out[i].Score > bestBindingFinal {
 			bestBindingFinal, haveBinding = out[i].Score, true
 		}
 	}
 	if !haveBinding {
 		return
 	}
 	minSemantic := bestBindingSem - guidanceIntentMargin
 	for i := range out {
 		if out[i].SourceClass != "supervisory_guidance" || raw[i].Score < minSemantic {
 			continue
 		}
 		// Adding the semantic delta keeps several lifted guidelines ordered by
 		// their raw semantic score; since delta >= -margin and gain > margin,
 		// the lifted score always ends up above bestBindingFinal.
 		lifted := bestBindingFinal + guidanceIntentGain + (raw[i].Score - bestBindingSem)
 		if lifted > out[i].Score {
 			out[i].Score = lifted
 		}
 	}
 }
@@ -0,0 +1,72 @@
 package ucca
 import "testing"
 // intentRes builds a LegalSearchResult fixture for the intent tests: reg becomes
 // RegulationShort, sem is stored as Score, weight as AuthorityWeight, and the
 // jurisdiction is fixed to "EU".
 func intentRes(reg, sourceClass string, sem float64, weight int) LegalSearchResult {
 	var res LegalSearchResult
 	res.RegulationShort = reg
 	res.SourceClass = sourceClass
 	res.Score = sem
 	res.AuthorityWeight = weight
 	res.Jurisdiction = "EU"
 	return res
 }
 // TestQueryWantsGuidance checks that queries naming a guidance body or asking for
 // a recommendation/interpretation are detected, and that plain norm questions are not.
 func TestQueryWantsGuidance(t *testing.T) {
 	cases := []struct {
 		query  string
 		intent bool
 	}{
 		{"Was empfiehlt der EDPB zum DSB?", true},
 		{"Was sagt die ENISA zu Security Updates?", true},
 		{"laut DSK ...", true},
 		{"Orientierungshilfe zur DSFA", true},
 		{"Welche BSI-Empfehlung gilt?", true},
 		{"Auslegung der Aufsichtsbehörde", true},
 		{"Ab wann braucht man einen Datenschutzbeauftragten?", false},
 		{"Welche Anforderungen bestehen an Security Updates?", false},
 	}
 	for _, tc := range cases {
 		got := queryWantsGuidance(tc.query)
 		switch {
 		case tc.intent && !got:
 			t.Errorf("should detect interpretation intent: %q", tc.query)
 		case !tc.intent && got:
 			t.Errorf("should NOT detect intent (norm question): %q", tc.query)
 		}
 	}
 }
 func TestRerank_NormQuestion_BindingStaysTop(t *testing.T) {
 	// No intent signal → binding wins even though guidance is semantically higher.
 	results := []LegalSearchResult{
 		intentRes("EDPB DPO", "supervisory_guidance", 0.64, 70),
 		intentRes("DSGVO", "binding_law", 0.58, 100),
 	}
 	out := rerankByAuthority("Ab wann braucht man einen Datenschutzbeauftragten?", results)
 	if out[0].SourceClass != "binding_law" {
 		t.Errorf("norm question: binding must stay Top-1, got %s", out[0].SourceClass)
 	}
 }
 func TestRerank_InterpretationQuestion_GuidanceMayWin(t *testing.T) {
 	// Explicit intent + guidance semantically competitive → guidance wins.
 	results := []LegalSearchResult{
 		intentRes("EDPB DPO", "supervisory_guidance", 0.64, 70),
 		intentRes("DSGVO", "binding_law", 0.58, 100),
 	}
 	out := rerankByAuthority("Was empfiehlt der EDPB zum Datenschutzbeauftragten?", results)
 	if out[0].SourceClass != "supervisory_guidance" {
 		t.Errorf("interpretation question: guidance should win Top-1, got %s", out[0].SourceClass)
 	}
 }
 func TestRerank_OffTopicGuidance_BlockedByGuard(t *testing.T) {
 	// Intent present, but guidance semantic is far below the best binding hit →
 	// the margin guard keeps binding on top (no off-topic guideline override).
 	results := []LegalSearchResult{
 		intentRes("EDPB DPO", "supervisory_guidance", 0.40, 70),
 		intentRes("DSGVO", "binding_law", 0.58, 100),
 	}
 	out := rerankByAuthority("Was empfiehlt der EDPB zum Datenschutzbeauftragten?", results)
 	if out[0].SourceClass != "binding_law" {
 		t.Errorf("off-topic guidance must not win even with intent, got %s", out[0].SourceClass)
 	}
 }
Author	SHA1	Message	Date
Benjamin Admin	24499a25e9	fix(ai-sdk): make interpretation-intent override reliably win CI / detect-changes (pull_request) Successful in 5s Details CI / branch-name (pull_request) Successful in 1s Details CI / guardrail-integrity (pull_request) Successful in 4s Details CI / secret-scan (pull_request) Successful in 7s Details CI / dep-audit (pull_request) Failing after 53s Details CI / sbom-scan (pull_request) Failing after 51s Details CI / build-sha-integrity (pull_request) Successful in 5s Details CI / validate-canonical-controls (pull_request) Successful in 3s Details CI / loc-budget (pull_request) Successful in 15s Details CI / go-lint (pull_request) Successful in 53s Details CI / python-lint (pull_request) Failing after 15s Details CI / nodejs-lint (pull_request) Failing after 1m13s Details CI / nodejs-build (pull_request) Successful in 3m8s Details CI / test-go (pull_request) Successful in 58s Details CI / iace-gt-coverage (pull_request) Successful in 15s Details CI / test-python-backend (pull_request) Successful in 27s Details CI / test-python-document-crawler (pull_request) Successful in 19s Details CI / test-python-dsms-gateway (pull_request) Successful in 13s Details PR #34's fixed +0.25 guidance gain was too small live: for "Was empfiehlt der EDPB zum DSB?" the binding Art. 37 (1.381) still edged out the boosted EDPB guidance (1.348), because the live authority score gives the binding article a topic/domain bonus the (partly English) guidance chunk does not match. Replace the fixed gain with a deterministic lift: a semantically competitive guideline (raw semantic >= best_binding_semantic - 0.05) is lifted just ABOVE the best binding FINAL score (ordered by semantic), so authority/topic/domain bonuses can no longer edge it out. Obligation questions (no intent signal) are untouched — binding stays Top-1; off-topic guidance stays demoted. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-06-24 11:21:59 +02:00
Benjamin_Boenisch	f11b2e035f	feat(ai-sdk): controlled interpretation-intent guidance override (#34 ) CI / detect-changes (push) Successful in 5s Details CI / branch-name (push) Has been skipped Details CI / guardrail-integrity (push) Has been skipped Details CI / secret-scan (push) Has been skipped Details CI / dep-audit (push) Has been skipped Details CI / sbom-scan (push) Has been skipped Details CI / build-sha-integrity (push) Successful in 5s Details CI / validate-canonical-controls (push) Successful in 3s Details CI / loc-budget (push) Successful in 17s Details CI / go-lint (push) Has been skipped Details CI / python-lint (push) Has been skipped Details CI / nodejs-lint (push) Has been skipped Details CI / nodejs-build (push) Has been skipped Details CI / test-go (push) Successful in 57s Details CI / iace-gt-coverage (push) Successful in 15s Details CI / test-python-backend (push) Has been skipped Details CI / test-python-document-crawler (push) Has been skipped Details CI / test-python-dsms-gateway (push) Has been skipped Details	2026-06-24 09:01:25 +00:00