From 623d80b6c8a02fb019fca4c50404c1d8e76cfa9e Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Fri, 26 Jun 2026 21:28:12 +0200 Subject: [PATCH] fix(ai-sdk): national-law subsidiarity in authority rerank (DSGVO > BDSG for general questions) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The authority reranker (wired in legal_rag_client.go:168) had no national-subsidiarity dimension, so a general BDSG paragraph could outrank the primary DSGVO article. Surfaced by the KB-2026.1 BDSG pilot (dp_05/08/11 + cr_07). - authorityScore: DE binding_law in an EU-primary domain WITHOUT a co-primary topic match -> soft demote (subsidiarityPen 0.18), not exclusion. National special rules stay co-primary via the topic ontology (DSB Art.37+§38, special categories Art.9+§22, ...). - queryDomain: fall back to a regulation-name mention (DSGVO/BDSG/CRA) so a question phrased around the act is domain-scoped even without a topical keyword (fixes cr_07: BDSG Teil-3 §64). - data_protection keyword stem 'auftragsverarbeit' (catches Auftragsverarbeitungsvertrag). Pure ranking logic, no data manipulation; soft demotes keep national rules visible. Build result (DSGVO+BDSG): degraded=0, must_not=0. go build/vet/test ./... green; 5 new subtests. 
Co-Authored-By: Claude Opus 4.7 --- ai-compliance-sdk/internal/ucca/authority.go | 22 +++++- .../internal/ucca/authority_rerank.go | 10 +++ .../internal/ucca/authority_rerank_test.go | 67 +++++++++++++++++++ 3 files changed, 98 insertions(+), 1 deletion(-) diff --git a/ai-compliance-sdk/internal/ucca/authority.go b/ai-compliance-sdk/internal/ucca/authority.go index 6627b57e..691fea4b 100644 --- a/ai-compliance-sdk/internal/ucca/authority.go +++ b/ai-compliance-sdk/internal/ucca/authority.go @@ -112,7 +112,7 @@ var domains = []domainDef{ {"data_protection", []string{"DSGVO", "GDPR", "BDSG", "EDPB", "DSK", "BfDI", "BayLfD", "DPF"}, []string{"personenbezogen", "betroffene", "datenschutz", "datenschutzbeauftrag", "dsb", - "datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeiter"}}, + "datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeit"}}, {"cyber", []string{"CRA", "NIS2", "NIS-2", "ENISA", "DORA", "EUCC"}, []string{"security update", "sicherheitsupdate", "sicherheitsaktualisierung", "schwachstelle", "sbom", @@ -126,6 +126,16 @@ var domains = []domainDef{ nil}, } +// euPrimaryDomains are domains whose PRIMARY binding act is an EU regulation/directive +// (DSGVO, CRA/NIS2, AI Act, MaschinenVO). In these domains a NATIONAL implementing law +// (e.g. BDSG) is subsidiary for general questions — see subsidiarityPen in authorityScore. +var euPrimaryDomains = map[string]bool{ + "data_protection": true, + "cyber": true, + "ai": true, + "product_safety": true, +} + func queryDomain(query string) string { ql := strings.ToLower(query) for _, d := range domains { @@ -135,6 +145,16 @@ func queryDomain(query string) string { } } } + // Fallback: an explicit regulation mention (e.g. "DSGVO", "BDSG", "CRA") also signals the + // domain — so a question phrased around the act ("... gilt die DSGVO ...") is scoped even + // without a topical keyword. Keyword match wins first (more specific). 
+ for _, d := range domains { + for _, reg := range d.regs { + if strings.Contains(ql, strings.ToLower(reg)) { + return d.name + } + } + } return "" } diff --git a/ai-compliance-sdk/internal/ucca/authority_rerank.go b/ai-compliance-sdk/internal/ucca/authority_rerank.go index 611b0111..79042937 100644 --- a/ai-compliance-sdk/internal/ucca/authority_rerank.go +++ b/ai-compliance-sdk/internal/ucca/authority_rerank.go @@ -14,6 +14,7 @@ const ( domainMatchGain = 0.15 offDomainPenalty = 0.10 // off-domain binding (demoted, not removed) scopePenalty = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question + subsidiarityPen = 0.18 // national implementing law (BDSG) on a general EU-primary question: SOFT demote, not exclusion topicGain = 0.18 // amplifier only supersededPenalty = 0.50 // superseded Alt-Quelle (pre-eu-v1): demoted, nicht versteckt intentLiftGain = 0.10 // epsilon a qualifying interpretative source is lifted ABOVE the best binding @@ -102,6 +103,15 @@ func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign if qDomain == "data_protection" && scopeClass(r) == "law_enforcement" { score -= scopePenalty } + // Subsidiarity: a national implementing law (DE binding, e.g. BDSG) is subsidiary to the + // primary EU act for GENERAL questions in an EU-primary domain — UNLESS the query hits a + // topic where the national norm is co-primary (DSB §38, special categories §22, ...). The + // topic boost below lifts those; here we only SOFT-demote the non-topic national norm, so + // it stays visible and can still win on a strongly matching topic. No hard exclusion. 
+ if euPrimaryDomains[qDomain] && info.sourceClass == "binding_law" && + info.jurisdiction == "DE" && !resultMatchesTopic(query, r) { + score -= subsidiarityPen + } if resultMatchesTopic(query, r) { score += topicGain // Verstaerker, kein Override } diff --git a/ai-compliance-sdk/internal/ucca/authority_rerank_test.go b/ai-compliance-sdk/internal/ucca/authority_rerank_test.go index 65e5f16c..3da6acf7 100644 --- a/ai-compliance-sdk/internal/ucca/authority_rerank_test.go +++ b/ai-compliance-sdk/internal/ucca/authority_rerank_test.go @@ -72,6 +72,73 @@ func TestRerankByAuthority_Acceptance(t *testing.T) { } }) + // Subsidiarity (KB-2026.1 BDSG-pilot regression): a national implementing § that is NOT a + // co-primary topic norm must not outrank the primary DSGVO article on a general question. + t.Run("subsidiarity dp_05: BDSG §23 below DSGVO Art.6 (Rechtsgrundlage)", func(t *testing.T) { + in := []LegalSearchResult{ + bindingRes("§ 23 BDSG", "BDSG", "DE", 0.70), + bindingRes("Art. 6 DSGVO", "DSGVO", "EU", 0.66), + } + out := rerankByAuthority("Welche Rechtsgrundlagen erlauben eine Verarbeitung personenbezogener Daten?", in) + if out[0].RegulationShort != "DSGVO" { + t.Fatalf("DSGVO Art.6 must beat general BDSG §, got %q", out[0].ArticleLabel) + } + if len(out) != 2 { + t.Fatalf("BDSG must stay visible (soft demote), got len=%d", len(out)) + } + }) + + t.Run("subsidiarity dp_08: BDSG §70 below DSGVO Art.28 (Auftragsverarbeitung)", func(t *testing.T) { + in := []LegalSearchResult{ + bindingRes("§ 70 BDSG", "BDSG", "DE", 0.70), // Teil 3 → scope + subsidiarity + bindingRes("Art. 
28 DSGVO", "DSGVO", "EU", 0.66), + } + out := rerankByAuthority("Was muss ein Auftragsverarbeitungsvertrag enthalten?", in) + if out[0].RegulationShort != "DSGVO" { + t.Fatalf("DSGVO Art.28 must beat BDSG §70, got %q", out[0].ArticleLabel) + } + }) + + t.Run("subsidiarity dp_11: BDSG §22 below DSGVO Art.32 on a TOM question", func(t *testing.T) { + in := []LegalSearchResult{ + bindingRes("§ 22 BDSG", "BDSG", "DE", 0.70), + bindingRes("Art. 32 DSGVO", "DSGVO", "EU", 0.66), + } + out := rerankByAuthority("Welche technischen und organisatorischen Massnahmen verlangt das Datenschutzrecht?", in) + if out[0].RegulationShort != "DSGVO" { + t.Fatalf("DSGVO Art.32 must beat BDSG §22 on a non-topic TOM question, got %q", out[0].ArticleLabel) + } + }) + + t.Run("cr_07: a 'DSGVO' mention scopes the domain so BDSG Teil-3 §64 is demoted", func(t *testing.T) { + in := []LegalSearchResult{ + bindingRes("§ 64 BDSG", "BDSG", "DE", 0.70), // Teil 3 (law enforcement) + bindingRes("Art. 32 DSGVO", "DSGVO", "EU", 0.66), + } + // Query has no DP keyword but names the DSGVO → domain fallback scopes it data_protection, + // so scope+subsidiarity demote the law-enforcement § below the primary norm. + out := rerankByAuthority("Welche rechtliche Grundlage gilt fuer technische und organisatorische Massnahmen - DSGVO oder ein Standard?", in) + if out[0].RegulationShort != "DSGVO" { + t.Fatalf("DSGVO must win on a DSGVO-mention question, got %q", out[0].ArticleLabel) + } + }) + + t.Run("co-primary dp_01: BDSG §38 stays top on a DSB question (national special rule)", func(t *testing.T) { + in := []LegalSearchResult{ + bindingRes("§ 38 BDSG", "BDSG", "DE", 0.66), + bindingRes("Art. 37 DSGVO", "DSGVO", "EU", 0.64), + } + out := rerankByAuthority("Ab wann muss ein Datenschutzbeauftragter benannt werden?", in) + // DSB topic → §38 is co-primary (topic-matched, NOT subsidiarity-demoted) and keeps its + // semantic lead; Art. 37 stays a close second. Both remain top-2. 
+ if out[0].RegulationShort != "BDSG" { + t.Fatalf("BDSG §38 (DSB co-primary) must stay top, got %q", out[0].ArticleLabel) + } + if out[1].RegulationShort != "DSGVO" { + t.Fatalf("Art. 37 DSGVO must stay co-primary second, got %q", out[1].ArticleLabel) + } + }) + t.Run("nothing is dropped and topic amplifies", func(t *testing.T) { in := []LegalSearchResult{ guidanceRes("ENISA", "ENISA", 0.72),