fix(ai-sdk): national-law subsidiarity in authority rerank (DSGVO > BDSG) #40

Merged
Benjamin_Boenisch merged 1 commit from feat/authority-rerank-subsidiarity into main 2026-06-27 04:12:08 +00:00
3 changed files with 98 additions and 1 deletion
Showing only changes of commit 623d80b6c8 - Show all commits
+21 -1
View File
@@ -112,7 +112,7 @@ var domains = []domainDef{
{"data_protection",
[]string{"DSGVO", "GDPR", "BDSG", "EDPB", "DSK", "BfDI", "BayLfD", "DPF"},
[]string{"personenbezogen", "betroffene", "datenschutz", "datenschutzbeauftrag", "dsb",
"datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeiter"}},
"datenpanne", "auskunft", "loesch", "lösch", "einwilligung", "besondere kategorien", "auftragsverarbeit"}},
{"cyber",
[]string{"CRA", "NIS2", "NIS-2", "ENISA", "DORA", "EUCC"},
[]string{"security update", "sicherheitsupdate", "sicherheitsaktualisierung", "schwachstelle", "sbom",
@@ -126,6 +126,16 @@ var domains = []domainDef{
nil},
}
// euPrimaryDomains is the set of query domains whose PRIMARY binding act is an EU
// regulation/directive (DSGVO, CRA/NIS2, AI Act, MaschinenVO). Keys are domain names as
// returned by queryDomain; a missing key reads as false, i.e. "not EU-primary".
//
// authorityScore consults this set: for a query in one of these domains, a national (DE)
// binding law such as the BDSG that does not match the query topic is soft-demoted by
// subsidiarityPen, because it is subsidiary to the EU act for general questions.
var euPrimaryDomains = map[string]bool{
	"ai":              true,
	"cyber":           true,
	"data_protection": true,
	"product_safety":  true,
}
func queryDomain(query string) string {
ql := strings.ToLower(query)
for _, d := range domains {
@@ -135,6 +145,16 @@ func queryDomain(query string) string {
}
}
}
// Fallback: an explicit regulation mention (e.g. "DSGVO", "BDSG", "CRA") also signals the
// domain — so a question phrased around the act ("... gilt die DSGVO ...") is scoped even
// without a topical keyword. Keyword match wins first (more specific).
for _, d := range domains {
for _, reg := range d.regs {
if strings.Contains(ql, strings.ToLower(reg)) {
return d.name
}
}
}
return ""
}
@@ -14,6 +14,7 @@ const (
domainMatchGain = 0.15
offDomainPenalty = 0.10 // off-domain binding (demoted, not removed)
scopePenalty = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question
subsidiarityPen = 0.18 // national implementing law (BDSG) on a general EU-primary question: SOFT demote, not exclusion
topicGain = 0.18 // amplifier only
supersededPenalty = 0.50 // superseded Alt-Quelle (pre-eu-v1): demoted, nicht versteckt
intentLiftGain = 0.10 // epsilon a qualifying interpretative source is lifted ABOVE the best binding
@@ -102,6 +103,15 @@ func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign
if qDomain == "data_protection" && scopeClass(r) == "law_enforcement" {
score -= scopePenalty
}
// Subsidiarity: a national implementing law (DE binding, e.g. BDSG) is subsidiary to the
// primary EU act for GENERAL questions in an EU-primary domain — UNLESS the query hits a
// topic where the national norm is co-primary (DSB §38, special categories §22, ...).
// Topic-matched national norms are exempt here and get the topic boost below; every other
// national norm is only SOFT-demoted by subsidiarityPen, so it stays visible. No hard exclusion.
if euPrimaryDomains[qDomain] && info.sourceClass == "binding_law" &&
info.jurisdiction == "DE" && !resultMatchesTopic(query, r) {
score -= subsidiarityPen
}
if resultMatchesTopic(query, r) {
score += topicGain // Verstaerker, kein Override
}
@@ -72,6 +72,73 @@ func TestRerankByAuthority_Acceptance(t *testing.T) {
}
})
// Subsidiarity (KB-2026.1 BDSG-pilot regression): a national implementing § that is NOT a
// co-primary topic norm must not outrank the primary DSGVO article on a general question.
t.Run("subsidiarity dp_05: BDSG §23 below DSGVO Art.6 (Rechtsgrundlage)", func(t *testing.T) {
in := []LegalSearchResult{
bindingRes("§ 23 BDSG", "BDSG", "DE", 0.70),
bindingRes("Art. 6 DSGVO", "DSGVO", "EU", 0.66),
}
out := rerankByAuthority("Welche Rechtsgrundlagen erlauben eine Verarbeitung personenbezogener Daten?", in)
if out[0].RegulationShort != "DSGVO" {
t.Fatalf("DSGVO Art.6 must beat general BDSG §, got %q", out[0].ArticleLabel)
}
if len(out) != 2 {
t.Fatalf("BDSG must stay visible (soft demote), got len=%d", len(out))
}
})
t.Run("subsidiarity dp_08: BDSG §70 below DSGVO Art.28 (Auftragsverarbeitung)", func(t *testing.T) {
in := []LegalSearchResult{
bindingRes("§ 70 BDSG", "BDSG", "DE", 0.70), // Teil 3 → scope + subsidiarity
bindingRes("Art. 28 DSGVO", "DSGVO", "EU", 0.66),
}
out := rerankByAuthority("Was muss ein Auftragsverarbeitungsvertrag enthalten?", in)
if out[0].RegulationShort != "DSGVO" {
t.Fatalf("DSGVO Art.28 must beat BDSG §70, got %q", out[0].ArticleLabel)
}
})
t.Run("subsidiarity dp_11: BDSG §22 below DSGVO Art.32 on a TOM question", func(t *testing.T) {
in := []LegalSearchResult{
bindingRes("§ 22 BDSG", "BDSG", "DE", 0.70),
bindingRes("Art. 32 DSGVO", "DSGVO", "EU", 0.66),
}
out := rerankByAuthority("Welche technischen und organisatorischen Massnahmen verlangt das Datenschutzrecht?", in)
if out[0].RegulationShort != "DSGVO" {
t.Fatalf("DSGVO Art.32 must beat BDSG §22 on a non-topic TOM question, got %q", out[0].ArticleLabel)
}
})
t.Run("cr_07: a 'DSGVO' mention scopes the domain so BDSG Teil-3 §64 is demoted", func(t *testing.T) {
in := []LegalSearchResult{
bindingRes("§ 64 BDSG", "BDSG", "DE", 0.70), // Teil 3 (law enforcement)
bindingRes("Art. 32 DSGVO", "DSGVO", "EU", 0.66),
}
// Query has no DP keyword but names the DSGVO → domain fallback scopes it data_protection,
// so scope+subsidiarity demote the law-enforcement § below the primary norm.
out := rerankByAuthority("Welche rechtliche Grundlage gilt fuer technische und organisatorische Massnahmen - DSGVO oder ein Standard?", in)
if out[0].RegulationShort != "DSGVO" {
t.Fatalf("DSGVO must win on a DSGVO-mention question, got %q", out[0].ArticleLabel)
}
})
t.Run("co-primary dp_01: BDSG §38 stays top on a DSB question (national special rule)", func(t *testing.T) {
in := []LegalSearchResult{
bindingRes("§ 38 BDSG", "BDSG", "DE", 0.66),
bindingRes("Art. 37 DSGVO", "DSGVO", "EU", 0.64),
}
out := rerankByAuthority("Ab wann muss ein Datenschutzbeauftragter benannt werden?", in)
// DSB topic → §38 is co-primary (topic-matched, NOT subsidiarity-demoted) and keeps its
// semantic lead; Art. 37 stays a close second. Both remain top-2.
if out[0].RegulationShort != "BDSG" {
t.Fatalf("BDSG §38 (DSB co-primary) must stay top, got %q", out[0].ArticleLabel)
}
if out[1].RegulationShort != "DSGVO" {
t.Fatalf("Art. 37 DSGVO must stay co-primary second, got %q", out[1].ArticleLabel)
}
})
t.Run("nothing is dropped and topic amplifies", func(t *testing.T) {
in := []LegalSearchResult{
guidanceRes("ENISA", "ENISA", 0.72),