From c28c5329587e10f1031a328c56f8a654b32d9ad1 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 24 Jun 2026 00:22:37 +0200 Subject: [PATCH] feat(ai-sdk): demote superseded pre-eu-v1 sources in authority rerank The old pre-eu-v1 corpus chunks (un-annotated CRA/AI Act/DORA/NIS2/DSGVO duplicates + the old Machinery Directive and its guide) are tagged status=superseded / use_for_primary=false in the vector store. Honor that in the rerank: a superseded result takes a fixed penalty so the eu-v1 norm wins default questions, while the old source stays in the pool (demoted, not hidden) and remains findable for history / transition questions. Verified on dev: "CRA Sicherheitsupdates" now returns CRA Anhang I (eu-v1) at #1 instead of an un-annotated old chunk; MaschinenVO outranks the old Machinery Directive/guide; superseded chunks remain retrievable lower down. Co-Authored-By: Claude Opus 4.7 --- .../internal/ucca/authority_rerank.go | 23 +++++++++----- .../internal/ucca/legal_rag_client.go | 1 + .../internal/ucca/legal_rag_supersede_test.go | 30 +++++++++++++++++++ .../internal/ucca/legal_rag_types.go | 4 +++ 4 files changed, 50 insertions(+), 8 deletions(-) create mode 100644 ai-compliance-sdk/internal/ucca/legal_rag_supersede_test.go diff --git a/ai-compliance-sdk/internal/ucca/authority_rerank.go b/ai-compliance-sdk/internal/ucca/authority_rerank.go index 6c6232eb..b4eaff03 100644 --- a/ai-compliance-sdk/internal/ucca/authority_rerank.go +++ b/ai-compliance-sdk/internal/ucca/authority_rerank.go @@ -4,14 +4,15 @@ import "sort" // Re-ranking coefficients (validated in the offline golden harness; Phase A — conservative). 
const ( - authorityCoef = 0.40 // * weight/100 - jurisdictionGain = 0.05 // binding/guidance from DE or EU - foreignPenalty = 0.60 // foreign law on a DE/EU question (demoted, not removed) - unknownPenalty = 0.08 - domainMatchGain = 0.15 - offDomainPenalty = 0.10 // off-domain binding (demoted, not removed) - scopePenalty = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question - topicGain = 0.18 // amplifier only + authorityCoef = 0.40 // * weight/100 + jurisdictionGain = 0.05 // binding/guidance from DE or EU + foreignPenalty = 0.60 // foreign law on a DE/EU question (demoted, not removed) + unknownPenalty = 0.08 + domainMatchGain = 0.15 + offDomainPenalty = 0.10 // off-domain binding (demoted, not removed) + scopePenalty = 0.25 // BDSG Teil 3 (law enforcement) on a general DP question + topicGain = 0.18 // amplifier only + supersededPenalty = 0.50 // superseded Alt-Quelle (pre-eu-v1): demoted, nicht versteckt ) // authorityScore computes the normative relevance of a result for a query. It augments the @@ -20,6 +21,12 @@ func authorityScore(query string, r LegalSearchResult, qDomain string, qForeign info := classifyAuthority(r) score := r.Score + authorityCoef*float64(info.weight)/100.0 + if r.Superseded { + // Alt-Quelle (pre-eu-v1): Default-Fragen sollen die eu-v1-Norm sehen. Demoted, + // nicht entfernt — fuer Historie/Uebergangsfragen bleibt sie auffindbar. 
+ score -= supersededPenalty + } + if info.jurisdiction == "CH" && !qForeign { score -= foreignPenalty // Fremdrecht bei DE/EU-Frage: demoted, nicht geloescht } else { diff --git a/ai-compliance-sdk/internal/ucca/legal_rag_client.go b/ai-compliance-sdk/internal/ucca/legal_rag_client.go index ee52f052..2942265c 100644 --- a/ai-compliance-sdk/internal/ucca/legal_rag_client.go +++ b/ai-compliance-sdk/internal/ucca/legal_rag_client.go @@ -148,6 +148,7 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string, CitationUnit: getString(hit.Payload, "citation_unit"), ReferencesOut: getStringSlice(hit.Payload, "references_out"), ReferencesIn: getStringSlice(hit.Payload, "references_in"), + Superseded: getString(hit.Payload, "status") == "superseded", } } diff --git a/ai-compliance-sdk/internal/ucca/legal_rag_supersede_test.go b/ai-compliance-sdk/internal/ucca/legal_rag_supersede_test.go new file mode 100644 index 00000000..c8d3e1e4 --- /dev/null +++ b/ai-compliance-sdk/internal/ucca/legal_rag_supersede_test.go @@ -0,0 +1,30 @@ +package ucca + +import "testing" + +// A superseded alt-source must rank below the same result when it is NOT +// superseded (the eu-v1 norm), but only demoted — the penalty is finite, so it +// stays in the pool and remains findable for history/transition questions. 
+func TestAuthorityScore_SupersededIsDemotedNotRemoved(t *testing.T) { + fresh := LegalSearchResult{ + Score: 0.65, SourceClass: "binding_law", AuthorityWeight: 100, + Jurisdiction: "EU", RegulationShort: "CRA", Article: "13", + } + old := fresh + old.Superseded = true + + sFresh := authorityScore("CRA Sicherheitsupdates Hersteller", fresh, "", false) + sOld := authorityScore("CRA Sicherheitsupdates Hersteller", old, "", false) + + if sOld >= sFresh { + t.Errorf("superseded must score lower: fresh=%.3f superseded=%.3f", sFresh, sOld) + } + gap := sFresh - sOld + if gap < supersededPenalty-0.001 || gap > supersededPenalty+0.001 { + t.Errorf("demotion should equal supersededPenalty (%.2f), got %.3f", supersededPenalty, gap) + } + // Score must stay above the -1 collapse floor (positivity is not asserted here) → present in the pool, not hidden. + if sOld <= -1 { + t.Errorf("superseded score collapsed (%.3f) — must remain findable", sOld) + } +} diff --git a/ai-compliance-sdk/internal/ucca/legal_rag_types.go b/ai-compliance-sdk/internal/ucca/legal_rag_types.go index 2d4ee59f..a47327d2 100644 --- a/ai-compliance-sdk/internal/ucca/legal_rag_types.go +++ b/ai-compliance-sdk/internal/ucca/legal_rag_types.go @@ -33,6 +33,10 @@ type LegalSearchResult struct { CitationUnit string `json:"-"` ReferencesOut []string `json:"-"` ReferencesIn []string `json:"-"` + + // Supersede-Status (status="superseded", use_for_primary=false) — Alt-Quelle, + // die fuer Default-Fragen demoted wird (nicht versteckt; fuer Historie auffindbar). + Superseded bool `json:"-"` } // LegalAssessment is the auditable explanation layer over a ranked result set: