feat(ai-sdk): authority-aware re-ranking for legal RAG retrieval (Phase 1)
CI / detect-changes (pull_request) Successful in 16s
CI / branch-name (pull_request) Successful in 2s
CI / guardrail-integrity (pull_request) Successful in 5s
CI / secret-scan (pull_request) Successful in 6s
CI / dep-audit (pull_request) Failing after 1m1s
CI / sbom-scan (pull_request) Failing after 1m4s
CI / build-sha-integrity (pull_request) Successful in 14s
CI / validate-canonical-controls (pull_request) Successful in 13s
CI / test-go (pull_request) Successful in 1m2s
CI / loc-budget (pull_request) Successful in 24s
CI / go-lint (pull_request) Failing after 20s
CI / python-lint (pull_request) Failing after 23s
CI / nodejs-lint (pull_request) Failing after 1m10s
CI / nodejs-build (pull_request) Successful in 3m26s
CI / iace-gt-coverage (pull_request) Successful in 16s
CI / test-python-backend (pull_request) Successful in 27s
CI / test-python-document-crawler (pull_request) Successful in 13s
CI / test-python-dsms-gateway (pull_request) Successful in 9s

Re-orders /sdk/v1/rag/search results so binding law from the matching
jurisdiction and domain ranks above guidance, foreign and off-domain law —
without dropping anything (guidance stays as interpretation context).
Internal-only: response schema is unchanged (json:"-" fields), so every
consumer benefits without a contract change.

- authority.go: classifyAuthority / queryDomain / chunkDomain / scopeClass /
  topic ontology. Tagged payload (authority_weight/source_class/jurisdiction)
  wins; deterministic fallback via category + name markers for the untagged corpus.
- authority_rerank.go: rerankByAuthority. final = semantic + authority +
  jurisdiction + domain + scope + topic; the authority score is written back to
  Score so the multi-collection advisor merge preserves the order.
- legal_rag_client: stratified retrieval — the binding-law pool AUGMENTS the
  semantic pool (mergeDedupHits), and the merged pool is then re-ranked.
- legal_rag_http: searchBinding (source_class filter) + shared doPointsSearch.
- table-driven tests for authority/domain/scope/topic + rerank acceptance +
  a stratified-binding integration test. go test -race green.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-06-23 10:37:31 +02:00
parent a4d1105b3c
commit 49147d9497
9 changed files with 641 additions and 2 deletions
@@ -0,0 +1,125 @@
package ucca
import "testing"
// TestClassifyAuthority runs classifyAuthority over a table of results and
// compares the reported weight, source class and jurisdiction with the
// expected triple. The "tagged" rows set AuthorityWeight/SourceClass/
// Jurisdiction on the input; the "untagged" rows set only RegulationShort,
// ArticleLabel and (in one row) Category.
func TestClassifyAuthority(t *testing.T) {
	type authorityCase struct {
		name    string
		result  LegalSearchResult
		wantW   int
		wantSC  string
		wantJur string
	}
	cases := []authorityCase{
		{name: "tagged binding EU", result: LegalSearchResult{AuthorityWeight: 100, SourceClass: "binding_law", Jurisdiction: "EU"}, wantW: 100, wantSC: "binding_law", wantJur: "EU"},
		{name: "tagged guidance DE", result: LegalSearchResult{AuthorityWeight: 70, SourceClass: "supervisory_guidance", Jurisdiction: "DE"}, wantW: 70, wantSC: "supervisory_guidance", wantJur: "DE"},
		{name: "tagged foreign CH", result: LegalSearchResult{AuthorityWeight: 0, SourceClass: "foreign_law", Jurisdiction: "CH"}, wantW: 0, wantSC: "foreign_law", wantJur: "CH"},
		{name: "untagged ENISA guidance", result: LegalSearchResult{RegulationShort: "ENISA", ArticleLabel: "ENISA CRA Standards Mapping"}, wantW: 70, wantSC: "supervisory_guidance", wantJur: "EU"},
		{name: "untagged CRA binding", result: LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA", Category: "regulation"}, wantW: 100, wantSC: "binding_law", wantJur: "EU"},
		{name: "untagged BDSG binding DE", result: LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, wantW: 100, wantSC: "binding_law", wantJur: "DE"},
		{name: "untagged RevDSG foreign", result: LegalSearchResult{RegulationShort: "RevDSG", ArticleLabel: "RevDSG (CH)"}, wantW: 0, wantSC: "foreign_law", wantJur: "CH"},
		{name: "untagged unknown", result: LegalSearchResult{RegulationShort: "", ArticleLabel: ""}, wantW: 50, wantSC: "unknown", wantJur: "EU"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := classifyAuthority(tc.result)
			// All three components must agree; any single mismatch fails the row.
			allMatch := got.weight == tc.wantW &&
				got.sourceClass == tc.wantSC &&
				got.jurisdiction == tc.wantJur
			if allMatch {
				return
			}
			t.Errorf("classifyAuthority() = {%d %s %s}, want {%d %s %s}",
				got.weight, got.sourceClass, got.jurisdiction, tc.wantW, tc.wantSC, tc.wantJur)
		})
	}
}
// TestQueryDomain checks the domain string queryDomain returns for each
// sample query. The final row expects the empty string.
func TestQueryDomain(t *testing.T) {
	tests := []struct{ q, want string }{
		{"Welche Anforderungen an Security Updates?", "cyber"},
		{"Wer braucht einen Datenschutzbeauftragten?", "data_protection"},
		{"Was sind besondere Kategorien personenbezogener Daten?", "data_protection"},
		{"Welche Pflichten beim Hochrisiko-KI-System?", "ai"},
		{"Wie spaet ist es?", ""},
	}
	for _, tt := range tests {
		// One subtest per query, like the other table-driven tests in this
		// file, so each row is reported separately and can be selected with
		// `go test -run`.
		t.Run(tt.q, func(t *testing.T) {
			if got := queryDomain(tt.q); got != tt.want {
				t.Errorf("queryDomain(%q) = %q, want %q", tt.q, got, tt.want)
			}
		})
	}
}
// TestChunkDomain checks the domain string chunkDomain returns for results
// identified by RegulationShort and ArticleLabel. The "unknown" row expects
// the empty string.
func TestChunkDomain(t *testing.T) {
	type domainCase struct {
		name string
		r    LegalSearchResult
		want string
	}
	cases := []domainCase{
		{name: "CRA cyber", r: LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA"}, want: "cyber"},
		{name: "DSGVO dp", r: LegalSearchResult{RegulationShort: "DSGVO", ArticleLabel: "Art. 9 DSGVO"}, want: "data_protection"},
		{name: "AI Act ai", r: LegalSearchResult{RegulationShort: "AI Act", ArticleLabel: "Art. 10 AI Act"}, want: "ai"},
		{name: "MDR product", r: LegalSearchResult{RegulationShort: "MDR", ArticleLabel: "Art. 13 EU MDR"}, want: "product_safety"},
		{name: "unknown", r: LegalSearchResult{RegulationShort: "XYZ"}, want: ""},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := chunkDomain(tc.r)
			if got == tc.want {
				return
			}
			t.Errorf("chunkDomain() = %q, want %q", got, tc.want)
		})
	}
}
// TestScopeClass checks the scope label scopeClass returns: "§ 48 BDSG" is
// expected to be "law_enforcement", while "§ 38 BDSG" and "Art. 9 DSGVO" are
// expected to be "general".
func TestScopeClass(t *testing.T) {
	type scopeCase struct {
		name string
		r    LegalSearchResult
		want string
	}
	cases := []scopeCase{
		{name: "BDSG Teil 3 law enforcement", r: LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 48 BDSG"}, want: "law_enforcement"},
		{name: "BDSG general part", r: LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, want: "general"},
		{name: "DSGVO general", r: LegalSearchResult{RegulationShort: "DSGVO", ArticleLabel: "Art. 9 DSGVO"}, want: "general"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := scopeClass(tc.r)
			if got == tc.want {
				return
			}
			t.Errorf("scopeClass() = %q, want %q", got, tc.want)
		})
	}
}
// TestResultMatchesTopic checks the boolean resultMatchesTopic returns for a
// query paired with a result's ArticleLabel. It includes a near-miss row
// ("Art. 90" for a query expected to match "Art. 9") and a query for which no
// match is expected.
func TestResultMatchesTopic(t *testing.T) {
	type topicCase struct {
		name  string
		query string
		r     LegalSearchResult
		want  bool
	}
	cases := []topicCase{
		{name: "besondere Kategorien -> Art 9 match", query: "Was sind besondere Kategorien?", r: LegalSearchResult{ArticleLabel: "Art. 9 DSGVO"}, want: true},
		{name: "besondere Kategorien -> Art 90 no match", query: "Was sind besondere Kategorien?", r: LegalSearchResult{ArticleLabel: "Art. 90 DSGVO"}, want: false},
		{name: "security updates -> CRA Anhang I", query: "Anforderungen an Security Updates?", r: LegalSearchResult{ArticleLabel: "CRA Anhang I"}, want: true},
		{name: "no topic keyword", query: "Wie spaet ist es?", r: LegalSearchResult{ArticleLabel: "Art. 9 DSGVO"}, want: false},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := resultMatchesTopic(tc.query, tc.r)
			if got == tc.want {
				return
			}
			t.Errorf("resultMatchesTopic() = %v, want %v", got, tc.want)
		})
	}
}
// TestNormMatches checks the boolean normMatches returns for a haystack label
// and a norm reference. The table pairs each expected match with a longer
// number that shares the same leading digits ("Art. 9" vs "Art. 90", "§ 38"
// vs "§ 380"), which is expected not to match.
func TestNormMatches(t *testing.T) {
	tests := []struct {
		hay, norm string
		want      bool
	}{
		{"Art. 9 DSGVO", "Art. 9", true},
		{"Art. 90 DSGVO", "Art. 9", false},
		{"§ 38 BDSG", "§ 38 BDSG", true},
		{"§ 380 BDSG", "§ 38", false},
		{"Art. 14 CRA", "Art. 14 CRA", true},
	}
	for _, tt := range tests {
		// One subtest per row, like the other table-driven tests in this
		// file, so each pair is reported separately and can be selected with
		// `go test -run`. The name avoids "/" because that character
		// separates subtest levels.
		t.Run(tt.hay+" vs "+tt.norm, func(t *testing.T) {
			if got := normMatches(tt.hay, tt.norm); got != tt.want {
				t.Errorf("normMatches(%q,%q) = %v, want %v", tt.hay, tt.norm, got, tt.want)
			}
		})
	}
}