feat(ai-sdk): authority-aware re-ranking for legal RAG (Phase 1) (#31)
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 5s
CI / validate-canonical-controls (push) Successful in 4s
CI / loc-budget (push) Successful in 28s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Successful in 58s
CI / iace-gt-coverage (push) Successful in 16s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / detect-changes (push) Successful in 8s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 5s
CI / validate-canonical-controls (push) Successful in 4s
CI / loc-budget (push) Successful in 28s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Successful in 58s
CI / iace-gt-coverage (push) Successful in 16s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
This commit was merged in pull request #31.
This commit is contained in:
@@ -0,0 +1,125 @@
|
||||
package ucca
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestClassifyAuthority(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
result LegalSearchResult
|
||||
wantW int
|
||||
wantSC string
|
||||
wantJur string
|
||||
}{
|
||||
{"tagged binding EU", LegalSearchResult{AuthorityWeight: 100, SourceClass: "binding_law", Jurisdiction: "EU"}, 100, "binding_law", "EU"},
|
||||
{"tagged guidance DE", LegalSearchResult{AuthorityWeight: 70, SourceClass: "supervisory_guidance", Jurisdiction: "DE"}, 70, "supervisory_guidance", "DE"},
|
||||
{"tagged foreign CH", LegalSearchResult{AuthorityWeight: 0, SourceClass: "foreign_law", Jurisdiction: "CH"}, 0, "foreign_law", "CH"},
|
||||
{"untagged ENISA guidance", LegalSearchResult{RegulationShort: "ENISA", ArticleLabel: "ENISA CRA Standards Mapping"}, 70, "supervisory_guidance", "EU"},
|
||||
{"untagged CRA binding", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA", Category: "regulation"}, 100, "binding_law", "EU"},
|
||||
{"untagged BDSG binding DE", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, 100, "binding_law", "DE"},
|
||||
{"untagged RevDSG foreign", LegalSearchResult{RegulationShort: "RevDSG", ArticleLabel: "RevDSG (CH)"}, 0, "foreign_law", "CH"},
|
||||
{"untagged unknown", LegalSearchResult{RegulationShort: "", ArticleLabel: ""}, 50, "unknown", "EU"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := classifyAuthority(tt.result)
|
||||
if got.weight != tt.wantW || got.sourceClass != tt.wantSC || got.jurisdiction != tt.wantJur {
|
||||
t.Errorf("classifyAuthority() = {%d %s %s}, want {%d %s %s}",
|
||||
got.weight, got.sourceClass, got.jurisdiction, tt.wantW, tt.wantSC, tt.wantJur)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQueryDomain(t *testing.T) {
|
||||
tests := []struct{ q, want string }{
|
||||
{"Welche Anforderungen an Security Updates?", "cyber"},
|
||||
{"Wer braucht einen Datenschutzbeauftragten?", "data_protection"},
|
||||
{"Was sind besondere Kategorien personenbezogener Daten?", "data_protection"},
|
||||
{"Welche Pflichten beim Hochrisiko-KI-System?", "ai"},
|
||||
{"Wie spaet ist es?", ""},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
if got := queryDomain(tt.q); got != tt.want {
|
||||
t.Errorf("queryDomain(%q) = %q, want %q", tt.q, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestChunkDomain(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
r LegalSearchResult
|
||||
want string
|
||||
}{
|
||||
{"CRA cyber", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA"}, "cyber"},
|
||||
{"DSGVO dp", LegalSearchResult{RegulationShort: "DSGVO", ArticleLabel: "Art. 9 DSGVO"}, "data_protection"},
|
||||
{"AI Act ai", LegalSearchResult{RegulationShort: "AI Act", ArticleLabel: "Art. 10 AI Act"}, "ai"},
|
||||
{"MDR product", LegalSearchResult{RegulationShort: "MDR", ArticleLabel: "Art. 13 EU MDR"}, "product_safety"},
|
||||
{"unknown", LegalSearchResult{RegulationShort: "XYZ"}, ""},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := chunkDomain(tt.r); got != tt.want {
|
||||
t.Errorf("chunkDomain() = %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestScopeClass(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
r LegalSearchResult
|
||||
want string
|
||||
}{
|
||||
{"BDSG Teil 3 law enforcement", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 48 BDSG"}, "law_enforcement"},
|
||||
{"BDSG general part", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, "general"},
|
||||
{"DSGVO general", LegalSearchResult{RegulationShort: "DSGVO", ArticleLabel: "Art. 9 DSGVO"}, "general"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := scopeClass(tt.r); got != tt.want {
|
||||
t.Errorf("scopeClass() = %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResultMatchesTopic(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
query string
|
||||
r LegalSearchResult
|
||||
want bool
|
||||
}{
|
||||
{"besondere Kategorien -> Art 9 match", "Was sind besondere Kategorien?", LegalSearchResult{ArticleLabel: "Art. 9 DSGVO"}, true},
|
||||
{"besondere Kategorien -> Art 90 no match", "Was sind besondere Kategorien?", LegalSearchResult{ArticleLabel: "Art. 90 DSGVO"}, false},
|
||||
{"security updates -> CRA Anhang I", "Anforderungen an Security Updates?", LegalSearchResult{ArticleLabel: "CRA Anhang I"}, true},
|
||||
{"no topic keyword", "Wie spaet ist es?", LegalSearchResult{ArticleLabel: "Art. 9 DSGVO"}, false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := resultMatchesTopic(tt.query, tt.r); got != tt.want {
|
||||
t.Errorf("resultMatchesTopic() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormMatches(t *testing.T) {
|
||||
tests := []struct {
|
||||
hay, norm string
|
||||
want bool
|
||||
}{
|
||||
{"Art. 9 DSGVO", "Art. 9", true},
|
||||
{"Art. 90 DSGVO", "Art. 9", false},
|
||||
{"§ 38 BDSG", "§ 38 BDSG", true},
|
||||
{"§ 380 BDSG", "§ 38", false},
|
||||
{"Art. 14 CRA", "Art. 14 CRA", true},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
if got := normMatches(tt.hay, tt.norm); got != tt.want {
|
||||
t.Errorf("normMatches(%q,%q) = %v, want %v", tt.hay, tt.norm, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user