90e0a57799
CI / detect-changes (pull_request) Successful in 5s
CI / branch-name (pull_request) Successful in 1s
CI / guardrail-integrity (pull_request) Successful in 4s
CI / secret-scan (pull_request) Successful in 6s
CI / dep-audit (pull_request) Failing after 55s
CI / sbom-scan (pull_request) Failing after 58s
CI / build-sha-integrity (pull_request) Successful in 6s
CI / validate-canonical-controls (pull_request) Successful in 4s
CI / loc-budget (pull_request) Successful in 18s
CI / go-lint (pull_request) Successful in 41s
CI / python-lint (pull_request) Failing after 13s
CI / nodejs-lint (pull_request) Failing after 1m4s
CI / nodejs-build (pull_request) Successful in 3m0s
CI / test-go (pull_request) Successful in 58s
CI / iace-gt-coverage (pull_request) Successful in 14s
CI / test-python-backend (pull_request) Successful in 25s
CI / test-python-document-crawler (pull_request) Successful in 13s
CI / test-python-dsms-gateway (pull_request) Successful in 10s
The existing NIST corpus (SP 800-82r3 etc., ingested before source_class tagging) was classified as supervisory_guidance because "NIST" sat in guidanceMarkers, so the control-intent lift (#36) could never surface it.

Add a technical_standard class:
- authority.go: new standardMarkers (NIST/OWASP/Grundschutz/ISO 27001/CSA CCM/CIS), checked before guidanceMarkers (so "BSI Grundschutz" -> standard, not BSI guidance); move NIST out of guidanceMarkers; sourceClassFromWeight maps weight 80 -> standard.
- authority_rerank.go: the intent-lift path (liftAboveBinding + bestBindingSemantic) now classifies via classifyAuthority instead of trusting the raw payload source_class, so the untagged legacy corpus is recognized — untagged NIST is now lifted on a control question ("Welche Controls passen zu Security Updates?").

Tested: classifier cases for NIST/Grundschutz/weight-80, and an untagged-NIST lift case.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
130 lines
5.4 KiB
Go
130 lines
5.4 KiB
Go
package ucca
|
|
|
|
import "testing"
|
|
|
|
func TestClassifyAuthority(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
result LegalSearchResult
|
|
wantW int
|
|
wantSC string
|
|
wantJur string
|
|
}{
|
|
{"tagged binding EU", LegalSearchResult{AuthorityWeight: 100, SourceClass: "binding_law", Jurisdiction: "EU"}, 100, "binding_law", "EU"},
|
|
{"tagged guidance DE", LegalSearchResult{AuthorityWeight: 70, SourceClass: "supervisory_guidance", Jurisdiction: "DE"}, 70, "supervisory_guidance", "DE"},
|
|
{"tagged foreign CH", LegalSearchResult{AuthorityWeight: 0, SourceClass: "foreign_law", Jurisdiction: "CH"}, 0, "foreign_law", "CH"},
|
|
{"untagged ENISA guidance", LegalSearchResult{RegulationShort: "ENISA", ArticleLabel: "ENISA CRA Standards Mapping"}, 70, "supervisory_guidance", "EU"},
|
|
{"untagged NIST standard", LegalSearchResult{RegulationShort: "NIST SP 800-82r3", ArticleLabel: "AU-8"}, 80, "technical_standard", "EU"},
|
|
{"BSI Grundschutz standard beats BSI guidance", LegalSearchResult{RegulationShort: "BSI Grundschutz", ArticleLabel: "BSI Grundschutz Baustein"}, 80, "technical_standard", "DE"},
|
|
{"weight-only 85 TRGS standard", LegalSearchResult{AuthorityWeight: 85, RegulationShort: "TRGS 529"}, 85, "technical_standard", "EU"},
|
|
{"tagged technical_standard", LegalSearchResult{AuthorityWeight: 80, SourceClass: "technical_standard", Jurisdiction: "EU"}, 80, "technical_standard", "EU"},
|
|
{"untagged CRA binding", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA", Category: "regulation"}, 100, "binding_law", "EU"},
|
|
{"untagged BDSG binding DE", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, 100, "binding_law", "DE"},
|
|
{"untagged RevDSG foreign", LegalSearchResult{RegulationShort: "RevDSG", ArticleLabel: "RevDSG (CH)"}, 0, "foreign_law", "CH"},
|
|
{"untagged unknown", LegalSearchResult{RegulationShort: "", ArticleLabel: ""}, 50, "unknown", "EU"},
|
|
}
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
got := classifyAuthority(tt.result)
|
|
if got.weight != tt.wantW || got.sourceClass != tt.wantSC || got.jurisdiction != tt.wantJur {
|
|
t.Errorf("classifyAuthority() = {%d %s %s}, want {%d %s %s}",
|
|
got.weight, got.sourceClass, got.jurisdiction, tt.wantW, tt.wantSC, tt.wantJur)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestQueryDomain(t *testing.T) {
|
|
tests := []struct{ q, want string }{
|
|
{"Welche Anforderungen an Security Updates?", "cyber"},
|
|
{"Wer braucht einen Datenschutzbeauftragten?", "data_protection"},
|
|
{"Was sind besondere Kategorien personenbezogener Daten?", "data_protection"},
|
|
{"Welche Pflichten beim Hochrisiko-KI-System?", "ai"},
|
|
{"Wie spaet ist es?", ""},
|
|
}
|
|
for _, tt := range tests {
|
|
if got := queryDomain(tt.q); got != tt.want {
|
|
t.Errorf("queryDomain(%q) = %q, want %q", tt.q, got, tt.want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestChunkDomain(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
r LegalSearchResult
|
|
want string
|
|
}{
|
|
{"CRA cyber", LegalSearchResult{RegulationShort: "CRA", ArticleLabel: "Art. 13 CRA"}, "cyber"},
|
|
{"DSGVO dp", LegalSearchResult{RegulationShort: "DSGVO", ArticleLabel: "Art. 9 DSGVO"}, "data_protection"},
|
|
{"AI Act ai", LegalSearchResult{RegulationShort: "AI Act", ArticleLabel: "Art. 10 AI Act"}, "ai"},
|
|
{"MDR product", LegalSearchResult{RegulationShort: "MDR", ArticleLabel: "Art. 13 EU MDR"}, "product_safety"},
|
|
{"unknown", LegalSearchResult{RegulationShort: "XYZ"}, ""},
|
|
}
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
if got := chunkDomain(tt.r); got != tt.want {
|
|
t.Errorf("chunkDomain() = %q, want %q", got, tt.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestScopeClass(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
r LegalSearchResult
|
|
want string
|
|
}{
|
|
{"BDSG Teil 3 law enforcement", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 48 BDSG"}, "law_enforcement"},
|
|
{"BDSG general part", LegalSearchResult{RegulationShort: "BDSG", ArticleLabel: "§ 38 BDSG"}, "general"},
|
|
{"DSGVO general", LegalSearchResult{RegulationShort: "DSGVO", ArticleLabel: "Art. 9 DSGVO"}, "general"},
|
|
}
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
if got := scopeClass(tt.r); got != tt.want {
|
|
t.Errorf("scopeClass() = %q, want %q", got, tt.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestResultMatchesTopic(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
query string
|
|
r LegalSearchResult
|
|
want bool
|
|
}{
|
|
{"besondere Kategorien -> Art 9 match", "Was sind besondere Kategorien?", LegalSearchResult{ArticleLabel: "Art. 9 DSGVO"}, true},
|
|
{"besondere Kategorien -> Art 90 no match", "Was sind besondere Kategorien?", LegalSearchResult{ArticleLabel: "Art. 90 DSGVO"}, false},
|
|
{"security updates -> CRA Anhang I", "Anforderungen an Security Updates?", LegalSearchResult{ArticleLabel: "CRA Anhang I"}, true},
|
|
{"no topic keyword", "Wie spaet ist es?", LegalSearchResult{ArticleLabel: "Art. 9 DSGVO"}, false},
|
|
}
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
if got := resultMatchesTopic(tt.query, tt.r); got != tt.want {
|
|
t.Errorf("resultMatchesTopic() = %v, want %v", got, tt.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestNormMatches(t *testing.T) {
|
|
tests := []struct {
|
|
hay, norm string
|
|
want bool
|
|
}{
|
|
{"Art. 9 DSGVO", "Art. 9", true},
|
|
{"Art. 90 DSGVO", "Art. 9", false},
|
|
{"§ 38 BDSG", "§ 38 BDSG", true},
|
|
{"§ 380 BDSG", "§ 38", false},
|
|
{"Art. 14 CRA", "Art. 14 CRA", true},
|
|
}
|
|
for _, tt := range tests {
|
|
if got := normMatches(tt.hay, tt.norm); got != tt.want {
|
|
t.Errorf("normMatches(%q,%q) = %v, want %v", tt.hay, tt.norm, got, tt.want)
|
|
}
|
|
}
|
|
}
|