0903e3a8d1
CI / detect-changes (push) Successful in 5s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 5s
CI / validate-canonical-controls (push) Successful in 4s
CI / loc-budget (push) Successful in 18s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m0s
CI / test-go (push) Successful in 59s
CI / iace-gt-coverage (push) Successful in 17s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
Authority Router re-embedded the query per collection (6x); on dev the embed endpoint (OVH) is remote so that was 6 round-trips = 7-12s per /retrieve. Embed once, reuse via ctx across the concurrent per-collection searches. DetectIntent + ConceptNorms now fold ae/oe/ue/ss so ASCII (Pruefe) and umlaut (Prüfe) inputs both match.
30 lines
1022 B
Go
30 lines
1022 B
Go
package ucca
|
|
|
|
import "testing"
|
|
|
|
func TestDetectIntentUmlautFold(t *testing.T) {
|
|
cases := map[string]string{
|
|
"Pruefe meine Datenschutzerklaerung.": "review", // ASCII digraph
|
|
"Prüfe meine Datenschutzerklärung.": "review", // umlaut
|
|
"Ueberpruefe das Impressum": "review", // ASCII "überprüfe"
|
|
"Was ist eine TOM?": "definition", // unchanged
|
|
}
|
|
for q, want := range cases {
|
|
if got := DetectIntent(q); got != want {
|
|
t.Errorf("DetectIntent(%q)=%q want %q", q, got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestConceptNormsUmlautFold(t *testing.T) {
|
|
// ASCII "datenschutzerklaerung" must resolve to the same core norms as the umlaut form.
|
|
ascii := ConceptNorms("Was gehoert in eine Datenschutzerklaerung?")
|
|
umlaut := ConceptNorms("Was gehört in eine Datenschutzerklärung?")
|
|
if len(ascii) == 0 {
|
|
t.Errorf("ConceptNorms(ASCII datenschutzerklaerung) returned none")
|
|
}
|
|
if len(ascii) != len(umlaut) {
|
|
t.Errorf("ASCII vs umlaut concept norms differ: %v vs %v", ascii, umlaut)
|
|
}
|
|
}
|