perf(ai-sdk): embed query once across router fan-out + fold umlauts in intent/concept matching
CI / detect-changes (push) Successful in 5s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 5s
CI / validate-canonical-controls (push) Successful in 4s
CI / loc-budget (push) Successful in 18s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m0s
CI / test-go (push) Successful in 59s
CI / iace-gt-coverage (push) Successful in 17s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / detect-changes (push) Successful in 5s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 5s
CI / validate-canonical-controls (push) Successful in 4s
CI / loc-budget (push) Successful in 18s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m0s
CI / test-go (push) Successful in 59s
CI / iace-gt-coverage (push) Successful in 17s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
Authority Router re-embedded the query per collection (6x); on dev the embed endpoint (OVH) is remote, so that was 6 round-trips = 7-12s per /retrieve. Embed once, reuse via ctx across the concurrent per-collection searches. DetectIntent + ConceptNorms now fold ä/ö/ü/ß to ae/oe/ue/ss so ASCII (Pruefe) and umlaut (Prüfe) inputs both match.
This commit is contained in:
@@ -74,6 +74,10 @@ func (c *LegalRAGClient) Retrieve(ctx context.Context, query string, topK int) (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Embed the query ONCE and stash it in ctx so the concurrent per-collection searches
|
||||||
|
// below reuse it instead of each re-embedding (was N remote round-trips on dev/OVH).
|
||||||
|
ctx = c.withQueryEmbedding(ctx, query)
|
||||||
|
|
||||||
out := make([][]LegalSearchResult, len(collections))
|
out := make([][]LegalSearchResult, len(collections))
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
for i, coll := range collections {
|
for i, coll := range collections {
|
||||||
|
|||||||
@@ -48,12 +48,12 @@ var legalConceptOntology = []conceptNorm{
|
|||||||
// ConceptNorms returns the load-bearing norm_ids for the concepts named in the
|
// ConceptNorms returns the load-bearing norm_ids for the concepts named in the
|
||||||
// query (dedup, order-preserving). Empty if no concept is named.
|
// query (dedup, order-preserving). Empty if no concept is named.
|
||||||
func ConceptNorms(query string) []string {
|
func ConceptNorms(query string) []string {
|
||||||
q := strings.ToLower(query)
|
q := normalizeGerman(query)
|
||||||
seen := map[string]bool{}
|
seen := map[string]bool{}
|
||||||
out := []string{}
|
out := []string{}
|
||||||
for _, cn := range legalConceptOntology {
|
for _, cn := range legalConceptOntology {
|
||||||
for _, kw := range cn.keywords {
|
for _, kw := range cn.keywords {
|
||||||
if strings.Contains(q, kw) {
|
if strings.Contains(q, normalizeGerman(kw)) {
|
||||||
for _, nid := range cn.normIDs {
|
for _, nid := range cn.normIDs {
|
||||||
if !seen[nid] {
|
if !seen[nid] {
|
||||||
seen[nid] = true
|
seen[nid] = true
|
||||||
|
|||||||
@@ -0,0 +1,34 @@
|
|||||||
|
package ucca
|
||||||
|
|
||||||
|
import "context"
|
||||||
|
|
||||||
|
type embCacheKeyT struct{}
|
||||||
|
|
||||||
|
var embCacheKey embCacheKeyT
|
||||||
|
|
||||||
|
type embCacheEntry struct {
|
||||||
|
query string
|
||||||
|
vec []float64
|
||||||
|
}
|
||||||
|
|
||||||
|
// embedForQuery returns the query embedding, reusing a value precomputed for the SAME
|
||||||
|
// query and stashed in ctx by withQueryEmbedding. This collapses the Authority Router's
|
||||||
|
// per-collection fan-out from N embeddings to ONE — decisive when the embedding endpoint
|
||||||
|
// is remote (dev/OVH), where N round-trips dominated /retrieve latency. Falls back to a
|
||||||
|
// fresh embedding when nothing is cached (direct Search / SearchCollection callers).
|
||||||
|
func (c *LegalRAGClient) embedForQuery(ctx context.Context, query string) ([]float64, error) {
|
||||||
|
if v, ok := ctx.Value(embCacheKey).(*embCacheEntry); ok && v.query == query && len(v.vec) > 0 {
|
||||||
|
return v.vec, nil
|
||||||
|
}
|
||||||
|
return c.generateEmbedding(ctx, query)
|
||||||
|
}
|
||||||
|
|
||||||
|
// withQueryEmbedding precomputes the query embedding once and stashes it in ctx so the
|
||||||
|
// concurrent per-collection searches reuse it instead of each re-embedding. Best-effort:
|
||||||
|
// on embed error the ctx is returned unchanged and callers fall back to per-call embedding.
|
||||||
|
func (c *LegalRAGClient) withQueryEmbedding(ctx context.Context, query string) context.Context {
|
||||||
|
if vec, err := c.generateEmbedding(ctx, query); err == nil && len(vec) > 0 {
|
||||||
|
return context.WithValue(ctx, embCacheKey, &embCacheEntry{query: query, vec: vec})
|
||||||
|
}
|
||||||
|
return ctx
|
||||||
|
}
|
||||||
@@ -10,10 +10,10 @@ import "strings"
|
|||||||
// this evidence") instead of guessing the format. Returns "" (neutral) when no
|
// this evidence") instead of guessing the format. Returns "" (neutral) when no
|
||||||
// clear task is signalled. First tier of ~20-30 intent types.
|
// clear task is signalled. First tier of ~20-30 intent types.
|
||||||
func DetectIntent(query string) string {
|
func DetectIntent(query string) string {
|
||||||
q := " " + strings.ToLower(query) + " "
|
q := " " + normalizeGerman(query) + " "
|
||||||
has := func(subs ...string) bool {
|
has := func(subs ...string) bool {
|
||||||
for _, s := range subs {
|
for _, s := range subs {
|
||||||
if strings.Contains(q, s) {
|
if strings.Contains(q, normalizeGerman(s)) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -105,7 +105,7 @@ func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
embedding, err := c.generateEmbedding(ctx, query)
|
embedding, err := c.embedForQuery(ctx, query)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to generate embedding: %w", err)
|
return nil, fmt.Errorf("failed to generate embedding: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,15 @@
|
|||||||
|
package ucca
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
|
// normalizeGerman lowercases and folds German umlauts / ß to their ASCII digraphs
|
||||||
|
// (ä→ae, ö→oe, ü→ue, ß→ss) so keyword matching is insensitive to whether the user
|
||||||
|
// typed "Prüfe" or "Pruefe", "Datenschutzerklärung" or "Datenschutzerklaerung".
|
||||||
|
// Applied to BOTH the query and the keyword lists in the German-text matchers.
|
||||||
|
func normalizeGerman(s string) string {
|
||||||
|
return umlautFolder.Replace(strings.ToLower(s))
|
||||||
|
}
|
||||||
|
|
||||||
|
var umlautFolder = strings.NewReplacer(
|
||||||
|
"ä", "ae", "ö", "oe", "ü", "ue", "ß", "ss",
|
||||||
|
)
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
package ucca
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestDetectIntentUmlautFold(t *testing.T) {
|
||||||
|
cases := map[string]string{
|
||||||
|
"Pruefe meine Datenschutzerklaerung.": "review", // ASCII digraph
|
||||||
|
"Prüfe meine Datenschutzerklärung.": "review", // umlaut
|
||||||
|
"Ueberpruefe das Impressum": "review", // ASCII "überprüfe"
|
||||||
|
"Was ist eine TOM?": "definition", // unchanged
|
||||||
|
}
|
||||||
|
for q, want := range cases {
|
||||||
|
if got := DetectIntent(q); got != want {
|
||||||
|
t.Errorf("DetectIntent(%q)=%q want %q", q, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConceptNormsUmlautFold(t *testing.T) {
|
||||||
|
// ASCII "datenschutzerklaerung" must resolve to the same core norms as the umlaut form.
|
||||||
|
ascii := ConceptNorms("Was gehoert in eine Datenschutzerklaerung?")
|
||||||
|
umlaut := ConceptNorms("Was gehört in eine Datenschutzerklärung?")
|
||||||
|
if len(ascii) == 0 {
|
||||||
|
t.Errorf("ConceptNorms(ASCII datenschutzerklaerung) returned none")
|
||||||
|
}
|
||||||
|
if len(ascii) != len(umlaut) {
|
||||||
|
t.Errorf("ASCII vs umlaut concept norms differ: %v vs %v", ascii, umlaut)
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user