Files
breakpilot-compliance/ai-compliance-sdk/internal/ucca/embed_cache.go
T
Claude 0903e3a8d1
CI / detect-changes (push) Successful in 5s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / build-sha-integrity (push) Successful in 5s
CI / validate-canonical-controls (push) Successful in 4s
CI / loc-budget (push) Successful in 18s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m0s
CI / test-go (push) Successful in 59s
CI / iace-gt-coverage (push) Successful in 17s
CI / test-python-backend (push) Has been skipped
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
perf(ai-sdk): embed query once across router fan-out + fold umlauts in intent/concept matching
Authority Router re-embedded the query per collection (6x); on dev the embed
endpoint (OVH) is remote so that was 6 round-trips = 7-12s per /retrieve. Embed
once, reuse via ctx across the concurrent per-collection searches.
DetectIntent + ConceptNorms now fold ä/ö/ü/ß to ae/oe/ue/ss so ASCII (Pruefe) and umlaut
(Prüfe) inputs both match.
2026-07-01 19:03:11 +02:00

35 lines
1.4 KiB
Go

package ucca
import "context"
// embCacheKeyT is the unexported, empty struct type of the context key under
// which a precomputed query embedding is stored. Because the type is private
// to this package, the key cannot collide with context keys defined elsewhere.
type embCacheKeyT struct{}

// embCacheKey is the single key value used with context.WithValue and
// ctx.Value by withQueryEmbedding and embedForQuery.
var embCacheKey = embCacheKeyT{}
// embCacheEntry is the value stored in a context under embCacheKey: an
// embedding vector together with the exact query string it was computed for.
type embCacheEntry struct {
	// query is the text the embedding belongs to; embedForQuery compares it
	// against the requested query before reusing vec.
	query string
	// vec is the precomputed embedding for query.
	vec []float64
}
// embedForQuery yields the embedding vector for query.
//
// If ctx carries an entry stored by withQueryEmbedding whose query string is
// identical to the requested one and whose vector is non-empty, that vector is
// returned as-is (the same slice, not a copy). Otherwise — no entry, an entry
// for a different query, or an empty vector — the embedding is computed via
// c.generateEmbedding and its result and error are returned unchanged.
//
// The reuse path lets the Authority Router's per-collection fan-out share ONE
// embedding instead of computing N, which matters when the embedding endpoint
// is remote. Direct Search / SearchCollection callers that never seeded the
// context simply take the fallback path.
func (c *LegalRAGClient) embedForQuery(ctx context.Context, query string) ([]float64, error) {
	cached, found := ctx.Value(embCacheKey).(*embCacheEntry)

	// Cache miss: nothing stored, stored for another query, or nothing usable.
	if !found || cached.query != query || len(cached.vec) == 0 {
		return c.generateEmbedding(ctx, query)
	}

	return cached.vec, nil
}
// withQueryEmbedding computes the embedding for query once and returns a child
// context that carries it under embCacheKey, so that later embedForQuery calls
// made with that context and the same query reuse the vector instead of
// embedding again.
//
// The operation is best-effort: when c.generateEmbedding reports an error or
// yields an empty vector, the error is dropped and the original ctx is
// returned untouched, leaving callers to fall back to per-call embedding.
func (c *LegalRAGClient) withQueryEmbedding(ctx context.Context, query string) context.Context {
	vec, err := c.generateEmbedding(ctx, query)
	if err != nil || len(vec) == 0 {
		// Nothing usable to cache; keep the caller's context as it was.
		return ctx
	}

	entry := &embCacheEntry{query: query, vec: vec}
	return context.WithValue(ctx, embCacheKey, entry)
}