// breakpilot-compliance/ai-compliance-sdk/internal/ucca/embed_cache.go

package ucca

import "context"

// embCacheKeyT is the private, zero-size type of the context key under which a
// precomputed query embedding is stored. Because the type is unexported, a value of
// it can only be created inside this package.
type embCacheKeyT struct{}

// embCacheKey is the single key value passed to context.WithValue / ctx.Value when
// stashing and retrieving an *embCacheEntry.
var embCacheKey = embCacheKeyT{}

// embCacheEntry is the value stored in a context under embCacheKey: an embedding
// vector (vec) together with the exact query text it was generated for (query).
// Keeping the query alongside the vector lets embedForQuery verify that a cached
// vector belongs to the string it is asked to embed before reusing it.
type embCacheEntry struct {
	query string
	vec   []float64
}

// embedForQuery yields the embedding vector for query.
//
// If ctx carries an entry placed there by withQueryEmbedding, and that entry was
// computed for exactly this query string and holds a non-empty vector, the stored
// vector is returned as-is and no embedding call is made. That is what reduces the
// Authority Router's per-collection fan-out from N embedding calls to one, which
// matters most when the embedding endpoint is remote (dev/OVH) and the N round-trips
// dominated /retrieve latency.
//
// In every other case (nothing cached, a different query, or an empty vector) it
// falls through to c.generateEmbedding, so direct Search / SearchCollection callers
// keep working without any precomputation.
func (c *LegalRAGClient) embedForQuery(ctx context.Context, query string) ([]float64, error) {
	cached, hit := ctx.Value(embCacheKey).(*embCacheEntry)
	// Cache miss: no entry in ctx, entry is for another query, or its vector is empty.
	if !hit || cached.query != query || len(cached.vec) == 0 {
		return c.generateEmbedding(ctx, query)
	}
	return cached.vec, nil
}

// withQueryEmbedding embeds query a single time and returns a child context that
// carries the result, so the concurrent per-collection searches can pick it up via
// embedForQuery instead of each computing their own embedding.
//
// The operation is best-effort: if generateEmbedding fails or returns an empty
// vector, the error is intentionally dropped and the original ctx is handed back
// untouched, leaving callers to fall back to embedding per call.
func (c *LegalRAGClient) withQueryEmbedding(ctx context.Context, query string) context.Context {
	vec, err := c.generateEmbedding(ctx, query)
	if err != nil || len(vec) == 0 {
		// Nothing usable to cache; callers will embed on demand.
		return ctx
	}
	entry := &embCacheEntry{query: query, vec: vec}
	return context.WithValue(ctx, embCacheKey, entry)
}