package ucca

import "context"

// embCacheKeyT is the unexported type of the context key under which a
// precomputed query embedding is stored. Using a dedicated struct type keeps
// the key from colliding with context keys defined by other packages.
type embCacheKeyT struct{}

// embCacheKey is the single context key used by withQueryEmbedding (writer)
// and embedForQuery (reader).
var embCacheKey embCacheKeyT

// embCacheEntry pairs an embedding vector with the exact query text it was
// computed for, so a reader can verify the cached vector belongs to its query.
type embCacheEntry struct {
	query string
	vec   []float64
}

// embedForQuery returns the embedding for query.
//
// If ctx carries an entry stashed by withQueryEmbedding whose query text is
// identical to query and whose vector is non-empty, that vector is returned
// without calling generateEmbedding. The returned slice is the cached slice
// itself, not a copy, so every caller sharing the ctx sees the same backing
// array.
//
// In every other case (no entry, different query, empty vector) it falls back
// to a fresh c.generateEmbedding call and returns its result and error
// unchanged. That is the path taken by callers that never went through
// withQueryEmbedding, such as direct Search / SearchCollection calls.
//
// Purpose: collapse the Authority Router's per-collection fan-out from N
// embeddings to one, which matters most when the embedding endpoint is remote
// (dev/OVH) and the N round-trips dominated /retrieve latency.
func (c *LegalRAGClient) embedForQuery(ctx context.Context, query string) ([]float64, error) {
	entry, found := ctx.Value(embCacheKey).(*embCacheEntry)
	// Cache miss: nothing stored, stored for a different query, or stored empty.
	if !found || entry.query != query || len(entry.vec) == 0 {
		return c.generateEmbedding(ctx, query)
	}
	return entry.vec, nil
}

// withQueryEmbedding computes the embedding for query once and returns a
// context derived from ctx that carries it, so the concurrent per-collection
// searches can reuse it through embedForQuery instead of each re-embedding.
//
// It is best-effort: when generateEmbedding returns an error or an empty
// vector, the original ctx is returned unchanged and the error is dropped on
// purpose; callers then fall back to per-call embedding.
func (c *LegalRAGClient) withQueryEmbedding(ctx context.Context, query string) context.Context {
	embedding, err := c.generateEmbedding(ctx, query)
	if err != nil || len(embedding) == 0 {
		return ctx
	}
	return context.WithValue(ctx, embCacheKey, &embCacheEntry{query: query, vec: embedding})
}