package ucca import ( "bytes" "context" "encoding/json" "fmt" "io" "net/http" ) // generateEmbedding calls Ollama bge-m3 to get a 1024-dim vector for the query. func (c *LegalRAGClient) generateEmbedding(ctx context.Context, text string) ([]float64, error) { if len(text) > 2000 { text = text[:2000] } reqBody := ollamaEmbeddingRequest{ Model: c.embeddingModel, Prompt: text, } jsonBody, err := json.Marshal(reqBody) if err != nil { return nil, fmt.Errorf("failed to marshal embedding request: %w", err) } req, err := http.NewRequestWithContext(ctx, "POST", c.ollamaURL+"/api/embeddings", bytes.NewReader(jsonBody)) if err != nil { return nil, fmt.Errorf("failed to create embedding request: %w", err) } req.Header.Set("Content-Type", "application/json") resp, err := c.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("embedding request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("ollama returned %d: %s", resp.StatusCode, string(body)) } var embResp ollamaEmbeddingResponse if err := json.NewDecoder(resp.Body).Decode(&embResp); err != nil { return nil, fmt.Errorf("failed to decode embedding response: %w", err) } if len(embResp.Embedding) == 0 { return nil, fmt.Errorf("no embedding returned from ollama") } return embResp.Embedding, nil } // ensureTextIndex creates a full-text index on chunk_text if not already done. func (c *LegalRAGClient) ensureTextIndex(ctx context.Context, collection string) error { if c.textIndexEnsured[collection] { return nil } indexReq := qdrantTextIndexRequest{ FieldName: "chunk_text", FieldSchema: qdrantTextFieldSchema{ Type: "text", Tokenizer: "word", MinLen: 2, MaxLen: 40, }, } jsonBody, err := json.Marshal(indexReq) if err != nil { return fmt.Errorf("failed to marshal text index request: %w", err) } url := fmt.Sprintf("%s/collections/%s/index", c.qdrantURL, collection) req, err := http.NewRequestWithContext(ctx, "PUT", url, bytes.NewReader(jsonBody)) if err != nil { return fmt.Errorf("failed to create text index request: %w", err) } req.Header.Set("Content-Type", "application/json") if c.qdrantAPIKey != "" { req.Header.Set("api-key", c.qdrantAPIKey) } resp, err := c.httpClient.Do(req) if err != nil { return fmt.Errorf("text index request failed: %w", err) } defer resp.Body.Close() // 200 = created, 409 = already exists — both are fine if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusConflict { body, _ := io.ReadAll(resp.Body) return fmt.Errorf("text index creation failed %d: %s", resp.StatusCode, string(body)) } c.textIndexEnsured[collection] = true return nil } // searchHybrid performs RRF-fused hybrid search (dense + full-text) via Qdrant Query API. func (c *LegalRAGClient) searchHybrid(ctx context.Context, collection string, embedding []float64, regulationIDs []string, topK int) ([]qdrantSearchHit, error) { if err := c.ensureTextIndex(ctx, collection); err != nil { return nil, err } prefetchLimit := 20 if topK > 20 { prefetchLimit = topK * 4 } queryReq := qdrantQueryRequest{ Prefetch: []qdrantPrefetch{ {Query: embedding, Limit: prefetchLimit}, }, Query: &qdrantFusion{Fusion: "rrf"}, Limit: topK, WithPayload: true, } if len(regulationIDs) > 0 { conditions := make([]qdrantCondition, len(regulationIDs)) for i, regID := range regulationIDs { conditions[i] = qdrantCondition{ Key: "regulation_id", Match: qdrantMatch{Value: regID}, } } queryReq.Filter = &qdrantFilter{Should: conditions} } jsonBody, err := json.Marshal(queryReq) if err != nil { return nil, fmt.Errorf("failed to marshal query request: %w", err) } url := fmt.Sprintf("%s/collections/%s/points/query", c.qdrantURL, collection) req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody)) if err != nil { return nil, fmt.Errorf("failed to create query request: %w", err) } req.Header.Set("Content-Type", "application/json") if c.qdrantAPIKey != "" { req.Header.Set("api-key", c.qdrantAPIKey) } resp, err := c.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("query request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("qdrant query returned %d: %s", resp.StatusCode, string(body)) } var queryResp qdrantQueryResponse if err := json.NewDecoder(resp.Body).Decode(&queryResp); err != nil { return nil, fmt.Errorf("failed to decode query response: %w", err) } return queryResp.Result, nil } // searchDense performs a dense-only vector search via Qdrant /points/search. func (c *LegalRAGClient) searchDense(ctx context.Context, collection string, embedding []float64, regulationIDs []string, topK int) ([]qdrantSearchHit, error) { searchReq := qdrantSearchRequest{ Vector: embedding, Limit: topK, WithPayload: true, } if len(regulationIDs) > 0 { conditions := make([]qdrantCondition, len(regulationIDs)) for i, regID := range regulationIDs { conditions[i] = qdrantCondition{ Key: "regulation_id", Match: qdrantMatch{Value: regID}, } } searchReq.Filter = &qdrantFilter{Should: conditions} } jsonBody, err := json.Marshal(searchReq) if err != nil { return nil, fmt.Errorf("failed to marshal search request: %w", err) } url := fmt.Sprintf("%s/collections/%s/points/search", c.qdrantURL, collection) req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody)) if err != nil { return nil, fmt.Errorf("failed to create search request: %w", err) } req.Header.Set("Content-Type", "application/json") if c.qdrantAPIKey != "" { req.Header.Set("api-key", c.qdrantAPIKey) } resp, err := c.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("search request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("qdrant returned %d: %s", resp.StatusCode, string(body)) } var searchResp qdrantSearchResponse if err := json.NewDecoder(resp.Body).Decode(&searchResp); err != nil { return nil, fmt.Errorf("failed to decode search response: %w", err) } return searchResp.Result, nil }