refactor(go/ucca): split policy_engine, legal_rag, ai_act, nis2, financial_policy, dsgvo_module
Split 6 oversized files (719–882 LOC each) into focused files under 500 LOC: - policy_engine.go → types, loader, eval, gen (4 files) - legal_rag.go → types, client, http, context, scroll (5 files) - ai_act_module.go → module, yaml, obligations (3 files) - nis2_module.go → module, yaml, obligations + shared obligation_yaml_types.go (3+1 files) - financial_policy.go → types, engine (2 files) - dsgvo_module.go → module, yaml, obligations (3 files) All in package ucca, zero exported symbol renames, go test ./internal/ucca/... passes. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
220
ai-compliance-sdk/internal/ucca/legal_rag_http.go
Normal file
220
ai-compliance-sdk/internal/ucca/legal_rag_http.go
Normal file
@@ -0,0 +1,220 @@
|
||||
package ucca
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// generateEmbedding calls Ollama bge-m3 to get a 1024-dim vector for the query.
|
||||
func (c *LegalRAGClient) generateEmbedding(ctx context.Context, text string) ([]float64, error) {
|
||||
if len(text) > 2000 {
|
||||
text = text[:2000]
|
||||
}
|
||||
|
||||
reqBody := ollamaEmbeddingRequest{
|
||||
Model: c.embeddingModel,
|
||||
Prompt: text,
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal embedding request: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", c.ollamaURL+"/api/embeddings", bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create embedding request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("embedding request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("ollama returned %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var embResp ollamaEmbeddingResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&embResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode embedding response: %w", err)
|
||||
}
|
||||
|
||||
if len(embResp.Embedding) == 0 {
|
||||
return nil, fmt.Errorf("no embedding returned from ollama")
|
||||
}
|
||||
|
||||
return embResp.Embedding, nil
|
||||
}
|
||||
|
||||
// ensureTextIndex creates a full-text index on chunk_text if not already done.
|
||||
func (c *LegalRAGClient) ensureTextIndex(ctx context.Context, collection string) error {
|
||||
if c.textIndexEnsured[collection] {
|
||||
return nil
|
||||
}
|
||||
|
||||
indexReq := qdrantTextIndexRequest{
|
||||
FieldName: "chunk_text",
|
||||
FieldSchema: qdrantTextFieldSchema{
|
||||
Type: "text",
|
||||
Tokenizer: "word",
|
||||
MinLen: 2,
|
||||
MaxLen: 40,
|
||||
},
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(indexReq)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal text index request: %w", err)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/collections/%s/index", c.qdrantURL, collection)
|
||||
req, err := http.NewRequestWithContext(ctx, "PUT", url, bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create text index request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
if c.qdrantAPIKey != "" {
|
||||
req.Header.Set("api-key", c.qdrantAPIKey)
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("text index request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// 200 = created, 409 = already exists — both are fine
|
||||
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusConflict {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("text index creation failed %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
c.textIndexEnsured[collection] = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// searchHybrid performs RRF-fused hybrid search (dense + full-text) via Qdrant Query API.
|
||||
func (c *LegalRAGClient) searchHybrid(ctx context.Context, collection string, embedding []float64, regulationIDs []string, topK int) ([]qdrantSearchHit, error) {
|
||||
if err := c.ensureTextIndex(ctx, collection); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
prefetchLimit := 20
|
||||
if topK > 20 {
|
||||
prefetchLimit = topK * 4
|
||||
}
|
||||
|
||||
queryReq := qdrantQueryRequest{
|
||||
Prefetch: []qdrantPrefetch{
|
||||
{Query: embedding, Limit: prefetchLimit},
|
||||
},
|
||||
Query: &qdrantFusion{Fusion: "rrf"},
|
||||
Limit: topK,
|
||||
WithPayload: true,
|
||||
}
|
||||
|
||||
if len(regulationIDs) > 0 {
|
||||
conditions := make([]qdrantCondition, len(regulationIDs))
|
||||
for i, regID := range regulationIDs {
|
||||
conditions[i] = qdrantCondition{
|
||||
Key: "regulation_id",
|
||||
Match: qdrantMatch{Value: regID},
|
||||
}
|
||||
}
|
||||
queryReq.Filter = &qdrantFilter{Should: conditions}
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(queryReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal query request: %w", err)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/collections/%s/points/query", c.qdrantURL, collection)
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create query request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
if c.qdrantAPIKey != "" {
|
||||
req.Header.Set("api-key", c.qdrantAPIKey)
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("qdrant query returned %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var queryResp qdrantQueryResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&queryResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode query response: %w", err)
|
||||
}
|
||||
|
||||
return queryResp.Result, nil
|
||||
}
|
||||
|
||||
// searchDense performs a dense-only vector search via Qdrant /points/search.
|
||||
func (c *LegalRAGClient) searchDense(ctx context.Context, collection string, embedding []float64, regulationIDs []string, topK int) ([]qdrantSearchHit, error) {
|
||||
searchReq := qdrantSearchRequest{
|
||||
Vector: embedding,
|
||||
Limit: topK,
|
||||
WithPayload: true,
|
||||
}
|
||||
|
||||
if len(regulationIDs) > 0 {
|
||||
conditions := make([]qdrantCondition, len(regulationIDs))
|
||||
for i, regID := range regulationIDs {
|
||||
conditions[i] = qdrantCondition{
|
||||
Key: "regulation_id",
|
||||
Match: qdrantMatch{Value: regID},
|
||||
}
|
||||
}
|
||||
searchReq.Filter = &qdrantFilter{Should: conditions}
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(searchReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal search request: %w", err)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/collections/%s/points/search", c.qdrantURL, collection)
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create search request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
if c.qdrantAPIKey != "" {
|
||||
req.Header.Set("api-key", c.qdrantAPIKey)
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("search request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("qdrant returned %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var searchResp qdrantSearchResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&searchResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode search response: %w", err)
|
||||
}
|
||||
|
||||
return searchResp.Result, nil
|
||||
}
|
||||
Reference in New Issue
Block a user