Files
breakpilot-compliance/ai-compliance-sdk/internal/ucca/legal_rag_http.go
T
Benjamin Admin 9760dca443
CI / detect-changes (pull_request) Successful in 10s
CI / branch-name (pull_request) Successful in 1s
CI / guardrail-integrity (pull_request) Successful in 8s
CI / secret-scan (pull_request) Successful in 9s
CI / dep-audit (pull_request) Failing after 56s
CI / sbom-scan (pull_request) Failing after 58s
CI / build-sha-integrity (pull_request) Successful in 9s
CI / validate-canonical-controls (pull_request) Successful in 7s
CI / loc-budget (pull_request) Successful in 24s
CI / go-lint (pull_request) Successful in 54s
CI / python-lint (pull_request) Failing after 16s
CI / nodejs-lint (pull_request) Failing after 1m9s
CI / nodejs-build (pull_request) Successful in 3m6s
CI / test-go (pull_request) Successful in 1m3s
CI / iace-gt-coverage (pull_request) Successful in 19s
CI / test-python-backend (pull_request) Successful in 26s
CI / test-python-document-crawler (pull_request) Successful in 15s
CI / test-python-dsms-gateway (pull_request) Successful in 12s
feat(ucca): Multi-Regulation-Retrieval für Cross-Regulation-Fragen
Nennt eine Query EXPLIZIT >=2 Regelwerke ("Wie greifen CRA und Maschinen-
verordnung ineinander?"), retrievt searchInternal pro Regelwerk separat
(regulation_code/regulation_id-Filter) und merged — damit BEIDE Domänen im
Prompt landen statt nur der keyword-dominanten. Generisch (Query->Regelwerke,
KEINE doc-spezifische Logik), gegated auf >=2 erkannte Regelwerke; sonst
unveränderter Single-Domain-Pfad.

Behebt GQ-0070: vorher CRA x8 / null MaschVO -> Modell halluzinierte
MaschVO=2019/2144 + falsche "CRA ausgenommen"-Konklusion. Nachher CRA + MaschVO
im Prompt -> korrekt "beide gleichzeitig anwendbar" + Art. 20(9)
Konformitätsvermutung, gegroundet.

Validierung (Build-Collection, echtes SearchCollection):
- Unit: detectRegulations-Scoping (>=2 -> multi, 1/0 -> single)
- 5 Cross-Reg-Fälle (0070 + DSGVO+TDDDG/CRA+NIS2/DORA+NIS2/AI Act+DSGVO):
  beide Regelwerke in Top-8
- CB-100 Freeze-Regression: NUR GQ-0070 + GQ-0095 geändert (beide echte
  Cross-Reg, beide verbessert), 98/100 byte-identisch
- 10 Hard Cases: 9 Single-Domain unverändert, 0070 behält CRA Rang 1

Filter erweitert auf regulation_id UND regulation_code (rückwärtskompatibel,
aktiviert die re-ingestierte Build-Collection).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-30 08:18:06 +02:00

274 lines
8.8 KiB
Go

package ucca
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
)
// generateEmbedding asks Ollama (POST {ollamaURL}/api/embeddings) for the embedding
// vector of text, using the model configured in c.embeddingModel (the original note
// on this function names bge-m3 with 1024 dimensions).
//
// Input longer than 2000 bytes is shortened before it is sent. The cut is moved back
// to the nearest UTF-8 rune boundary so that a multi-byte character (umlauts, "§", ...)
// is never split in half; a split rune would otherwise be serialized by encoding/json
// as U+FFFD and end up in the embedded text.
//
// It returns an error when the request cannot be marshalled, built or sent, when Ollama
// answers with a non-200 status (the response body is included in the message), when
// the response cannot be decoded, or when the decoded embedding is empty.
func (c *LegalRAGClient) generateEmbedding(ctx context.Context, text string) ([]float64, error) {
	const maxInputBytes = 2000
	if len(text) > maxInputBytes {
		cut := maxInputBytes
		// UTF-8 continuation bytes look like 10xxxxxx. A rune is at most 4 bytes long,
		// so its first byte is never more than 3 bytes before any of its other bytes.
		for cut > maxInputBytes-3 && text[cut]&0xC0 == 0x80 {
			cut--
		}
		text = text[:cut]
	}

	reqBody := ollamaEmbeddingRequest{
		Model:  c.embeddingModel,
		Prompt: text,
	}
	jsonBody, err := json.Marshal(reqBody)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal embedding request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, "POST", c.ollamaURL+"/api/embeddings", bytes.NewReader(jsonBody))
	if err != nil {
		return nil, fmt.Errorf("failed to create embedding request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("embedding request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("ollama returned %d: %s", resp.StatusCode, string(body))
	}

	var embResp ollamaEmbeddingResponse
	if err := json.NewDecoder(resp.Body).Decode(&embResp); err != nil {
		return nil, fmt.Errorf("failed to decode embedding response: %w", err)
	}
	if len(embResp.Embedding) == 0 {
		return nil, fmt.Errorf("no embedding returned from ollama")
	}
	return embResp.Embedding, nil
}
// ensureTextIndex makes sure the collection has a full-text payload index on the
// "chunk_text" field (type "text", "word" tokenizer, MinLen 2, MaxLen 40) by sending
// PUT {qdrantURL}/collections/{collection}/index.
//
// A successful outcome is remembered in c.textIndexEnsured, so later calls for the same
// collection return immediately without a request. HTTP 200 and 409 are both treated as
// success; any other status is returned as an error that includes the response body.
//
// NOTE(review): c.textIndexEnsured is read and written here without any locking visible
// in this function — confirm that callers serialize access or that it is guarded elsewhere.
func (c *LegalRAGClient) ensureTextIndex(ctx context.Context, collection string) error {
	if c.textIndexEnsured[collection] {
		return nil
	}

	indexReq := qdrantTextIndexRequest{
		FieldName: "chunk_text",
		FieldSchema: qdrantTextFieldSchema{
			Type:      "text",
			Tokenizer: "word",
			MinLen:    2,
			MaxLen:    40,
		},
	}
	jsonBody, err := json.Marshal(indexReq)
	if err != nil {
		return fmt.Errorf("failed to marshal text index request: %w", err)
	}

	url := fmt.Sprintf("%s/collections/%s/index", c.qdrantURL, collection)
	req, err := http.NewRequestWithContext(ctx, "PUT", url, bytes.NewReader(jsonBody))
	if err != nil {
		return fmt.Errorf("failed to create text index request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	if c.qdrantAPIKey != "" {
		req.Header.Set("api-key", c.qdrantAPIKey)
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return fmt.Errorf("text index request failed: %w", err)
	}
	defer resp.Body.Close()

	// 200 = created, 409 = already exists — both are fine
	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusConflict {
		// Best effort: the body is only used to enrich the error message.
		body, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("text index creation failed %d: %s", resp.StatusCode, string(body))
	}

	// The success body is not needed, but net/http can only hand the keep-alive
	// connection back to the pool once the body has been read to EOF and closed.
	// Drain a bounded amount so the search request that follows can reuse it; a
	// failure to drain is irrelevant to the outcome and deliberately ignored.
	_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 64<<10))

	c.textIndexEnsured[collection] = true
	return nil
}
// searchHybrid queries Qdrant through the Query API
// (POST {qdrantURL}/collections/{collection}/points/query) with RRF fusion and returns
// up to topK hits including their payload.
//
// Before querying it calls ensureTextIndex for the collection and fails if that fails.
// When regulationIDs is non-empty the request is restricted to those regulations (see
// newRegulationFilter); otherwise no filter is sent.
//
// NOTE(review): the function is described as dense + full-text, but the request built
// here carries a single dense prefetch only — no full-text prefetch is visible, and the
// query text is not a parameter. Confirm whether a text prefetch is meant to be added.
//
// It returns an error when the request cannot be marshalled, built or sent, when Qdrant
// answers with a non-200 status (body included in the message), or when the response
// cannot be decoded.
func (c *LegalRAGClient) searchHybrid(ctx context.Context, collection string, embedding []float64, regulationIDs []string, topK int) ([]qdrantSearchHit, error) {
	if err := c.ensureTextIndex(ctx, collection); err != nil {
		return nil, err
	}

	// Candidate pool per prefetch: 20 by default, widened to 4x topK once topK exceeds 20.
	prefetchLimit := 20
	if topK > 20 {
		prefetchLimit = topK * 4
	}

	queryReq := qdrantQueryRequest{
		Prefetch: []qdrantPrefetch{
			{Query: embedding, Limit: prefetchLimit},
		},
		Query:       &qdrantFusion{Fusion: "rrf"},
		Limit:       topK,
		WithPayload: true,
	}
	if len(regulationIDs) > 0 {
		queryReq.Filter = newRegulationFilter(regulationIDs)
	}

	jsonBody, err := json.Marshal(queryReq)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal query request: %w", err)
	}

	url := fmt.Sprintf("%s/collections/%s/points/query", c.qdrantURL, collection)
	req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
	if err != nil {
		return nil, fmt.Errorf("failed to create query request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	if c.qdrantAPIKey != "" {
		req.Header.Set("api-key", c.qdrantAPIKey)
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("query request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("qdrant query returned %d: %s", resp.StatusCode, string(body))
	}

	var queryResp qdrantQueryResponse
	if err := json.NewDecoder(resp.Body).Decode(&queryResp); err != nil {
		return nil, fmt.Errorf("failed to decode query response: %w", err)
	}
	return queryResp.Result, nil
}

// searchDense performs a dense-only vector search via Qdrant /points/search and returns
// up to topK hits including their payload. When regulationIDs is non-empty the search is
// restricted to those regulations (see newRegulationFilter); otherwise no filter is sent.
// The request itself is issued by doPointsSearch, whose errors are returned unchanged.
func (c *LegalRAGClient) searchDense(ctx context.Context, collection string, embedding []float64, regulationIDs []string, topK int) ([]qdrantSearchHit, error) {
	searchReq := qdrantSearchRequest{
		Vector:      embedding,
		Limit:       topK,
		WithPayload: true,
	}
	if len(regulationIDs) > 0 {
		searchReq.Filter = newRegulationFilter(regulationIDs)
	}
	return c.doPointsSearch(ctx, collection, searchReq)
}

// newRegulationFilter builds the filter shared by searchHybrid and searchDense. For every
// ID it adds two conditions to the filter's Should list: one on the legacy payload field
// "regulation_id" and one on the normalized field "regulation_code", so per-regulation
// filtering works on the re-ingested corpus too. The result is always non-nil; callers
// decide whether to attach it (they skip it for an empty ID list).
func newRegulationFilter(regulationIDs []string) *qdrantFilter {
	conditions := make([]qdrantCondition, 0, len(regulationIDs)*2)
	for _, regID := range regulationIDs {
		conditions = append(conditions,
			qdrantCondition{Key: "regulation_id", Match: qdrantMatch{Value: regID}},
			qdrantCondition{Key: "regulation_code", Match: qdrantMatch{Value: regID}},
		)
	}
	return &qdrantFilter{Should: conditions}
}
// searchBinding runs a dense vector search that only admits points whose "source_class"
// payload field equals "binding_law", returning up to topK hits with payload.
// Purpose (authority-stratified pool): the source of an obligation stays a candidate even
// when guidance documents dominate semantically. The result is meant to AUGMENT the
// semantic pool; guidance remains available as interpretation context.
func (c *LegalRAGClient) searchBinding(ctx context.Context, collection string, embedding []float64, topK int) ([]qdrantSearchHit, error) {
	bindingOnly := qdrantFilter{
		Must: []qdrantCondition{
			{Key: "source_class", Match: qdrantMatch{Value: "binding_law"}},
		},
	}
	return c.doPointsSearch(ctx, collection, qdrantSearchRequest{
		Vector:      embedding,
		Limit:       topK,
		WithPayload: true,
		Filter:      &bindingOnly,
	})
}
// controlPoolDepth is the number of hits requested by the dense control-pool search
// (searchControls passes it as the request Limit).
// Rationale recorded by the authors: for an EU-cyber control query the relevant control
// sources (NIST, CRA Annex) were measured at dense rank ~8-9, which the client's small
// top-K does not reach, so a fixed dense depth of 60 reliably surfaces them.
const controlPoolDepth = 60
// searchControls pulls a deep dense pool (controlPoolDepth hits, no filter) and returns
// only the hits whose role — derived in code from the payload via controlRoleOf, so no
// source_role tag is required — is accepted by isControlPoolRole. This lets control
// sources that the small top-K (hybrid) search misses become candidates on an
// implementation question. The result AUGMENTS the pool; gating on control intent is the
// caller's job. Errors from the underlying search are returned unchanged.
func (c *LegalRAGClient) searchControls(ctx context.Context, collection string, embedding []float64) ([]qdrantSearchHit, error) {
	pool, err := c.doPointsSearch(ctx, collection, qdrantSearchRequest{
		Vector:      embedding,
		Limit:       controlPoolDepth,
		WithPayload: true,
	})
	if err != nil {
		return nil, err
	}

	controls := make([]qdrantSearchHit, 0, len(pool))
	for i := range pool {
		if !isControlPoolRole(controlRoleOf(pool[i].Payload)) {
			continue
		}
		controls = append(controls, pool[i])
	}
	return controls, nil
}
// doPointsSearch sends searchReq as JSON to
// POST {qdrantURL}/collections/{collection}/points/search and returns the decoded hits.
// The "api-key" header is set only when c.qdrantAPIKey is non-empty. An error is
// returned when the request cannot be marshalled, built or sent, when Qdrant answers
// with a non-200 status (body included in the message), or when decoding fails.
func (c *LegalRAGClient) doPointsSearch(ctx context.Context, collection string, searchReq qdrantSearchRequest) ([]qdrantSearchHit, error) {
	payload, err := json.Marshal(searchReq)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal search request: %w", err)
	}

	endpoint := fmt.Sprintf("%s/collections/%s/points/search", c.qdrantURL, collection)
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("failed to create search request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")
	if key := c.qdrantAPIKey; key != "" {
		httpReq.Header.Set("api-key", key)
	}

	httpResp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("search request failed: %w", err)
	}
	defer httpResp.Body.Close()

	if status := httpResp.StatusCode; status != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		raw, _ := io.ReadAll(httpResp.Body)
		return nil, fmt.Errorf("qdrant returned %d: %s", status, raw)
	}

	var decoded qdrantSearchResponse
	err = json.NewDecoder(httpResp.Body).Decode(&decoded)
	if err != nil {
		return nil, fmt.Errorf("failed to decode search response: %w", err)
	}
	return decoded.Result, nil
}