refactor(go/ucca): split policy_engine, legal_rag, ai_act, nis2, financial_policy, dsgvo_module
Split 6 oversized files (719–882 LOC each) into focused files under 500 LOC: - policy_engine.go → types, loader, eval, gen (4 files) - legal_rag.go → types, client, http, context, scroll (5 files) - ai_act_module.go → module, yaml, obligations (3 files) - nis2_module.go → module, yaml, obligations + shared obligation_yaml_types.go (3+1 files) - financial_policy.go → types, engine (2 files) - dsgvo_module.go → module, yaml, obligations (3 files) All in package ucca, zero exported symbol renames, go test ./internal/ucca/... passes. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,809 +1,8 @@
|
||||
// Package ucca provides the Use Case Compliance Assessment engine.
|
||||
// legal_rag.go is split into:
|
||||
// - legal_rag_types.go — struct types (results, Qdrant/Ollama HTTP types)
|
||||
// - legal_rag_client.go — LegalRAGClient struct, constructor, Search methods, helpers
|
||||
// - legal_rag_http.go — HTTP helpers: embedding, dense/hybrid search, text index
|
||||
// - legal_rag_context.go — GetLegalContextForAssessment, determineRelevantRegulations
|
||||
// - legal_rag_scroll.go — ScrollChunks + payload helper functions
|
||||
package ucca
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// LegalRAGClient provides access to the compliance CE vector search via Qdrant + Ollama bge-m3.
|
||||
type LegalRAGClient struct {
|
||||
qdrantURL string
|
||||
qdrantAPIKey string
|
||||
ollamaURL string
|
||||
embeddingModel string
|
||||
collection string
|
||||
httpClient *http.Client
|
||||
textIndexEnsured map[string]bool // tracks which collections have text index
|
||||
hybridEnabled bool // use Query API with RRF fusion
|
||||
}
|
||||
|
||||
// LegalSearchResult represents a single search result from the compliance corpus.
|
||||
type LegalSearchResult struct {
|
||||
Text string `json:"text"`
|
||||
RegulationCode string `json:"regulation_code"`
|
||||
RegulationName string `json:"regulation_name"`
|
||||
RegulationShort string `json:"regulation_short"`
|
||||
Category string `json:"category"`
|
||||
Article string `json:"article,omitempty"`
|
||||
Paragraph string `json:"paragraph,omitempty"`
|
||||
Pages []int `json:"pages,omitempty"`
|
||||
SourceURL string `json:"source_url"`
|
||||
Score float64 `json:"score"`
|
||||
}
|
||||
|
||||
// LegalContext represents aggregated legal context for an assessment.
|
||||
type LegalContext struct {
|
||||
Query string `json:"query"`
|
||||
Results []LegalSearchResult `json:"results"`
|
||||
RelevantArticles []string `json:"relevant_articles"`
|
||||
Regulations []string `json:"regulations"`
|
||||
GeneratedAt time.Time `json:"generated_at"`
|
||||
}
|
||||
|
||||
// RegulationInfo describes an available regulation in the corpus.
|
||||
type CERegulationInfo struct {
|
||||
ID string `json:"id"`
|
||||
NameDE string `json:"name_de"`
|
||||
NameEN string `json:"name_en"`
|
||||
Short string `json:"short"`
|
||||
Category string `json:"category"`
|
||||
}
|
||||
|
||||
// NewLegalRAGClient creates a new Legal RAG client using Ollama bge-m3 embeddings.
|
||||
func NewLegalRAGClient() *LegalRAGClient {
|
||||
qdrantURL := os.Getenv("QDRANT_URL")
|
||||
if qdrantURL == "" {
|
||||
qdrantURL = "http://localhost:6333"
|
||||
}
|
||||
// Strip trailing slash
|
||||
qdrantURL = strings.TrimRight(qdrantURL, "/")
|
||||
|
||||
qdrantAPIKey := os.Getenv("QDRANT_API_KEY")
|
||||
|
||||
ollamaURL := os.Getenv("OLLAMA_URL")
|
||||
if ollamaURL == "" {
|
||||
ollamaURL = "http://localhost:11434"
|
||||
}
|
||||
|
||||
hybridEnabled := os.Getenv("RAG_HYBRID_SEARCH") != "false" // enabled by default
|
||||
|
||||
return &LegalRAGClient{
|
||||
qdrantURL: qdrantURL,
|
||||
qdrantAPIKey: qdrantAPIKey,
|
||||
ollamaURL: ollamaURL,
|
||||
embeddingModel: "bge-m3",
|
||||
collection: "bp_compliance_ce",
|
||||
textIndexEnsured: make(map[string]bool),
|
||||
hybridEnabled: hybridEnabled,
|
||||
httpClient: &http.Client{
|
||||
Timeout: 60 * time.Second,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ollamaEmbeddingRequest for Ollama embedding API.
|
||||
type ollamaEmbeddingRequest struct {
|
||||
Model string `json:"model"`
|
||||
Prompt string `json:"prompt"`
|
||||
}
|
||||
|
||||
// ollamaEmbeddingResponse from Ollama embedding API.
|
||||
type ollamaEmbeddingResponse struct {
|
||||
Embedding []float64 `json:"embedding"`
|
||||
}
|
||||
|
||||
// qdrantSearchRequest for Qdrant REST API.
|
||||
type qdrantSearchRequest struct {
|
||||
Vector []float64 `json:"vector"`
|
||||
Limit int `json:"limit"`
|
||||
WithPayload bool `json:"with_payload"`
|
||||
Filter *qdrantFilter `json:"filter,omitempty"`
|
||||
}
|
||||
|
||||
type qdrantFilter struct {
|
||||
Should []qdrantCondition `json:"should,omitempty"`
|
||||
Must []qdrantCondition `json:"must,omitempty"`
|
||||
}
|
||||
|
||||
type qdrantCondition struct {
|
||||
Key string `json:"key"`
|
||||
Match qdrantMatch `json:"match"`
|
||||
}
|
||||
|
||||
type qdrantMatch struct {
|
||||
Value string `json:"value"`
|
||||
}
|
||||
|
||||
// qdrantSearchResponse from Qdrant REST API.
|
||||
type qdrantSearchResponse struct {
|
||||
Result []qdrantSearchHit `json:"result"`
|
||||
}
|
||||
|
||||
type qdrantSearchHit struct {
|
||||
ID interface{} `json:"id"`
|
||||
Score float64 `json:"score"`
|
||||
Payload map[string]interface{} `json:"payload"`
|
||||
}
|
||||
|
||||
// --- Hybrid Search (Query API with RRF fusion) ---
|
||||
|
||||
// qdrantQueryRequest for Qdrant Query API with prefetch + fusion.
|
||||
type qdrantQueryRequest struct {
|
||||
Prefetch []qdrantPrefetch `json:"prefetch"`
|
||||
Query *qdrantFusion `json:"query"`
|
||||
Limit int `json:"limit"`
|
||||
WithPayload bool `json:"with_payload"`
|
||||
Filter *qdrantFilter `json:"filter,omitempty"`
|
||||
}
|
||||
|
||||
type qdrantPrefetch struct {
|
||||
Query []float64 `json:"query"`
|
||||
Limit int `json:"limit"`
|
||||
Filter *qdrantFilter `json:"filter,omitempty"`
|
||||
}
|
||||
|
||||
type qdrantFusion struct {
|
||||
Fusion string `json:"fusion"`
|
||||
}
|
||||
|
||||
// qdrantQueryResponse from Qdrant Query API (same shape as search).
|
||||
type qdrantQueryResponse struct {
|
||||
Result []qdrantSearchHit `json:"result"`
|
||||
}
|
||||
|
||||
// qdrantTextIndexRequest for creating a full-text index on a payload field.
|
||||
type qdrantTextIndexRequest struct {
|
||||
FieldName string `json:"field_name"`
|
||||
FieldSchema qdrantTextFieldSchema `json:"field_schema"`
|
||||
}
|
||||
|
||||
type qdrantTextFieldSchema struct {
|
||||
Type string `json:"type"`
|
||||
Tokenizer string `json:"tokenizer"`
|
||||
MinLen int `json:"min_token_len,omitempty"`
|
||||
MaxLen int `json:"max_token_len,omitempty"`
|
||||
}
|
||||
|
||||
// ensureTextIndex creates a full-text index on chunk_text if not already done for this collection.
|
||||
func (c *LegalRAGClient) ensureTextIndex(ctx context.Context, collection string) error {
|
||||
if c.textIndexEnsured[collection] {
|
||||
return nil
|
||||
}
|
||||
|
||||
indexReq := qdrantTextIndexRequest{
|
||||
FieldName: "chunk_text",
|
||||
FieldSchema: qdrantTextFieldSchema{
|
||||
Type: "text",
|
||||
Tokenizer: "word",
|
||||
MinLen: 2,
|
||||
MaxLen: 40,
|
||||
},
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(indexReq)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal text index request: %w", err)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/collections/%s/index", c.qdrantURL, collection)
|
||||
req, err := http.NewRequestWithContext(ctx, "PUT", url, bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create text index request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
if c.qdrantAPIKey != "" {
|
||||
req.Header.Set("api-key", c.qdrantAPIKey)
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("text index request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// 200 = created, 409 = already exists — both are fine
|
||||
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusConflict {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("text index creation failed %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
c.textIndexEnsured[collection] = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// searchHybrid performs RRF-fused hybrid search (dense + full-text) via Qdrant Query API.
|
||||
func (c *LegalRAGClient) searchHybrid(ctx context.Context, collection string, embedding []float64, regulationIDs []string, topK int) ([]qdrantSearchHit, error) {
|
||||
// Ensure text index exists
|
||||
if err := c.ensureTextIndex(ctx, collection); err != nil {
|
||||
// Non-fatal: log and fall back to dense-only
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Build prefetch with dense vector (retrieve top-20 for re-ranking)
|
||||
prefetchLimit := 20
|
||||
if topK > 20 {
|
||||
prefetchLimit = topK * 4
|
||||
}
|
||||
|
||||
queryReq := qdrantQueryRequest{
|
||||
Prefetch: []qdrantPrefetch{
|
||||
{Query: embedding, Limit: prefetchLimit},
|
||||
},
|
||||
Query: &qdrantFusion{Fusion: "rrf"},
|
||||
Limit: topK,
|
||||
WithPayload: true,
|
||||
}
|
||||
|
||||
// Add regulation filter
|
||||
if len(regulationIDs) > 0 {
|
||||
conditions := make([]qdrantCondition, len(regulationIDs))
|
||||
for i, regID := range regulationIDs {
|
||||
conditions[i] = qdrantCondition{
|
||||
Key: "regulation_id",
|
||||
Match: qdrantMatch{Value: regID},
|
||||
}
|
||||
}
|
||||
queryReq.Filter = &qdrantFilter{Should: conditions}
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(queryReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal query request: %w", err)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/collections/%s/points/query", c.qdrantURL, collection)
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create query request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
if c.qdrantAPIKey != "" {
|
||||
req.Header.Set("api-key", c.qdrantAPIKey)
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("qdrant query returned %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var queryResp qdrantQueryResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&queryResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode query response: %w", err)
|
||||
}
|
||||
|
||||
return queryResp.Result, nil
|
||||
}
|
||||
|
||||
// generateEmbedding calls Ollama bge-m3 to get a 1024-dim vector for the query.
|
||||
func (c *LegalRAGClient) generateEmbedding(ctx context.Context, text string) ([]float64, error) {
|
||||
// Truncate to 2000 chars for bge-m3
|
||||
if len(text) > 2000 {
|
||||
text = text[:2000]
|
||||
}
|
||||
|
||||
reqBody := ollamaEmbeddingRequest{
|
||||
Model: c.embeddingModel,
|
||||
Prompt: text,
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal embedding request: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", c.ollamaURL+"/api/embeddings", bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create embedding request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("embedding request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("ollama returned %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var embResp ollamaEmbeddingResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&embResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode embedding response: %w", err)
|
||||
}
|
||||
|
||||
if len(embResp.Embedding) == 0 {
|
||||
return nil, fmt.Errorf("no embedding returned from ollama")
|
||||
}
|
||||
|
||||
return embResp.Embedding, nil
|
||||
}
|
||||
|
||||
// SearchCollection queries a specific Qdrant collection for relevant passages.
|
||||
// If collection is empty, it falls back to the default collection (bp_compliance_ce).
|
||||
func (c *LegalRAGClient) SearchCollection(ctx context.Context, collection string, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) {
|
||||
if collection == "" {
|
||||
collection = c.collection
|
||||
}
|
||||
return c.searchInternal(ctx, collection, query, regulationIDs, topK)
|
||||
}
|
||||
|
||||
// Search queries the compliance CE corpus for relevant passages.
|
||||
func (c *LegalRAGClient) Search(ctx context.Context, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) {
|
||||
return c.searchInternal(ctx, c.collection, query, regulationIDs, topK)
|
||||
}
|
||||
|
||||
// searchInternal performs the actual search against a given collection.
|
||||
// If hybrid search is enabled, it uses the Qdrant Query API with RRF fusion
|
||||
// (dense + full-text). Falls back to dense-only /points/search on failure.
|
||||
func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) {
|
||||
// Generate query embedding via Ollama bge-m3
|
||||
embedding, err := c.generateEmbedding(ctx, query)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate embedding: %w", err)
|
||||
}
|
||||
|
||||
// Try hybrid search first (Query API + RRF), fall back to dense-only
|
||||
var hits []qdrantSearchHit
|
||||
|
||||
if c.hybridEnabled {
|
||||
hybridHits, err := c.searchHybrid(ctx, collection, embedding, regulationIDs, topK)
|
||||
if err == nil {
|
||||
hits = hybridHits
|
||||
}
|
||||
// On error, fall through to dense-only search below
|
||||
}
|
||||
|
||||
if hits == nil {
|
||||
denseHits, err := c.searchDense(ctx, collection, embedding, regulationIDs, topK)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
hits = denseHits
|
||||
}
|
||||
|
||||
// Convert to results using bp_compliance_ce payload schema
|
||||
results := make([]LegalSearchResult, len(hits))
|
||||
for i, hit := range hits {
|
||||
results[i] = LegalSearchResult{
|
||||
Text: getString(hit.Payload, "chunk_text"),
|
||||
RegulationCode: getString(hit.Payload, "regulation_id"),
|
||||
RegulationName: getString(hit.Payload, "regulation_name_de"),
|
||||
RegulationShort: getString(hit.Payload, "regulation_short"),
|
||||
Category: getString(hit.Payload, "category"),
|
||||
Pages: getIntSlice(hit.Payload, "pages"),
|
||||
SourceURL: getString(hit.Payload, "source"),
|
||||
Score: hit.Score,
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// searchDense performs a dense-only vector search via Qdrant /points/search.
|
||||
func (c *LegalRAGClient) searchDense(ctx context.Context, collection string, embedding []float64, regulationIDs []string, topK int) ([]qdrantSearchHit, error) {
|
||||
searchReq := qdrantSearchRequest{
|
||||
Vector: embedding,
|
||||
Limit: topK,
|
||||
WithPayload: true,
|
||||
}
|
||||
|
||||
if len(regulationIDs) > 0 {
|
||||
conditions := make([]qdrantCondition, len(regulationIDs))
|
||||
for i, regID := range regulationIDs {
|
||||
conditions[i] = qdrantCondition{
|
||||
Key: "regulation_id",
|
||||
Match: qdrantMatch{Value: regID},
|
||||
}
|
||||
}
|
||||
searchReq.Filter = &qdrantFilter{Should: conditions}
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(searchReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal search request: %w", err)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/collections/%s/points/search", c.qdrantURL, collection)
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create search request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
if c.qdrantAPIKey != "" {
|
||||
req.Header.Set("api-key", c.qdrantAPIKey)
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("search request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("qdrant returned %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var searchResp qdrantSearchResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&searchResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode search response: %w", err)
|
||||
}
|
||||
|
||||
return searchResp.Result, nil
|
||||
}
|
||||
|
||||
// GetLegalContextForAssessment retrieves relevant legal context for an assessment.
|
||||
func (c *LegalRAGClient) GetLegalContextForAssessment(ctx context.Context, assessment *Assessment) (*LegalContext, error) {
|
||||
// Build query from assessment data
|
||||
queryParts := []string{}
|
||||
|
||||
// Add domain context
|
||||
if assessment.Domain != "" {
|
||||
queryParts = append(queryParts, fmt.Sprintf("KI-Anwendung im Bereich %s", assessment.Domain))
|
||||
}
|
||||
|
||||
// Add data type context
|
||||
if assessment.Intake.DataTypes.Article9Data {
|
||||
queryParts = append(queryParts, "besondere Kategorien personenbezogener Daten Art. 9 DSGVO")
|
||||
}
|
||||
if assessment.Intake.DataTypes.PersonalData {
|
||||
queryParts = append(queryParts, "personenbezogene Daten")
|
||||
}
|
||||
if assessment.Intake.DataTypes.MinorData {
|
||||
queryParts = append(queryParts, "Daten von Minderjährigen")
|
||||
}
|
||||
|
||||
// Add purpose context
|
||||
if assessment.Intake.Purpose.EvaluationScoring {
|
||||
queryParts = append(queryParts, "automatisierte Bewertung Scoring")
|
||||
}
|
||||
if assessment.Intake.Purpose.DecisionMaking {
|
||||
queryParts = append(queryParts, "automatisierte Entscheidung Art. 22 DSGVO")
|
||||
}
|
||||
if assessment.Intake.Purpose.Profiling {
|
||||
queryParts = append(queryParts, "Profiling")
|
||||
}
|
||||
|
||||
// Add risk-specific context
|
||||
if assessment.DSFARecommended {
|
||||
queryParts = append(queryParts, "Datenschutz-Folgenabschätzung Art. 35 DSGVO")
|
||||
}
|
||||
if assessment.Art22Risk {
|
||||
queryParts = append(queryParts, "automatisierte Einzelentscheidung rechtliche Wirkung")
|
||||
}
|
||||
|
||||
// Build final query
|
||||
query := strings.Join(queryParts, " ")
|
||||
if query == "" {
|
||||
query = "DSGVO Anforderungen KI-System Datenschutz"
|
||||
}
|
||||
|
||||
// Determine which regulations to search based on triggered rules
|
||||
regulationIDs := c.determineRelevantRegulations(assessment)
|
||||
|
||||
// Search compliance corpus
|
||||
results, err := c.Search(ctx, query, regulationIDs, 5)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Extract unique regulations
|
||||
regSet := make(map[string]bool)
|
||||
for _, r := range results {
|
||||
regSet[r.RegulationCode] = true
|
||||
}
|
||||
|
||||
regulations := make([]string, 0, len(regSet))
|
||||
for r := range regSet {
|
||||
regulations = append(regulations, r)
|
||||
}
|
||||
|
||||
// Build relevant articles from page references
|
||||
articles := make([]string, 0)
|
||||
for _, r := range results {
|
||||
if len(r.Pages) > 0 {
|
||||
key := fmt.Sprintf("%s S. %v", r.RegulationShort, r.Pages)
|
||||
articles = append(articles, key)
|
||||
}
|
||||
}
|
||||
|
||||
return &LegalContext{
|
||||
Query: query,
|
||||
Results: results,
|
||||
RelevantArticles: articles,
|
||||
Regulations: regulations,
|
||||
GeneratedAt: time.Now().UTC(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// determineRelevantRegulations determines which regulations to search based on the assessment.
|
||||
func (c *LegalRAGClient) determineRelevantRegulations(assessment *Assessment) []string {
|
||||
ids := []string{"eu_2016_679"} // Always include GDPR
|
||||
|
||||
// Check triggered rules for regulation hints
|
||||
for _, rule := range assessment.TriggeredRules {
|
||||
gdprRef := rule.GDPRRef
|
||||
if strings.Contains(gdprRef, "AI Act") || strings.Contains(gdprRef, "KI-VO") {
|
||||
if !contains(ids, "eu_2024_1689") {
|
||||
ids = append(ids, "eu_2024_1689")
|
||||
}
|
||||
}
|
||||
if strings.Contains(gdprRef, "NIS2") || strings.Contains(gdprRef, "NIS-2") {
|
||||
if !contains(ids, "eu_2022_2555") {
|
||||
ids = append(ids, "eu_2022_2555")
|
||||
}
|
||||
}
|
||||
if strings.Contains(gdprRef, "CRA") || strings.Contains(gdprRef, "Cyber Resilience") {
|
||||
if !contains(ids, "eu_2024_2847") {
|
||||
ids = append(ids, "eu_2024_2847")
|
||||
}
|
||||
}
|
||||
if strings.Contains(gdprRef, "Maschinenverordnung") || strings.Contains(gdprRef, "Machinery") {
|
||||
if !contains(ids, "eu_2023_1230") {
|
||||
ids = append(ids, "eu_2023_1230")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add AI Act if AI-related controls are required
|
||||
for _, ctrl := range assessment.RequiredControls {
|
||||
if strings.HasPrefix(ctrl.ID, "AI-") {
|
||||
if !contains(ids, "eu_2024_1689") {
|
||||
ids = append(ids, "eu_2024_1689")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Add CRA/NIS2 if security controls are required
|
||||
for _, ctrl := range assessment.RequiredControls {
|
||||
if strings.HasPrefix(ctrl.ID, "CRYPTO-") || strings.HasPrefix(ctrl.ID, "IAM-") || strings.HasPrefix(ctrl.ID, "SEC-") {
|
||||
if !contains(ids, "eu_2022_2555") {
|
||||
ids = append(ids, "eu_2022_2555")
|
||||
}
|
||||
if !contains(ids, "eu_2024_2847") {
|
||||
ids = append(ids, "eu_2024_2847")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return ids
|
||||
}
|
||||
|
||||
// ListAvailableRegulations returns the list of regulations available in the corpus.
|
||||
func (c *LegalRAGClient) ListAvailableRegulations() []CERegulationInfo {
|
||||
return []CERegulationInfo{
|
||||
CERegulationInfo{ID: "eu_2023_1230", NameDE: "EU-Maschinenverordnung 2023/1230", NameEN: "EU Machinery Regulation 2023/1230", Short: "Maschinenverordnung", Category: "regulation"},
|
||||
CERegulationInfo{ID: "eu_2024_1689", NameDE: "EU KI-Verordnung (AI Act)", NameEN: "EU AI Act 2024/1689", Short: "AI Act", Category: "regulation"},
|
||||
CERegulationInfo{ID: "eu_2024_2847", NameDE: "Cyber Resilience Act", NameEN: "Cyber Resilience Act 2024/2847", Short: "CRA", Category: "regulation"},
|
||||
CERegulationInfo{ID: "eu_2022_2555", NameDE: "NIS-2-Richtlinie", NameEN: "NIS2 Directive 2022/2555", Short: "NIS2", Category: "regulation"},
|
||||
CERegulationInfo{ID: "eu_2016_679", NameDE: "Datenschutz-Grundverordnung (DSGVO)", NameEN: "General Data Protection Regulation (GDPR)", Short: "DSGVO/GDPR", Category: "regulation"},
|
||||
CERegulationInfo{ID: "eu_blue_guide_2022", NameDE: "EU Blue Guide 2022", NameEN: "EU Blue Guide 2022", Short: "Blue Guide", Category: "guidance"},
|
||||
CERegulationInfo{ID: "nist_sp_800_218", NameDE: "NIST Secure Software Development Framework", NameEN: "NIST SSDF SP 800-218", Short: "NIST SSDF", Category: "guidance"},
|
||||
CERegulationInfo{ID: "nist_csf_2_0", NameDE: "NIST Cybersecurity Framework 2.0", NameEN: "NIST CSF 2.0", Short: "NIST CSF", Category: "guidance"},
|
||||
CERegulationInfo{ID: "oecd_ai_principles", NameDE: "OECD Empfehlung zu Kuenstlicher Intelligenz", NameEN: "OECD Recommendation on AI", Short: "OECD AI", Category: "guidance"},
|
||||
CERegulationInfo{ID: "enisa_supply_chain_good_practices", NameDE: "ENISA Supply Chain Cybersecurity", NameEN: "ENISA Good Practices for Supply Chain Cybersecurity", Short: "ENISA Supply Chain", Category: "guidance"},
|
||||
CERegulationInfo{ID: "enisa_threat_landscape_supply_chain", NameDE: "ENISA Threat Landscape Supply Chain", NameEN: "ENISA Threat Landscape for Supply Chain Attacks", Short: "ENISA Threat SC", Category: "guidance"},
|
||||
CERegulationInfo{ID: "enisa_ics_scada_dependencies", NameDE: "ENISA ICS/SCADA Abhaengigkeiten", NameEN: "ENISA ICS/SCADA Communication Dependencies", Short: "ENISA ICS/SCADA", Category: "guidance"},
|
||||
CERegulationInfo{ID: "cisa_secure_by_design", NameDE: "CISA Secure by Design", NameEN: "CISA Secure by Design", Short: "CISA SbD", Category: "guidance"},
|
||||
CERegulationInfo{ID: "enisa_cybersecurity_state_2024", NameDE: "ENISA State of Cybersecurity 2024", NameEN: "ENISA State of Cybersecurity in the Union 2024", Short: "ENISA 2024", Category: "guidance"},
|
||||
}
|
||||
}
|
||||
|
||||
// FormatLegalContextForPrompt formats the legal context for inclusion in an LLM prompt.
|
||||
func (c *LegalRAGClient) FormatLegalContextForPrompt(lc *LegalContext) string {
|
||||
if lc == nil || len(lc.Results) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
buf.WriteString("\n\n**Relevante Rechtsgrundlagen:**\n\n")
|
||||
|
||||
for i, result := range lc.Results {
|
||||
buf.WriteString(fmt.Sprintf("%d. **%s** (%s)", i+1, result.RegulationShort, result.RegulationCode))
|
||||
if len(result.Pages) > 0 {
|
||||
buf.WriteString(fmt.Sprintf(" - Seiten %v", result.Pages))
|
||||
}
|
||||
buf.WriteString("\n")
|
||||
buf.WriteString(fmt.Sprintf(" > %s\n\n", truncateText(result.Text, 300)))
|
||||
}
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// ScrollChunkResult represents a single chunk from the scroll/list endpoint.
|
||||
type ScrollChunkResult struct {
|
||||
ID string `json:"id"`
|
||||
Text string `json:"text"`
|
||||
RegulationCode string `json:"regulation_code"`
|
||||
RegulationName string `json:"regulation_name"`
|
||||
RegulationShort string `json:"regulation_short"`
|
||||
Category string `json:"category"`
|
||||
Article string `json:"article,omitempty"`
|
||||
Paragraph string `json:"paragraph,omitempty"`
|
||||
SourceURL string `json:"source_url,omitempty"`
|
||||
}
|
||||
|
||||
// qdrantScrollRequest for the Qdrant scroll API.
|
||||
type qdrantScrollRequest struct {
|
||||
Limit int `json:"limit"`
|
||||
Offset interface{} `json:"offset,omitempty"` // string (UUID) or null
|
||||
WithPayload bool `json:"with_payload"`
|
||||
WithVectors bool `json:"with_vectors"`
|
||||
}
|
||||
|
||||
// qdrantScrollResponse from the Qdrant scroll API.
|
||||
type qdrantScrollResponse struct {
|
||||
Result struct {
|
||||
Points []qdrantScrollPoint `json:"points"`
|
||||
NextPageOffset interface{} `json:"next_page_offset"`
|
||||
} `json:"result"`
|
||||
}
|
||||
|
||||
type qdrantScrollPoint struct {
|
||||
ID interface{} `json:"id"`
|
||||
Payload map[string]interface{} `json:"payload"`
|
||||
}
|
||||
|
||||
// ScrollChunks iterates over all chunks in a Qdrant collection using the scroll API.
|
||||
// Pass an empty offset to start from the beginning. Returns chunks, next offset ID, and error.
|
||||
func (c *LegalRAGClient) ScrollChunks(ctx context.Context, collection string, offset string, limit int) ([]ScrollChunkResult, string, error) {
|
||||
scrollReq := qdrantScrollRequest{
|
||||
Limit: limit,
|
||||
WithPayload: true,
|
||||
WithVectors: false,
|
||||
}
|
||||
if offset != "" {
|
||||
// Qdrant expects integer point IDs — parse the offset string back to a number
|
||||
// Try parsing as integer first, fall back to string (for UUID-based collections)
|
||||
var offsetInt uint64
|
||||
if _, err := fmt.Sscanf(offset, "%d", &offsetInt); err == nil {
|
||||
scrollReq.Offset = offsetInt
|
||||
} else {
|
||||
scrollReq.Offset = offset
|
||||
}
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(scrollReq)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("failed to marshal scroll request: %w", err)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/collections/%s/points/scroll", c.qdrantURL, collection)
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("failed to create scroll request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
if c.qdrantAPIKey != "" {
|
||||
req.Header.Set("api-key", c.qdrantAPIKey)
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("scroll request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, "", fmt.Errorf("qdrant returned %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var scrollResp qdrantScrollResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&scrollResp); err != nil {
|
||||
return nil, "", fmt.Errorf("failed to decode scroll response: %w", err)
|
||||
}
|
||||
|
||||
// Convert points to results
|
||||
chunks := make([]ScrollChunkResult, len(scrollResp.Result.Points))
|
||||
for i, pt := range scrollResp.Result.Points {
|
||||
// Extract point ID as string
|
||||
pointID := ""
|
||||
if pt.ID != nil {
|
||||
pointID = fmt.Sprintf("%v", pt.ID)
|
||||
}
|
||||
|
||||
chunks[i] = ScrollChunkResult{
|
||||
ID: pointID,
|
||||
Text: getString(pt.Payload, "text"),
|
||||
RegulationCode: getString(pt.Payload, "regulation_code"),
|
||||
RegulationName: getString(pt.Payload, "regulation_name"),
|
||||
RegulationShort: getString(pt.Payload, "regulation_short"),
|
||||
Category: getString(pt.Payload, "category"),
|
||||
Article: getString(pt.Payload, "article"),
|
||||
Paragraph: getString(pt.Payload, "paragraph"),
|
||||
SourceURL: getString(pt.Payload, "source_url"),
|
||||
}
|
||||
|
||||
// Fallback: try alternate payload field names used in ingestion
|
||||
if chunks[i].Text == "" {
|
||||
chunks[i].Text = getString(pt.Payload, "chunk_text")
|
||||
}
|
||||
if chunks[i].RegulationCode == "" {
|
||||
chunks[i].RegulationCode = getString(pt.Payload, "regulation_id")
|
||||
}
|
||||
if chunks[i].RegulationName == "" {
|
||||
chunks[i].RegulationName = getString(pt.Payload, "regulation_name_de")
|
||||
}
|
||||
if chunks[i].SourceURL == "" {
|
||||
chunks[i].SourceURL = getString(pt.Payload, "source")
|
||||
}
|
||||
}
|
||||
|
||||
// Extract next offset — Qdrant returns integer point IDs
|
||||
nextOffset := ""
|
||||
if scrollResp.Result.NextPageOffset != nil {
|
||||
switch v := scrollResp.Result.NextPageOffset.(type) {
|
||||
case float64:
|
||||
nextOffset = fmt.Sprintf("%.0f", v)
|
||||
case string:
|
||||
nextOffset = v
|
||||
default:
|
||||
nextOffset = fmt.Sprintf("%v", v)
|
||||
}
|
||||
}
|
||||
|
||||
return chunks, nextOffset, nil
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
func getString(m map[string]interface{}, key string) string {
|
||||
if v, ok := m[key]; ok {
|
||||
if s, ok := v.(string); ok {
|
||||
return s
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func getIntSlice(m map[string]interface{}, key string) []int {
|
||||
v, ok := m[key]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
arr, ok := v.([]interface{})
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
result := make([]int, 0, len(arr))
|
||||
for _, item := range arr {
|
||||
if f, ok := item.(float64); ok {
|
||||
result = append(result, int(f))
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func contains(slice []string, item string) bool {
|
||||
for _, s := range slice {
|
||||
if s == item {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func truncateText(text string, maxLen int) string {
|
||||
if len(text) <= maxLen {
|
||||
return text
|
||||
}
|
||||
return text[:maxLen] + "..."
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user