package ucca import ( "bytes" "context" "encoding/json" "fmt" "io" "net/http" "os" "strings" "time" ) // LegalRAGClient provides access to the legal corpus vector search. type LegalRAGClient struct { qdrantHost string qdrantPort string embeddingURL string collection string httpClient *http.Client } // LegalSearchResult represents a single search result from the legal corpus. type LegalSearchResult struct { Text string `json:"text"` RegulationCode string `json:"regulation_code"` RegulationName string `json:"regulation_name"` Article string `json:"article,omitempty"` Paragraph string `json:"paragraph,omitempty"` SourceURL string `json:"source_url"` Score float64 `json:"score"` } // LegalContext represents aggregated legal context for an assessment. type LegalContext struct { Query string `json:"query"` Results []LegalSearchResult `json:"results"` RelevantArticles []string `json:"relevant_articles"` Regulations []string `json:"regulations"` GeneratedAt time.Time `json:"generated_at"` } // NewLegalRAGClient creates a new Legal RAG client. func NewLegalRAGClient() *LegalRAGClient { qdrantHost := os.Getenv("QDRANT_HOST") if qdrantHost == "" { qdrantHost = "localhost" } qdrantPort := os.Getenv("QDRANT_PORT") if qdrantPort == "" { qdrantPort = "6333" } embeddingURL := os.Getenv("EMBEDDING_SERVICE_URL") if embeddingURL == "" { embeddingURL = "http://localhost:8087" } return &LegalRAGClient{ qdrantHost: qdrantHost, qdrantPort: qdrantPort, embeddingURL: embeddingURL, collection: "bp_legal_corpus", httpClient: &http.Client{ Timeout: 30 * time.Second, }, } } // embeddingResponse from the embedding service. type embeddingResponse struct { Embeddings [][]float64 `json:"embeddings"` } // qdrantSearchRequest for Qdrant REST API. type qdrantSearchRequest struct { Vector []float64 `json:"vector"` Limit int `json:"limit"` WithPayload bool `json:"with_payload"` Filter *qdrantFilter `json:"filter,omitempty"` } type qdrantFilter struct { Should []qdrantCondition `json:"should,omitempty"` Must []qdrantCondition `json:"must,omitempty"` } type qdrantCondition struct { Key string `json:"key"` Match qdrantMatch `json:"match"` } type qdrantMatch struct { Value string `json:"value"` } // qdrantSearchResponse from Qdrant REST API. type qdrantSearchResponse struct { Result []qdrantSearchHit `json:"result"` } type qdrantSearchHit struct { ID string `json:"id"` Score float64 `json:"score"` Payload map[string]interface{} `json:"payload"` } // generateEmbedding calls the embedding service to get a vector for the query. func (c *LegalRAGClient) generateEmbedding(ctx context.Context, text string) ([]float64, error) { reqBody := map[string]interface{}{ "texts": []string{text}, } jsonBody, err := json.Marshal(reqBody) if err != nil { return nil, fmt.Errorf("failed to marshal embedding request: %w", err) } req, err := http.NewRequestWithContext(ctx, "POST", c.embeddingURL+"/embed", bytes.NewReader(jsonBody)) if err != nil { return nil, fmt.Errorf("failed to create embedding request: %w", err) } req.Header.Set("Content-Type", "application/json") resp, err := c.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("embedding request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("embedding service returned %d: %s", resp.StatusCode, string(body)) } var embResp embeddingResponse if err := json.NewDecoder(resp.Body).Decode(&embResp); err != nil { return nil, fmt.Errorf("failed to decode embedding response: %w", err) } if len(embResp.Embeddings) == 0 { return nil, fmt.Errorf("no embeddings returned") } return embResp.Embeddings[0], nil } // Search queries the legal corpus for relevant passages. func (c *LegalRAGClient) Search(ctx context.Context, query string, regulationCodes []string, topK int) ([]LegalSearchResult, error) { // Generate query embedding embedding, err := c.generateEmbedding(ctx, query) if err != nil { return nil, fmt.Errorf("failed to generate embedding: %w", err) } // Build Qdrant search request searchReq := qdrantSearchRequest{ Vector: embedding, Limit: topK, WithPayload: true, } // Add filter for specific regulations if provided if len(regulationCodes) > 0 { conditions := make([]qdrantCondition, len(regulationCodes)) for i, code := range regulationCodes { conditions[i] = qdrantCondition{ Key: "regulation_code", Match: qdrantMatch{Value: code}, } } searchReq.Filter = &qdrantFilter{Should: conditions} } jsonBody, err := json.Marshal(searchReq) if err != nil { return nil, fmt.Errorf("failed to marshal search request: %w", err) } // Call Qdrant url := fmt.Sprintf("http://%s:%s/collections/%s/points/search", c.qdrantHost, c.qdrantPort, c.collection) req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody)) if err != nil { return nil, fmt.Errorf("failed to create search request: %w", err) } req.Header.Set("Content-Type", "application/json") resp, err := c.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("search request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("qdrant returned %d: %s", resp.StatusCode, string(body)) } var searchResp qdrantSearchResponse if err := json.NewDecoder(resp.Body).Decode(&searchResp); err != nil { return nil, fmt.Errorf("failed to decode search response: %w", err) } // Convert to results results := make([]LegalSearchResult, len(searchResp.Result)) for i, hit := range searchResp.Result { results[i] = LegalSearchResult{ Text: getString(hit.Payload, "text"), RegulationCode: getString(hit.Payload, "regulation_code"), RegulationName: getString(hit.Payload, "regulation_name"), Article: getString(hit.Payload, "article"), Paragraph: getString(hit.Payload, "paragraph"), SourceURL: getString(hit.Payload, "source_url"), Score: hit.Score, } } return results, nil } // GetLegalContextForAssessment retrieves relevant legal context for an assessment. func (c *LegalRAGClient) GetLegalContextForAssessment(ctx context.Context, assessment *Assessment) (*LegalContext, error) { // Build query from assessment data queryParts := []string{} // Add domain context if assessment.Domain != "" { queryParts = append(queryParts, fmt.Sprintf("KI-Anwendung im Bereich %s", assessment.Domain)) } // Add data type context if assessment.Intake.DataTypes.Article9Data { queryParts = append(queryParts, "besondere Kategorien personenbezogener Daten Art. 9 DSGVO") } if assessment.Intake.DataTypes.PersonalData { queryParts = append(queryParts, "personenbezogene Daten") } if assessment.Intake.DataTypes.MinorData { queryParts = append(queryParts, "Daten von Minderjährigen") } // Add purpose context if assessment.Intake.Purpose.EvaluationScoring { queryParts = append(queryParts, "automatisierte Bewertung Scoring") } if assessment.Intake.Purpose.DecisionMaking { queryParts = append(queryParts, "automatisierte Entscheidung Art. 22 DSGVO") } if assessment.Intake.Purpose.Profiling { queryParts = append(queryParts, "Profiling") } // Add risk-specific context if assessment.DSFARecommended { queryParts = append(queryParts, "Datenschutz-Folgenabschätzung Art. 35 DSGVO") } if assessment.Art22Risk { queryParts = append(queryParts, "automatisierte Einzelentscheidung rechtliche Wirkung") } // Build final query query := strings.Join(queryParts, " ") if query == "" { query = "DSGVO Anforderungen KI-System Datenschutz" } // Determine which regulations to search based on triggered rules regulationCodes := c.determineRelevantRegulations(assessment) // Search legal corpus results, err := c.Search(ctx, query, regulationCodes, 5) if err != nil { return nil, err } // Extract unique articles and regulations articleSet := make(map[string]bool) regSet := make(map[string]bool) for _, r := range results { if r.Article != "" { key := fmt.Sprintf("%s Art. %s", r.RegulationCode, r.Article) articleSet[key] = true } regSet[r.RegulationCode] = true } articles := make([]string, 0, len(articleSet)) for a := range articleSet { articles = append(articles, a) } regulations := make([]string, 0, len(regSet)) for r := range regSet { regulations = append(regulations, r) } return &LegalContext{ Query: query, Results: results, RelevantArticles: articles, Regulations: regulations, GeneratedAt: time.Now().UTC(), }, nil } // determineRelevantRegulations determines which regulations to search based on the assessment. func (c *LegalRAGClient) determineRelevantRegulations(assessment *Assessment) []string { codes := []string{"GDPR"} // Always include GDPR // Check triggered rules for regulation hints for _, rule := range assessment.TriggeredRules { gdprRef := rule.GDPRRef if strings.Contains(gdprRef, "AI Act") || strings.Contains(gdprRef, "KI-VO") { codes = append(codes, "AIACT") } if strings.Contains(gdprRef, "Art. 9") || strings.Contains(gdprRef, "Art. 22") { // Already have GDPR } } // Add AI Act if AI-related controls are required for _, ctrl := range assessment.RequiredControls { if strings.HasPrefix(ctrl.ID, "AI-") { if !contains(codes, "AIACT") { codes = append(codes, "AIACT") } break } } // Add BSI if security controls are required for _, ctrl := range assessment.RequiredControls { if strings.HasPrefix(ctrl.ID, "CRYPTO-") || strings.HasPrefix(ctrl.ID, "IAM-") { codes = append(codes, "BSI-TR-03161-1") break } } return codes } // FormatLegalContextForPrompt formats the legal context for inclusion in an LLM prompt. func (c *LegalRAGClient) FormatLegalContextForPrompt(lc *LegalContext) string { if lc == nil || len(lc.Results) == 0 { return "" } var buf bytes.Buffer buf.WriteString("\n\n**Relevante Rechtsgrundlagen:**\n\n") for i, result := range lc.Results { buf.WriteString(fmt.Sprintf("%d. **%s** (%s)", i+1, result.RegulationName, result.RegulationCode)) if result.Article != "" { buf.WriteString(fmt.Sprintf(" - Art. %s", result.Article)) if result.Paragraph != "" { buf.WriteString(fmt.Sprintf(" Abs. %s", result.Paragraph)) } } buf.WriteString("\n") buf.WriteString(fmt.Sprintf(" > %s\n\n", truncateText(result.Text, 300))) } return buf.String() } // Helper functions func getString(m map[string]interface{}, key string) string { if v, ok := m[key]; ok { if s, ok := v.(string); ok { return s } } return "" } func contains(slice []string, item string) bool { for _, s := range slice { if s == item { return true } } return false } func truncateText(text string, maxLen int) string { if len(text) <= maxLen { return text } return text[:maxLen] + "..." }