Initial commit: breakpilot-compliance - Compliance SDK Platform
Services: Admin-Compliance, Backend-Compliance, AI-Compliance-SDK, Consent-SDK, Developer-Portal, PCA-Platform, DSMS
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
394
ai-compliance-sdk/internal/ucca/legal_rag.go
Normal file
394
ai-compliance-sdk/internal/ucca/legal_rag.go
Normal file
@@ -0,0 +1,394 @@
|
||||
package ucca
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// LegalRAGClient searches the legal corpus. It obtains a query vector from an
// HTTP embedding service and runs a vector search against a Qdrant collection
// through Qdrant's REST API.
type LegalRAGClient struct {
	// qdrantHost and qdrantPort are combined into the Qdrant base address
	// (http://host:port) when a search request is built.
	qdrantHost, qdrantPort string
	// embeddingURL is the base URL of the embedding service.
	embeddingURL string
	// collection is the name of the Qdrant collection that is searched.
	collection string
	// httpClient performs both the embedding and the Qdrant requests.
	httpClient *http.Client
}
|
||||
|
||||
// LegalSearchResult is one passage returned by a legal corpus search,
// together with the metadata that identifies where it comes from.
type LegalSearchResult struct {
	// Text is the passage itself.
	Text string `json:"text"`
	// RegulationCode is the short code of the regulation the passage belongs to.
	RegulationCode string `json:"regulation_code"`
	// RegulationName is the display name of that regulation.
	RegulationName string `json:"regulation_name"`
	// Article is the article reference; omitted from JSON when empty.
	Article string `json:"article,omitempty"`
	// Paragraph is the paragraph reference; omitted from JSON when empty.
	Paragraph string `json:"paragraph,omitempty"`
	// SourceURL points at the source of the passage.
	SourceURL string `json:"source_url"`
	// Score is the similarity score reported by the vector search.
	Score float64 `json:"score"`
}
|
||||
|
||||
// LegalContext represents aggregated legal context for an assessment.
|
||||
type LegalContext struct {
|
||||
Query string `json:"query"`
|
||||
Results []LegalSearchResult `json:"results"`
|
||||
RelevantArticles []string `json:"relevant_articles"`
|
||||
Regulations []string `json:"regulations"`
|
||||
GeneratedAt time.Time `json:"generated_at"`
|
||||
}
|
||||
|
||||
// NewLegalRAGClient creates a new Legal RAG client.
|
||||
func NewLegalRAGClient() *LegalRAGClient {
|
||||
qdrantHost := os.Getenv("QDRANT_HOST")
|
||||
if qdrantHost == "" {
|
||||
qdrantHost = "localhost"
|
||||
}
|
||||
|
||||
qdrantPort := os.Getenv("QDRANT_PORT")
|
||||
if qdrantPort == "" {
|
||||
qdrantPort = "6333"
|
||||
}
|
||||
|
||||
embeddingURL := os.Getenv("EMBEDDING_SERVICE_URL")
|
||||
if embeddingURL == "" {
|
||||
embeddingURL = "http://localhost:8087"
|
||||
}
|
||||
|
||||
return &LegalRAGClient{
|
||||
qdrantHost: qdrantHost,
|
||||
qdrantPort: qdrantPort,
|
||||
embeddingURL: embeddingURL,
|
||||
collection: "bp_legal_corpus",
|
||||
httpClient: &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// embeddingResponse is the JSON body returned by the embedding service.
type embeddingResponse struct {
	// Embeddings holds one vector per entry of the response's "embeddings" array.
	Embeddings [][]float64 `json:"embeddings"`
}
|
||||
|
||||
// qdrantSearchRequest for Qdrant REST API.
|
||||
type qdrantSearchRequest struct {
|
||||
Vector []float64 `json:"vector"`
|
||||
Limit int `json:"limit"`
|
||||
WithPayload bool `json:"with_payload"`
|
||||
Filter *qdrantFilter `json:"filter,omitempty"`
|
||||
}
|
||||
|
||||
type qdrantFilter struct {
|
||||
Should []qdrantCondition `json:"should,omitempty"`
|
||||
Must []qdrantCondition `json:"must,omitempty"`
|
||||
}
|
||||
|
||||
type qdrantCondition struct {
|
||||
Key string `json:"key"`
|
||||
Match qdrantMatch `json:"match"`
|
||||
}
|
||||
|
||||
// qdrantMatch carries the string value used by a qdrantCondition.
type qdrantMatch struct {
	// Value is the string the payload field is matched against.
	Value string `json:"value"`
}
|
||||
|
||||
// qdrantSearchResponse from Qdrant REST API.
|
||||
type qdrantSearchResponse struct {
|
||||
Result []qdrantSearchHit `json:"result"`
|
||||
}
|
||||
|
||||
// qdrantSearchHit is a single scored point in a Qdrant search response.
type qdrantSearchHit struct {
	// ID is the point ID exactly as it appears in the response. Qdrant point
	// IDs are either unsigned integers or UUID strings, so the raw JSON is
	// kept: a plain string field would make decoding the whole response fail
	// for collections that use integer IDs.
	ID json.RawMessage `json:"id"`
	// Score is the similarity score of the hit.
	Score float64 `json:"score"`
	// Payload is the metadata stored alongside the point.
	Payload map[string]interface{} `json:"payload"`
}
|
||||
|
||||
// generateEmbedding calls the embedding service to get a vector for the query.
|
||||
func (c *LegalRAGClient) generateEmbedding(ctx context.Context, text string) ([]float64, error) {
|
||||
reqBody := map[string]interface{}{
|
||||
"texts": []string{text},
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal embedding request: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", c.embeddingURL+"/embed", bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create embedding request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("embedding request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("embedding service returned %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var embResp embeddingResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&embResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode embedding response: %w", err)
|
||||
}
|
||||
|
||||
if len(embResp.Embeddings) == 0 {
|
||||
return nil, fmt.Errorf("no embeddings returned")
|
||||
}
|
||||
|
||||
return embResp.Embeddings[0], nil
|
||||
}
|
||||
|
||||
// Search queries the legal corpus for relevant passages.
|
||||
func (c *LegalRAGClient) Search(ctx context.Context, query string, regulationCodes []string, topK int) ([]LegalSearchResult, error) {
|
||||
// Generate query embedding
|
||||
embedding, err := c.generateEmbedding(ctx, query)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate embedding: %w", err)
|
||||
}
|
||||
|
||||
// Build Qdrant search request
|
||||
searchReq := qdrantSearchRequest{
|
||||
Vector: embedding,
|
||||
Limit: topK,
|
||||
WithPayload: true,
|
||||
}
|
||||
|
||||
// Add filter for specific regulations if provided
|
||||
if len(regulationCodes) > 0 {
|
||||
conditions := make([]qdrantCondition, len(regulationCodes))
|
||||
for i, code := range regulationCodes {
|
||||
conditions[i] = qdrantCondition{
|
||||
Key: "regulation_code",
|
||||
Match: qdrantMatch{Value: code},
|
||||
}
|
||||
}
|
||||
searchReq.Filter = &qdrantFilter{Should: conditions}
|
||||
}
|
||||
|
||||
jsonBody, err := json.Marshal(searchReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal search request: %w", err)
|
||||
}
|
||||
|
||||
// Call Qdrant
|
||||
url := fmt.Sprintf("http://%s:%s/collections/%s/points/search", c.qdrantHost, c.qdrantPort, c.collection)
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create search request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("search request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("qdrant returned %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var searchResp qdrantSearchResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&searchResp); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode search response: %w", err)
|
||||
}
|
||||
|
||||
// Convert to results
|
||||
results := make([]LegalSearchResult, len(searchResp.Result))
|
||||
for i, hit := range searchResp.Result {
|
||||
results[i] = LegalSearchResult{
|
||||
Text: getString(hit.Payload, "text"),
|
||||
RegulationCode: getString(hit.Payload, "regulation_code"),
|
||||
RegulationName: getString(hit.Payload, "regulation_name"),
|
||||
Article: getString(hit.Payload, "article"),
|
||||
Paragraph: getString(hit.Payload, "paragraph"),
|
||||
SourceURL: getString(hit.Payload, "source_url"),
|
||||
Score: hit.Score,
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// GetLegalContextForAssessment retrieves relevant legal context for an assessment.
|
||||
func (c *LegalRAGClient) GetLegalContextForAssessment(ctx context.Context, assessment *Assessment) (*LegalContext, error) {
|
||||
// Build query from assessment data
|
||||
queryParts := []string{}
|
||||
|
||||
// Add domain context
|
||||
if assessment.Domain != "" {
|
||||
queryParts = append(queryParts, fmt.Sprintf("KI-Anwendung im Bereich %s", assessment.Domain))
|
||||
}
|
||||
|
||||
// Add data type context
|
||||
if assessment.Intake.DataTypes.Article9Data {
|
||||
queryParts = append(queryParts, "besondere Kategorien personenbezogener Daten Art. 9 DSGVO")
|
||||
}
|
||||
if assessment.Intake.DataTypes.PersonalData {
|
||||
queryParts = append(queryParts, "personenbezogene Daten")
|
||||
}
|
||||
if assessment.Intake.DataTypes.MinorData {
|
||||
queryParts = append(queryParts, "Daten von Minderjährigen")
|
||||
}
|
||||
|
||||
// Add purpose context
|
||||
if assessment.Intake.Purpose.EvaluationScoring {
|
||||
queryParts = append(queryParts, "automatisierte Bewertung Scoring")
|
||||
}
|
||||
if assessment.Intake.Purpose.DecisionMaking {
|
||||
queryParts = append(queryParts, "automatisierte Entscheidung Art. 22 DSGVO")
|
||||
}
|
||||
if assessment.Intake.Purpose.Profiling {
|
||||
queryParts = append(queryParts, "Profiling")
|
||||
}
|
||||
|
||||
// Add risk-specific context
|
||||
if assessment.DSFARecommended {
|
||||
queryParts = append(queryParts, "Datenschutz-Folgenabschätzung Art. 35 DSGVO")
|
||||
}
|
||||
if assessment.Art22Risk {
|
||||
queryParts = append(queryParts, "automatisierte Einzelentscheidung rechtliche Wirkung")
|
||||
}
|
||||
|
||||
// Build final query
|
||||
query := strings.Join(queryParts, " ")
|
||||
if query == "" {
|
||||
query = "DSGVO Anforderungen KI-System Datenschutz"
|
||||
}
|
||||
|
||||
// Determine which regulations to search based on triggered rules
|
||||
regulationCodes := c.determineRelevantRegulations(assessment)
|
||||
|
||||
// Search legal corpus
|
||||
results, err := c.Search(ctx, query, regulationCodes, 5)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Extract unique articles and regulations
|
||||
articleSet := make(map[string]bool)
|
||||
regSet := make(map[string]bool)
|
||||
|
||||
for _, r := range results {
|
||||
if r.Article != "" {
|
||||
key := fmt.Sprintf("%s Art. %s", r.RegulationCode, r.Article)
|
||||
articleSet[key] = true
|
||||
}
|
||||
regSet[r.RegulationCode] = true
|
||||
}
|
||||
|
||||
articles := make([]string, 0, len(articleSet))
|
||||
for a := range articleSet {
|
||||
articles = append(articles, a)
|
||||
}
|
||||
|
||||
regulations := make([]string, 0, len(regSet))
|
||||
for r := range regSet {
|
||||
regulations = append(regulations, r)
|
||||
}
|
||||
|
||||
return &LegalContext{
|
||||
Query: query,
|
||||
Results: results,
|
||||
RelevantArticles: articles,
|
||||
Regulations: regulations,
|
||||
GeneratedAt: time.Now().UTC(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// determineRelevantRegulations determines which regulations to search based on the assessment.
|
||||
func (c *LegalRAGClient) determineRelevantRegulations(assessment *Assessment) []string {
|
||||
codes := []string{"GDPR"} // Always include GDPR
|
||||
|
||||
// Check triggered rules for regulation hints
|
||||
for _, rule := range assessment.TriggeredRules {
|
||||
gdprRef := rule.GDPRRef
|
||||
if strings.Contains(gdprRef, "AI Act") || strings.Contains(gdprRef, "KI-VO") {
|
||||
codes = append(codes, "AIACT")
|
||||
}
|
||||
if strings.Contains(gdprRef, "Art. 9") || strings.Contains(gdprRef, "Art. 22") {
|
||||
// Already have GDPR
|
||||
}
|
||||
}
|
||||
|
||||
// Add AI Act if AI-related controls are required
|
||||
for _, ctrl := range assessment.RequiredControls {
|
||||
if strings.HasPrefix(ctrl.ID, "AI-") {
|
||||
if !contains(codes, "AIACT") {
|
||||
codes = append(codes, "AIACT")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Add BSI if security controls are required
|
||||
for _, ctrl := range assessment.RequiredControls {
|
||||
if strings.HasPrefix(ctrl.ID, "CRYPTO-") || strings.HasPrefix(ctrl.ID, "IAM-") {
|
||||
codes = append(codes, "BSI-TR-03161-1")
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return codes
|
||||
}
|
||||
|
||||
// FormatLegalContextForPrompt formats the legal context for inclusion in an LLM prompt.
|
||||
func (c *LegalRAGClient) FormatLegalContextForPrompt(lc *LegalContext) string {
|
||||
if lc == nil || len(lc.Results) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
buf.WriteString("\n\n**Relevante Rechtsgrundlagen:**\n\n")
|
||||
|
||||
for i, result := range lc.Results {
|
||||
buf.WriteString(fmt.Sprintf("%d. **%s** (%s)", i+1, result.RegulationName, result.RegulationCode))
|
||||
if result.Article != "" {
|
||||
buf.WriteString(fmt.Sprintf(" - Art. %s", result.Article))
|
||||
if result.Paragraph != "" {
|
||||
buf.WriteString(fmt.Sprintf(" Abs. %s", result.Paragraph))
|
||||
}
|
||||
}
|
||||
buf.WriteString("\n")
|
||||
buf.WriteString(fmt.Sprintf(" > %s\n\n", truncateText(result.Text, 300)))
|
||||
}
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
// getString returns m[key] when that entry exists and holds a string.
// A missing key, a nil map, or a value of any other type yields "".
func getString(m map[string]interface{}, key string) string {
	// Indexing a nil map or a missing key gives a nil interface, for which
	// the comma-ok assertion fails and leaves the zero string.
	text, _ := m[key].(string)
	return text
}
|
||||
|
||||
// contains reports whether item is an element of slice.
// A nil or empty slice never contains anything.
func contains(slice []string, item string) bool {
	for idx := 0; idx < len(slice); idx++ {
		if slice[idx] == item {
			return true
		}
	}
	return false
}
|
||||
|
||||
// truncateText shortens text to at most maxLen bytes and appends "..." when
// anything was cut off. Text that already fits is returned unchanged.
//
// The cut is moved back to the nearest rune boundary, so a multi-byte UTF-8
// character (for example a German umlaut) is never split in half. A negative
// maxLen is treated as 0.
func truncateText(text string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(text) <= maxLen {
		return text
	}

	// Ranging over a string yields the byte offset at which each rune
	// starts; keep the last such offset that does not exceed maxLen.
	cut := 0
	for offset := range text {
		if offset > maxLen {
			break
		}
		cut = offset
	}

	return text[:cut] + "..."
}
|
||||
Reference in New Issue
Block a user