package ucca import ( "bytes" "context" "encoding/json" "fmt" "io" "net/http" "os" "strings" "time" ) // LegalRAGClient provides access to the compliance CE vector search via Qdrant + Ollama bge-m3. type LegalRAGClient struct { qdrantURL string qdrantAPIKey string ollamaURL string embeddingModel string collection string httpClient *http.Client textIndexEnsured map[string]bool // tracks which collections have text index hybridEnabled bool // use Query API with RRF fusion } // LegalSearchResult represents a single search result from the compliance corpus. type LegalSearchResult struct { Text string `json:"text"` RegulationCode string `json:"regulation_code"` RegulationName string `json:"regulation_name"` RegulationShort string `json:"regulation_short"` Category string `json:"category"` Article string `json:"article,omitempty"` Paragraph string `json:"paragraph,omitempty"` Pages []int `json:"pages,omitempty"` SourceURL string `json:"source_url"` Score float64 `json:"score"` } // LegalContext represents aggregated legal context for an assessment. type LegalContext struct { Query string `json:"query"` Results []LegalSearchResult `json:"results"` RelevantArticles []string `json:"relevant_articles"` Regulations []string `json:"regulations"` GeneratedAt time.Time `json:"generated_at"` } // RegulationInfo describes an available regulation in the corpus. type CERegulationInfo struct { ID string `json:"id"` NameDE string `json:"name_de"` NameEN string `json:"name_en"` Short string `json:"short"` Category string `json:"category"` } // NewLegalRAGClient creates a new Legal RAG client using Ollama bge-m3 embeddings. func NewLegalRAGClient() *LegalRAGClient { qdrantURL := os.Getenv("QDRANT_URL") if qdrantURL == "" { qdrantURL = "http://localhost:6333" } // Strip trailing slash qdrantURL = strings.TrimRight(qdrantURL, "/") qdrantAPIKey := os.Getenv("QDRANT_API_KEY") ollamaURL := os.Getenv("OLLAMA_URL") if ollamaURL == "" { ollamaURL = "http://localhost:11434" } hybridEnabled := os.Getenv("RAG_HYBRID_SEARCH") != "false" // enabled by default return &LegalRAGClient{ qdrantURL: qdrantURL, qdrantAPIKey: qdrantAPIKey, ollamaURL: ollamaURL, embeddingModel: "bge-m3", collection: "bp_compliance_ce", textIndexEnsured: make(map[string]bool), hybridEnabled: hybridEnabled, httpClient: &http.Client{ Timeout: 60 * time.Second, }, } } // ollamaEmbeddingRequest for Ollama embedding API. type ollamaEmbeddingRequest struct { Model string `json:"model"` Prompt string `json:"prompt"` } // ollamaEmbeddingResponse from Ollama embedding API. type ollamaEmbeddingResponse struct { Embedding []float64 `json:"embedding"` } // qdrantSearchRequest for Qdrant REST API. type qdrantSearchRequest struct { Vector []float64 `json:"vector"` Limit int `json:"limit"` WithPayload bool `json:"with_payload"` Filter *qdrantFilter `json:"filter,omitempty"` } type qdrantFilter struct { Should []qdrantCondition `json:"should,omitempty"` Must []qdrantCondition `json:"must,omitempty"` } type qdrantCondition struct { Key string `json:"key"` Match qdrantMatch `json:"match"` } type qdrantMatch struct { Value string `json:"value"` } // qdrantSearchResponse from Qdrant REST API. type qdrantSearchResponse struct { Result []qdrantSearchHit `json:"result"` } type qdrantSearchHit struct { ID interface{} `json:"id"` Score float64 `json:"score"` Payload map[string]interface{} `json:"payload"` } // --- Hybrid Search (Query API with RRF fusion) --- // qdrantQueryRequest for Qdrant Query API with prefetch + fusion. type qdrantQueryRequest struct { Prefetch []qdrantPrefetch `json:"prefetch"` Query *qdrantFusion `json:"query"` Limit int `json:"limit"` WithPayload bool `json:"with_payload"` Filter *qdrantFilter `json:"filter,omitempty"` } type qdrantPrefetch struct { Query []float64 `json:"query"` Limit int `json:"limit"` Filter *qdrantFilter `json:"filter,omitempty"` } type qdrantFusion struct { Fusion string `json:"fusion"` } // qdrantQueryResponse from Qdrant Query API (same shape as search). type qdrantQueryResponse struct { Result []qdrantSearchHit `json:"result"` } // qdrantTextIndexRequest for creating a full-text index on a payload field. type qdrantTextIndexRequest struct { FieldName string `json:"field_name"` FieldSchema qdrantTextFieldSchema `json:"field_schema"` } type qdrantTextFieldSchema struct { Type string `json:"type"` Tokenizer string `json:"tokenizer"` MinLen int `json:"min_token_len,omitempty"` MaxLen int `json:"max_token_len,omitempty"` } // ensureTextIndex creates a full-text index on chunk_text if not already done for this collection. func (c *LegalRAGClient) ensureTextIndex(ctx context.Context, collection string) error { if c.textIndexEnsured[collection] { return nil } indexReq := qdrantTextIndexRequest{ FieldName: "chunk_text", FieldSchema: qdrantTextFieldSchema{ Type: "text", Tokenizer: "word", MinLen: 2, MaxLen: 40, }, } jsonBody, err := json.Marshal(indexReq) if err != nil { return fmt.Errorf("failed to marshal text index request: %w", err) } url := fmt.Sprintf("%s/collections/%s/index", c.qdrantURL, collection) req, err := http.NewRequestWithContext(ctx, "PUT", url, bytes.NewReader(jsonBody)) if err != nil { return fmt.Errorf("failed to create text index request: %w", err) } req.Header.Set("Content-Type", "application/json") if c.qdrantAPIKey != "" { req.Header.Set("api-key", c.qdrantAPIKey) } resp, err := c.httpClient.Do(req) if err != nil { return fmt.Errorf("text index request failed: %w", err) } defer resp.Body.Close() // 200 = created, 409 = already exists — both are fine if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusConflict { body, _ := io.ReadAll(resp.Body) return fmt.Errorf("text index creation failed %d: %s", resp.StatusCode, string(body)) } c.textIndexEnsured[collection] = true return nil } // searchHybrid performs RRF-fused hybrid search (dense + full-text) via Qdrant Query API. func (c *LegalRAGClient) searchHybrid(ctx context.Context, collection string, embedding []float64, regulationIDs []string, topK int) ([]qdrantSearchHit, error) { // Ensure text index exists if err := c.ensureTextIndex(ctx, collection); err != nil { // Non-fatal: log and fall back to dense-only return nil, err } // Build prefetch with dense vector (retrieve top-20 for re-ranking) prefetchLimit := 20 if topK > 20 { prefetchLimit = topK * 4 } queryReq := qdrantQueryRequest{ Prefetch: []qdrantPrefetch{ {Query: embedding, Limit: prefetchLimit}, }, Query: &qdrantFusion{Fusion: "rrf"}, Limit: topK, WithPayload: true, } // Add regulation filter if len(regulationIDs) > 0 { conditions := make([]qdrantCondition, len(regulationIDs)) for i, regID := range regulationIDs { conditions[i] = qdrantCondition{ Key: "regulation_id", Match: qdrantMatch{Value: regID}, } } queryReq.Filter = &qdrantFilter{Should: conditions} } jsonBody, err := json.Marshal(queryReq) if err != nil { return nil, fmt.Errorf("failed to marshal query request: %w", err) } url := fmt.Sprintf("%s/collections/%s/points/query", c.qdrantURL, collection) req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody)) if err != nil { return nil, fmt.Errorf("failed to create query request: %w", err) } req.Header.Set("Content-Type", "application/json") if c.qdrantAPIKey != "" { req.Header.Set("api-key", c.qdrantAPIKey) } resp, err := c.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("query request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("qdrant query returned %d: %s", resp.StatusCode, string(body)) } var queryResp qdrantQueryResponse if err := json.NewDecoder(resp.Body).Decode(&queryResp); err != nil { return nil, fmt.Errorf("failed to decode query response: %w", err) } return queryResp.Result, nil } // generateEmbedding calls Ollama bge-m3 to get a 1024-dim vector for the query. func (c *LegalRAGClient) generateEmbedding(ctx context.Context, text string) ([]float64, error) { // Truncate to 2000 chars for bge-m3 if len(text) > 2000 { text = text[:2000] } reqBody := ollamaEmbeddingRequest{ Model: c.embeddingModel, Prompt: text, } jsonBody, err := json.Marshal(reqBody) if err != nil { return nil, fmt.Errorf("failed to marshal embedding request: %w", err) } req, err := http.NewRequestWithContext(ctx, "POST", c.ollamaURL+"/api/embeddings", bytes.NewReader(jsonBody)) if err != nil { return nil, fmt.Errorf("failed to create embedding request: %w", err) } req.Header.Set("Content-Type", "application/json") resp, err := c.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("embedding request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("ollama returned %d: %s", resp.StatusCode, string(body)) } var embResp ollamaEmbeddingResponse if err := json.NewDecoder(resp.Body).Decode(&embResp); err != nil { return nil, fmt.Errorf("failed to decode embedding response: %w", err) } if len(embResp.Embedding) == 0 { return nil, fmt.Errorf("no embedding returned from ollama") } return embResp.Embedding, nil } // SearchCollection queries a specific Qdrant collection for relevant passages. // If collection is empty, it falls back to the default collection (bp_compliance_ce). func (c *LegalRAGClient) SearchCollection(ctx context.Context, collection string, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) { if collection == "" { collection = c.collection } return c.searchInternal(ctx, collection, query, regulationIDs, topK) } // Search queries the compliance CE corpus for relevant passages. func (c *LegalRAGClient) Search(ctx context.Context, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) { return c.searchInternal(ctx, c.collection, query, regulationIDs, topK) } // searchInternal performs the actual search against a given collection. // If hybrid search is enabled, it uses the Qdrant Query API with RRF fusion // (dense + full-text). Falls back to dense-only /points/search on failure. func (c *LegalRAGClient) searchInternal(ctx context.Context, collection string, query string, regulationIDs []string, topK int) ([]LegalSearchResult, error) { // Generate query embedding via Ollama bge-m3 embedding, err := c.generateEmbedding(ctx, query) if err != nil { return nil, fmt.Errorf("failed to generate embedding: %w", err) } // Try hybrid search first (Query API + RRF), fall back to dense-only var hits []qdrantSearchHit if c.hybridEnabled { hybridHits, err := c.searchHybrid(ctx, collection, embedding, regulationIDs, topK) if err == nil { hits = hybridHits } // On error, fall through to dense-only search below } if hits == nil { denseHits, err := c.searchDense(ctx, collection, embedding, regulationIDs, topK) if err != nil { return nil, err } hits = denseHits } // Convert to results using bp_compliance_ce payload schema results := make([]LegalSearchResult, len(hits)) for i, hit := range hits { results[i] = LegalSearchResult{ Text: getString(hit.Payload, "chunk_text"), RegulationCode: getString(hit.Payload, "regulation_id"), RegulationName: getString(hit.Payload, "regulation_name_de"), RegulationShort: getString(hit.Payload, "regulation_short"), Category: getString(hit.Payload, "category"), Pages: getIntSlice(hit.Payload, "pages"), SourceURL: getString(hit.Payload, "source"), Score: hit.Score, } } return results, nil } // searchDense performs a dense-only vector search via Qdrant /points/search. func (c *LegalRAGClient) searchDense(ctx context.Context, collection string, embedding []float64, regulationIDs []string, topK int) ([]qdrantSearchHit, error) { searchReq := qdrantSearchRequest{ Vector: embedding, Limit: topK, WithPayload: true, } if len(regulationIDs) > 0 { conditions := make([]qdrantCondition, len(regulationIDs)) for i, regID := range regulationIDs { conditions[i] = qdrantCondition{ Key: "regulation_id", Match: qdrantMatch{Value: regID}, } } searchReq.Filter = &qdrantFilter{Should: conditions} } jsonBody, err := json.Marshal(searchReq) if err != nil { return nil, fmt.Errorf("failed to marshal search request: %w", err) } url := fmt.Sprintf("%s/collections/%s/points/search", c.qdrantURL, collection) req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody)) if err != nil { return nil, fmt.Errorf("failed to create search request: %w", err) } req.Header.Set("Content-Type", "application/json") if c.qdrantAPIKey != "" { req.Header.Set("api-key", c.qdrantAPIKey) } resp, err := c.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("search request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("qdrant returned %d: %s", resp.StatusCode, string(body)) } var searchResp qdrantSearchResponse if err := json.NewDecoder(resp.Body).Decode(&searchResp); err != nil { return nil, fmt.Errorf("failed to decode search response: %w", err) } return searchResp.Result, nil } // GetLegalContextForAssessment retrieves relevant legal context for an assessment. func (c *LegalRAGClient) GetLegalContextForAssessment(ctx context.Context, assessment *Assessment) (*LegalContext, error) { // Build query from assessment data queryParts := []string{} // Add domain context if assessment.Domain != "" { queryParts = append(queryParts, fmt.Sprintf("KI-Anwendung im Bereich %s", assessment.Domain)) } // Add data type context if assessment.Intake.DataTypes.Article9Data { queryParts = append(queryParts, "besondere Kategorien personenbezogener Daten Art. 9 DSGVO") } if assessment.Intake.DataTypes.PersonalData { queryParts = append(queryParts, "personenbezogene Daten") } if assessment.Intake.DataTypes.MinorData { queryParts = append(queryParts, "Daten von Minderjährigen") } // Add purpose context if assessment.Intake.Purpose.EvaluationScoring { queryParts = append(queryParts, "automatisierte Bewertung Scoring") } if assessment.Intake.Purpose.DecisionMaking { queryParts = append(queryParts, "automatisierte Entscheidung Art. 22 DSGVO") } if assessment.Intake.Purpose.Profiling { queryParts = append(queryParts, "Profiling") } // Add risk-specific context if assessment.DSFARecommended { queryParts = append(queryParts, "Datenschutz-Folgenabschätzung Art. 35 DSGVO") } if assessment.Art22Risk { queryParts = append(queryParts, "automatisierte Einzelentscheidung rechtliche Wirkung") } // Build final query query := strings.Join(queryParts, " ") if query == "" { query = "DSGVO Anforderungen KI-System Datenschutz" } // Determine which regulations to search based on triggered rules regulationIDs := c.determineRelevantRegulations(assessment) // Search compliance corpus results, err := c.Search(ctx, query, regulationIDs, 5) if err != nil { return nil, err } // Extract unique regulations regSet := make(map[string]bool) for _, r := range results { regSet[r.RegulationCode] = true } regulations := make([]string, 0, len(regSet)) for r := range regSet { regulations = append(regulations, r) } // Build relevant articles from page references articles := make([]string, 0) for _, r := range results { if len(r.Pages) > 0 { key := fmt.Sprintf("%s S. %v", r.RegulationShort, r.Pages) articles = append(articles, key) } } return &LegalContext{ Query: query, Results: results, RelevantArticles: articles, Regulations: regulations, GeneratedAt: time.Now().UTC(), }, nil } // determineRelevantRegulations determines which regulations to search based on the assessment. func (c *LegalRAGClient) determineRelevantRegulations(assessment *Assessment) []string { ids := []string{"eu_2016_679"} // Always include GDPR // Check triggered rules for regulation hints for _, rule := range assessment.TriggeredRules { gdprRef := rule.GDPRRef if strings.Contains(gdprRef, "AI Act") || strings.Contains(gdprRef, "KI-VO") { if !contains(ids, "eu_2024_1689") { ids = append(ids, "eu_2024_1689") } } if strings.Contains(gdprRef, "NIS2") || strings.Contains(gdprRef, "NIS-2") { if !contains(ids, "eu_2022_2555") { ids = append(ids, "eu_2022_2555") } } if strings.Contains(gdprRef, "CRA") || strings.Contains(gdprRef, "Cyber Resilience") { if !contains(ids, "eu_2024_2847") { ids = append(ids, "eu_2024_2847") } } if strings.Contains(gdprRef, "Maschinenverordnung") || strings.Contains(gdprRef, "Machinery") { if !contains(ids, "eu_2023_1230") { ids = append(ids, "eu_2023_1230") } } } // Add AI Act if AI-related controls are required for _, ctrl := range assessment.RequiredControls { if strings.HasPrefix(ctrl.ID, "AI-") { if !contains(ids, "eu_2024_1689") { ids = append(ids, "eu_2024_1689") } break } } // Add CRA/NIS2 if security controls are required for _, ctrl := range assessment.RequiredControls { if strings.HasPrefix(ctrl.ID, "CRYPTO-") || strings.HasPrefix(ctrl.ID, "IAM-") || strings.HasPrefix(ctrl.ID, "SEC-") { if !contains(ids, "eu_2022_2555") { ids = append(ids, "eu_2022_2555") } if !contains(ids, "eu_2024_2847") { ids = append(ids, "eu_2024_2847") } break } } return ids } // ListAvailableRegulations returns the list of regulations available in the corpus. func (c *LegalRAGClient) ListAvailableRegulations() []CERegulationInfo { return []CERegulationInfo{ CERegulationInfo{ID: "eu_2023_1230", NameDE: "EU-Maschinenverordnung 2023/1230", NameEN: "EU Machinery Regulation 2023/1230", Short: "Maschinenverordnung", Category: "regulation"}, CERegulationInfo{ID: "eu_2024_1689", NameDE: "EU KI-Verordnung (AI Act)", NameEN: "EU AI Act 2024/1689", Short: "AI Act", Category: "regulation"}, CERegulationInfo{ID: "eu_2024_2847", NameDE: "Cyber Resilience Act", NameEN: "Cyber Resilience Act 2024/2847", Short: "CRA", Category: "regulation"}, CERegulationInfo{ID: "eu_2022_2555", NameDE: "NIS-2-Richtlinie", NameEN: "NIS2 Directive 2022/2555", Short: "NIS2", Category: "regulation"}, CERegulationInfo{ID: "eu_2016_679", NameDE: "Datenschutz-Grundverordnung (DSGVO)", NameEN: "General Data Protection Regulation (GDPR)", Short: "DSGVO/GDPR", Category: "regulation"}, CERegulationInfo{ID: "eu_blue_guide_2022", NameDE: "EU Blue Guide 2022", NameEN: "EU Blue Guide 2022", Short: "Blue Guide", Category: "guidance"}, CERegulationInfo{ID: "nist_sp_800_218", NameDE: "NIST Secure Software Development Framework", NameEN: "NIST SSDF SP 800-218", Short: "NIST SSDF", Category: "guidance"}, CERegulationInfo{ID: "nist_csf_2_0", NameDE: "NIST Cybersecurity Framework 2.0", NameEN: "NIST CSF 2.0", Short: "NIST CSF", Category: "guidance"}, CERegulationInfo{ID: "oecd_ai_principles", NameDE: "OECD Empfehlung zu Kuenstlicher Intelligenz", NameEN: "OECD Recommendation on AI", Short: "OECD AI", Category: "guidance"}, CERegulationInfo{ID: "enisa_supply_chain_good_practices", NameDE: "ENISA Supply Chain Cybersecurity", NameEN: "ENISA Good Practices for Supply Chain Cybersecurity", Short: "ENISA Supply Chain", Category: "guidance"}, CERegulationInfo{ID: "enisa_threat_landscape_supply_chain", NameDE: "ENISA Threat Landscape Supply Chain", NameEN: "ENISA Threat Landscape for Supply Chain Attacks", Short: "ENISA Threat SC", Category: "guidance"}, CERegulationInfo{ID: "enisa_ics_scada_dependencies", NameDE: "ENISA ICS/SCADA Abhaengigkeiten", NameEN: "ENISA ICS/SCADA Communication Dependencies", Short: "ENISA ICS/SCADA", Category: "guidance"}, CERegulationInfo{ID: "cisa_secure_by_design", NameDE: "CISA Secure by Design", NameEN: "CISA Secure by Design", Short: "CISA SbD", Category: "guidance"}, CERegulationInfo{ID: "enisa_cybersecurity_state_2024", NameDE: "ENISA State of Cybersecurity 2024", NameEN: "ENISA State of Cybersecurity in the Union 2024", Short: "ENISA 2024", Category: "guidance"}, } } // FormatLegalContextForPrompt formats the legal context for inclusion in an LLM prompt. func (c *LegalRAGClient) FormatLegalContextForPrompt(lc *LegalContext) string { if lc == nil || len(lc.Results) == 0 { return "" } var buf bytes.Buffer buf.WriteString("\n\n**Relevante Rechtsgrundlagen:**\n\n") for i, result := range lc.Results { buf.WriteString(fmt.Sprintf("%d. **%s** (%s)", i+1, result.RegulationShort, result.RegulationCode)) if len(result.Pages) > 0 { buf.WriteString(fmt.Sprintf(" - Seiten %v", result.Pages)) } buf.WriteString("\n") buf.WriteString(fmt.Sprintf(" > %s\n\n", truncateText(result.Text, 300))) } return buf.String() } // ScrollChunkResult represents a single chunk from the scroll/list endpoint. type ScrollChunkResult struct { ID string `json:"id"` Text string `json:"text"` RegulationCode string `json:"regulation_code"` RegulationName string `json:"regulation_name"` RegulationShort string `json:"regulation_short"` Category string `json:"category"` Article string `json:"article,omitempty"` Paragraph string `json:"paragraph,omitempty"` SourceURL string `json:"source_url,omitempty"` } // qdrantScrollRequest for the Qdrant scroll API. type qdrantScrollRequest struct { Limit int `json:"limit"` Offset interface{} `json:"offset,omitempty"` // string (UUID) or null WithPayload bool `json:"with_payload"` WithVectors bool `json:"with_vectors"` } // qdrantScrollResponse from the Qdrant scroll API. type qdrantScrollResponse struct { Result struct { Points []qdrantScrollPoint `json:"points"` NextPageOffset interface{} `json:"next_page_offset"` } `json:"result"` } type qdrantScrollPoint struct { ID interface{} `json:"id"` Payload map[string]interface{} `json:"payload"` } // ScrollChunks iterates over all chunks in a Qdrant collection using the scroll API. // Pass an empty offset to start from the beginning. Returns chunks, next offset ID, and error. func (c *LegalRAGClient) ScrollChunks(ctx context.Context, collection string, offset string, limit int) ([]ScrollChunkResult, string, error) { scrollReq := qdrantScrollRequest{ Limit: limit, WithPayload: true, WithVectors: false, } if offset != "" { // Qdrant expects integer point IDs — parse the offset string back to a number // Try parsing as integer first, fall back to string (for UUID-based collections) var offsetInt uint64 if _, err := fmt.Sscanf(offset, "%d", &offsetInt); err == nil { scrollReq.Offset = offsetInt } else { scrollReq.Offset = offset } } jsonBody, err := json.Marshal(scrollReq) if err != nil { return nil, "", fmt.Errorf("failed to marshal scroll request: %w", err) } url := fmt.Sprintf("%s/collections/%s/points/scroll", c.qdrantURL, collection) req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody)) if err != nil { return nil, "", fmt.Errorf("failed to create scroll request: %w", err) } req.Header.Set("Content-Type", "application/json") if c.qdrantAPIKey != "" { req.Header.Set("api-key", c.qdrantAPIKey) } resp, err := c.httpClient.Do(req) if err != nil { return nil, "", fmt.Errorf("scroll request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return nil, "", fmt.Errorf("qdrant returned %d: %s", resp.StatusCode, string(body)) } var scrollResp qdrantScrollResponse if err := json.NewDecoder(resp.Body).Decode(&scrollResp); err != nil { return nil, "", fmt.Errorf("failed to decode scroll response: %w", err) } // Convert points to results chunks := make([]ScrollChunkResult, len(scrollResp.Result.Points)) for i, pt := range scrollResp.Result.Points { // Extract point ID as string pointID := "" if pt.ID != nil { pointID = fmt.Sprintf("%v", pt.ID) } chunks[i] = ScrollChunkResult{ ID: pointID, Text: getString(pt.Payload, "text"), RegulationCode: getString(pt.Payload, "regulation_code"), RegulationName: getString(pt.Payload, "regulation_name"), RegulationShort: getString(pt.Payload, "regulation_short"), Category: getString(pt.Payload, "category"), Article: getString(pt.Payload, "article"), Paragraph: getString(pt.Payload, "paragraph"), SourceURL: getString(pt.Payload, "source_url"), } // Fallback: try alternate payload field names used in ingestion if chunks[i].Text == "" { chunks[i].Text = getString(pt.Payload, "chunk_text") } if chunks[i].RegulationCode == "" { chunks[i].RegulationCode = getString(pt.Payload, "regulation_id") } if chunks[i].RegulationName == "" { chunks[i].RegulationName = getString(pt.Payload, "regulation_name_de") } if chunks[i].SourceURL == "" { chunks[i].SourceURL = getString(pt.Payload, "source") } } // Extract next offset — Qdrant returns integer point IDs nextOffset := "" if scrollResp.Result.NextPageOffset != nil { switch v := scrollResp.Result.NextPageOffset.(type) { case float64: nextOffset = fmt.Sprintf("%.0f", v) case string: nextOffset = v default: nextOffset = fmt.Sprintf("%v", v) } } return chunks, nextOffset, nil } // Helper functions func getString(m map[string]interface{}, key string) string { if v, ok := m[key]; ok { if s, ok := v.(string); ok { return s } } return "" } func getIntSlice(m map[string]interface{}, key string) []int { v, ok := m[key] if !ok { return nil } arr, ok := v.([]interface{}) if !ok { return nil } result := make([]int, 0, len(arr)) for _, item := range arr { if f, ok := item.(float64); ok { result = append(result, int(f)) } } return result } func contains(slice []string, item string) bool { for _, s := range slice { if s == item { return true } } return false } func truncateText(text string, maxLen int) string { if len(text) <= maxLen { return text } return text[:maxLen] + "..." }