package ucca

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strconv"
	"unicode/utf8"
)

// ScrollChunks fetches a single page of chunks from a Qdrant collection using
// the scroll API.
//
// offset is the point ID to resume from; pass an empty string to start from
// the beginning. An offset that consists entirely of decimal digits is sent as
// an unsigned integer ID, anything else is sent verbatim as a string ID.
// limit is forwarded unchanged as the page size.
//
// It returns the chunks of the page, the offset to pass to the next call
// (empty when the response carries no next page offset), and an error if the
// request could not be built or sent, the server answered with a non-200
// status, or the response body could not be decoded.
func (c *LegalRAGClient) ScrollChunks(ctx context.Context, collection string, offset string, limit int) ([]ScrollChunkResult, string, error) {
	scrollReq := qdrantScrollRequest{
		Limit:       limit,
		WithPayload: true,
		WithVectors: false,
	}
	if offset != "" {
		// ParseUint only succeeds when the whole string is a decimal number.
		// A prefix-based scan (fmt.Sscanf with %d) would accept the leading
		// digits of a string ID such as "550e8400-..." and silently send the
		// integer 550 instead of the real offset.
		if offsetInt, err := strconv.ParseUint(offset, 10, 64); err == nil {
			scrollReq.Offset = offsetInt
		} else {
			scrollReq.Offset = offset
		}
	}

	jsonBody, err := json.Marshal(scrollReq)
	if err != nil {
		return nil, "", fmt.Errorf("failed to marshal scroll request: %w", err)
	}

	endpoint := fmt.Sprintf("%s/collections/%s/points/scroll", c.qdrantURL, collection)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(jsonBody))
	if err != nil {
		return nil, "", fmt.Errorf("failed to create scroll request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	if c.qdrantAPIKey != "" {
		req.Header.Set("api-key", c.qdrantAPIKey)
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, "", fmt.Errorf("scroll request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// The body is only used to enrich the error message, so a read
		// failure here is deliberately ignored: the status code alone is
		// still reported.
		body, _ := io.ReadAll(resp.Body)
		return nil, "", fmt.Errorf("qdrant returned %d: %s", resp.StatusCode, string(body))
	}

	var scrollResp qdrantScrollResponse
	if err := json.NewDecoder(resp.Body).Decode(&scrollResp); err != nil {
		return nil, "", fmt.Errorf("failed to decode scroll response: %w", err)
	}

	chunks := make([]ScrollChunkResult, len(scrollResp.Result.Points))
	for i, pt := range scrollResp.Result.Points {
		pointID := ""
		if pt.ID != nil {
			pointID = fmt.Sprintf("%v", pt.ID)
		}

		chunks[i] = ScrollChunkResult{
			ID:              pointID,
			Text:            getString(pt.Payload, "text"),
			RegulationCode:  getString(pt.Payload, "regulation_code"),
			RegulationName:  getString(pt.Payload, "regulation_name"),
			RegulationShort: getString(pt.Payload, "regulation_short"),
			Category:        getString(pt.Payload, "category"),
			Article:         getString(pt.Payload, "article"),
			Paragraph:       getString(pt.Payload, "paragraph"),
			SourceURL:       getString(pt.Payload, "source_url"),
		}

		// Fall back to alternative payload keys when the primary key is
		// missing, empty, or not a string.
		if chunks[i].Text == "" {
			chunks[i].Text = getString(pt.Payload, "chunk_text")
		}
		if chunks[i].RegulationCode == "" {
			chunks[i].RegulationCode = getString(pt.Payload, "regulation_id")
		}
		if chunks[i].RegulationName == "" {
			chunks[i].RegulationName = getString(pt.Payload, "regulation_name_de")
		}
		if chunks[i].SourceURL == "" {
			chunks[i].SourceURL = getString(pt.Payload, "source")
		}
	}

	nextOffset := ""
	if scrollResp.Result.NextPageOffset != nil {
		switch v := scrollResp.Result.NextPageOffset.(type) {
		case float64:
			// Format without exponent or fraction so the value can be fed
			// back in as a plain integer offset.
			// NOTE(review): a float64 cannot represent every integer above
			// 2^53 exactly; confirm whether such IDs occur in practice.
			nextOffset = fmt.Sprintf("%.0f", v)
		case string:
			nextOffset = v
		default:
			nextOffset = fmt.Sprintf("%v", v)
		}
	}

	return chunks, nextOffset, nil
}

// Helper functions

// getString returns the string stored under key in m. It returns "" when the
// key is absent or the stored value is not a string.
func getString(m map[string]interface{}, key string) string {
	if v, ok := m[key]; ok {
		if s, ok := v.(string); ok {
			return s
		}
	}
	return ""
}

// getIntSlice returns the numbers stored under key in m as a slice of int.
// It returns nil when the key is absent or the value is not a
// []interface{}. Elements that are not float64 are skipped; float64 elements
// are converted with int(), which truncates any fractional part.
func getIntSlice(m map[string]interface{}, key string) []int {
	v, ok := m[key]
	if !ok {
		return nil
	}
	arr, ok := v.([]interface{})
	if !ok {
		return nil
	}
	result := make([]int, 0, len(arr))
	for _, item := range arr {
		if f, ok := item.(float64); ok {
			result = append(result, int(f))
		}
	}
	return result
}

// contains reports whether item is an element of slice.
func contains(slice []string, item string) bool {
	for _, s := range slice {
		if s == item {
			return true
		}
	}
	return false
}

// truncateText shortens text to at most maxLen bytes and appends "..." when
// anything was cut off. Text that already fits is returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte maxLen is
// not the start of a rune, the cut moves back to the previous rune boundary,
// so the kept prefix may be slightly shorter than maxLen. A negative maxLen
// is treated as 0.
func truncateText(text string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(text) <= maxLen {
		return text
	}
	cut := maxLen
	// text[cut] is in range because cut <= maxLen < len(text).
	for cut > 0 && !utf8.RuneStart(text[cut]) {
		cut--
	}
	return text[:cut] + "..."
}