Files
breakpilot-compliance/ai-compliance-sdk/internal/ucca/legal_rag_scroll.go
T
Benjamin Admin 2e29b611c9 feat(iace): Phase 1 — Haftungs-Fixes, Massnahmen-Verkabelung, Explainability Engine
Phase 1A — Haftungs-kritische Fixes:
- SIL/PL-Badges als "Vorab-Einschaetzung" mit Tooltip gekennzeichnet
- Coverage-Disclaimer in CE-Akte, Projekt-Uebersicht und Print-Export
- Norm-Referenzen: 42 Kapitelverweise durch Themen-Deskriptoren ersetzt

Phase 1B — Massnahmen-Verkabelung:
- 16 neue Massnahmen (M201-M216) fuer bisher unabgedeckte Kategorien
  (communication_failure, hmi_error, firmware_corruption, maintenance,
  sensor_fault, mode_confusion)
- Kategorie-Fallback im Initialize-Endpoint: ordnet Massnahmen aus der
  Bibliothek automatisch per HazardCategory zu (max 8 pro Kategorie)
- Total: 225 → 241 Massnahmen, 0 Kategorien ohne Massnahmen

Phase 1C — Explainability Engine:
- MatchReason Struct in PatternMatch (type, tag, met)
- Pattern Engine schreibt fuer jeden Match strukturierte Begruendungen
- Frontend zeigt "Erkannt weil: Komponente X, Energie Y, Kein Ausschluss Z"

Weitere Aenderungen:
- BAuA/OSHA Regulatory Hints: 3 Enrich-Endpoints (per Hazard, per Measure, Batch)
- Dokumente-Tab in IACE-Bibliothek (36.708 Chunks aus Qdrant)
- Varianten-UX: Basis-Projekt-Summary auf Varianten-Seite
- Projekt-Initialisierung: POST /initialize kettet Parse→Komponenten→Patterns→Hazards→Massnahmen→Normen
- 18 pre-existing TS-Fehler gefixt, Route-Konflikt behoben
- Component-Library + Measures-Library Tests aktualisiert

Tests: Go alle bestanden, TS 0 Fehler, Playwright 141+ bestanden

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-09 21:32:23 +02:00

236 lines
6.4 KiB
Go

package ucca
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strconv"
)
// ScrollChunks fetches one page of points from a Qdrant collection through the
// scroll endpoint and maps each point's payload to a ScrollChunkResult.
//
// offset is the page cursor: pass "" to start from the beginning, or the next
// offset returned by the previous call to continue. A cursor consisting solely
// of decimal digits is sent as an unsigned integer; any other value (for
// example a UUID) is sent unchanged as a string. limit is forwarded as the
// page size.
//
// It returns the chunks of this page, the cursor for the next page ("" when
// the response carries no next page offset), and an error if the request could
// not be built or sent, Qdrant answered with a non-200 status, or the response
// body could not be decoded.
func (c *LegalRAGClient) ScrollChunks(ctx context.Context, collection string, offset string, limit int) ([]ScrollChunkResult, string, error) {
	scrollReq := qdrantScrollRequest{
		Limit:       limit,
		WithPayload: true,
		WithVectors: false,
	}
	if offset != "" {
		// ParseUint succeeds only when the whole string is a decimal number.
		// A prefix scan (fmt.Sscanf with %d) would turn a UUID cursor such as
		// "550e8400-e29b-..." into the integer 550 and resume from the wrong point.
		if n, err := strconv.ParseUint(offset, 10, 64); err == nil {
			scrollReq.Offset = n
		} else {
			scrollReq.Offset = offset
		}
	}

	jsonBody, err := json.Marshal(scrollReq)
	if err != nil {
		return nil, "", fmt.Errorf("failed to marshal scroll request: %w", err)
	}

	url := fmt.Sprintf("%s/collections/%s/points/scroll", c.qdrantURL, collection)
	req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
	if err != nil {
		return nil, "", fmt.Errorf("failed to create scroll request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	if c.qdrantAPIKey != "" {
		req.Header.Set("api-key", c.qdrantAPIKey)
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, "", fmt.Errorf("scroll request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		body, _ := io.ReadAll(resp.Body)
		return nil, "", fmt.Errorf("qdrant returned %d: %s", resp.StatusCode, string(body))
	}

	var scrollResp qdrantScrollResponse
	if err := json.NewDecoder(resp.Body).Decode(&scrollResp); err != nil {
		return nil, "", fmt.Errorf("failed to decode scroll response: %w", err)
	}

	chunks := make([]ScrollChunkResult, len(scrollResp.Result.Points))
	for i, pt := range scrollResp.Result.Points {
		pointID := ""
		if pt.ID != nil {
			pointID = scrollIDString(pt.ID)
		}
		chunks[i] = ScrollChunkResult{
			ID:              pointID,
			Text:            getString(pt.Payload, "text"),
			RegulationCode:  getString(pt.Payload, "regulation_code"),
			RegulationName:  getString(pt.Payload, "regulation_name"),
			RegulationShort: getString(pt.Payload, "regulation_short"),
			Category:        getString(pt.Payload, "category"),
			Article:         getString(pt.Payload, "article"),
			Paragraph:       getString(pt.Payload, "paragraph"),
			SourceURL:       getString(pt.Payload, "source_url"),
		}
		// Fall back to the alternative payload keys when the primary ones
		// are missing or empty.
		if chunks[i].Text == "" {
			chunks[i].Text = getString(pt.Payload, "chunk_text")
		}
		if chunks[i].RegulationCode == "" {
			chunks[i].RegulationCode = getString(pt.Payload, "regulation_id")
		}
		if chunks[i].RegulationName == "" {
			chunks[i].RegulationName = getString(pt.Payload, "regulation_name_de")
		}
		if chunks[i].SourceURL == "" {
			chunks[i].SourceURL = getString(pt.Payload, "source")
		}
	}

	nextOffset := ""
	if scrollResp.Result.NextPageOffset != nil {
		nextOffset = scrollIDString(scrollResp.Result.NextPageOffset)
	}
	return chunks, nextOffset, nil
}

// scrollIDString renders a decoded point ID or page cursor as a string.
// encoding/json stores JSON numbers held in an interface value as float64, and
// %v prints large float64 values in exponent form (1.234567e+06), so numeric
// values are formatted without a fraction or exponent instead. Strings are
// returned unchanged; any other type falls back to %v.
func scrollIDString(id interface{}) string {
	switch v := id.(type) {
	case float64:
		return fmt.Sprintf("%.0f", v)
	case string:
		return v
	default:
		return fmt.Sprintf("%v", v)
	}
}
// ScrollDocumentIndex pages through every point of a Qdrant collection and
// returns one CEDocumentInfo per distinct regulation_id.
//
// Only a small set of payload fields is requested (no vectors). Points whose
// payload has no regulation_id are skipped. The first point seen for a
// regulation supplies its names, category and source fields; every further
// point with the same regulation_id only increments ChunkCount.
//
// The order of the returned slice is unspecified because it is produced by map
// iteration. Any failed page aborts the scroll and returns the error.
func (c *LegalRAGClient) ScrollDocumentIndex(ctx context.Context, collection string) ([]CEDocumentInfo, error) {
	// regulation_id → aggregated info
	docMap := make(map[string]*CEDocumentInfo)
	var offset interface{}

	for {
		page, err := c.scrollDocumentIndexPage(ctx, collection, offset)
		if err != nil {
			return nil, err
		}

		for _, pt := range page.Result.Points {
			regID := getString(pt.Payload, "regulation_id")
			if regID == "" {
				continue
			}
			if existing, ok := docMap[regID]; ok {
				existing.ChunkCount++
				continue
			}
			docMap[regID] = &CEDocumentInfo{
				RegulationID: regID,
				NameDE:       getString(pt.Payload, "regulation_name_de"),
				NameEN:       getString(pt.Payload, "regulation_name_en"),
				Category:     getString(pt.Payload, "category"),
				SourceURL:    getString(pt.Payload, "source"),
				SourceOrg:    getString(pt.Payload, "source_org"),
				ChunkCount:   1,
			}
		}

		// A missing next page offset ends the scroll.
		if page.Result.NextPageOffset == nil {
			break
		}
		offset = page.Result.NextPageOffset
	}

	docs := make([]CEDocumentInfo, 0, len(docMap))
	for _, d := range docMap {
		docs = append(docs, *d)
	}
	return docs, nil
}

// scrollDocumentIndexPage requests a single scroll page (up to 500 points,
// document-level payload fields only) starting at offset; a nil offset starts
// from the beginning. It is a separate function so that the deferred
// resp.Body.Close runs once per page instead of accumulating open response
// bodies until the whole scroll has finished.
func (c *LegalRAGClient) scrollDocumentIndexPage(ctx context.Context, collection string, offset interface{}) (*qdrantScrollResponse, error) {
	includeFields := []string{"regulation_id", "regulation_name_de", "regulation_name_en", "category", "source", "source_org"}
	batchLimit := 500

	reqBody := map[string]interface{}{
		"limit":        batchLimit,
		"with_payload": map[string]interface{}{"include": includeFields},
		"with_vectors": false,
	}
	if offset != nil {
		reqBody["offset"] = offset
	}

	jsonBody, err := json.Marshal(reqBody)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal scroll request: %w", err)
	}

	url := fmt.Sprintf("%s/collections/%s/points/scroll", c.qdrantURL, collection)
	req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(jsonBody))
	if err != nil {
		return nil, fmt.Errorf("failed to create scroll request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	if c.qdrantAPIKey != "" {
		req.Header.Set("api-key", c.qdrantAPIKey)
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("scroll request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		body, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("qdrant returned %d: %s", resp.StatusCode, string(body))
	}

	var scrollResp qdrantScrollResponse
	if err := json.NewDecoder(resp.Body).Decode(&scrollResp); err != nil {
		return nil, fmt.Errorf("failed to decode scroll response: %w", err)
	}
	return &scrollResp, nil
}
// Helper functions
// getString looks up key in m and returns the value when it is a string.
// It returns "" when the key is absent or the stored value has another type.
func getString(m map[string]interface{}, key string) string {
	// A failed assertion (missing key or non-string value) leaves str as "".
	str, _ := m[key].(string)
	return str
}
// getIntSlice reads the list stored under key in m and returns its float64
// elements converted to int (fraction truncated). Elements of any other type
// are dropped. It returns nil when the key is absent or the value is not a
// []interface{}; a present list always yields a non-nil slice.
func getIntSlice(m map[string]interface{}, key string) []int {
	items, isList := m[key].([]interface{})
	if !isList {
		return nil
	}

	ints := make([]int, 0, len(items))
	for idx := range items {
		num, isNumber := items[idx].(float64)
		if !isNumber {
			continue
		}
		ints = append(ints, int(num))
	}
	return ints
}
// contains reports whether item equals at least one element of slice.
// A nil or empty slice never contains anything.
func contains(slice []string, item string) bool {
	for idx := 0; idx < len(slice); idx++ {
		if slice[idx] != item {
			continue
		}
		return true
	}
	return false
}
// truncateText shortens text to at most maxLen bytes and appends "..." when
// anything was cut off. Text that already fits is returned unchanged.
//
// The cut is moved back to the nearest rune boundary at or before maxLen, so a
// multi-byte UTF-8 character (for example "ß" or an umlaut) is never split
// into invalid bytes. A negative maxLen is treated as 0 instead of panicking.
func truncateText(text string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(text) <= maxLen {
		return text
	}

	// Ranging over a string yields the byte index of each rune start; keep
	// the last one that does not exceed the limit.
	cut := 0
	for i := range text {
		if i > maxLen {
			break
		}
		cut = i
	}
	return text[:cut] + "..."
}