Files
breakpilot-compliance/ai-compliance-sdk/internal/api/handlers/rag_handlers.go
T
Benjamin Admin a606000a20 feat(ai-sdk): EvidenceType-Schicht — autoritative Fußnoten/Tabellen/Figuren surfacen
Router-Schicht Intent→KnowledgeSpace→EvidenceType→Collection→Merge→Authority (User-
Entscheidung A generalisiert). Neuer EvidenceType{TEXT,FOOTNOTE,TABLE,FIGURE} +
classifyEvidence (aus is_footnote/is_table/is_figure-Payload). RetrieveEvidence() zieht
die autoritative typisierte Evidence GEZIELT aus der KB-Slice (top-20, in-scope) statt
sie im Breit-Basis-Text-Merge zu verlieren; /retrieve liefert footnotes[]/tables[]/
figures[]. Kein perColl-Blindanstieg. Dieselbe Infra trägt C8 (FIGURE) ohne Router-Umbau.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-07-01 09:30:42 +02:00

337 lines
10 KiB
Go

package handlers
import (
"net/http"
"strconv"
"github.com/breakpilot/ai-compliance-sdk/internal/ucca"
"github.com/gin-gonic/gin"
)
// RAGHandlers handles RAG search API endpoints.
type RAGHandlers struct {
ragClient *ucca.LegalRAGClient
corpusVersionStore *ucca.CorpusVersionStore
}
// NewRAGHandlers creates new RAG handlers.
func NewRAGHandlers(corpusVersionStore *ucca.CorpusVersionStore) *RAGHandlers {
return &RAGHandlers{
ragClient: ucca.NewLegalRAGClient(),
corpusVersionStore: corpusVersionStore,
}
}
// AllowedCollections is the whitelist of Qdrant collections that can be queried.
var AllowedCollections = map[string]bool{
"bp_compliance_ce": true,
"bp_compliance_gesetze": true,
"bp_compliance_datenschutz": true,
"bp_compliance_recht": true,
"bp_compliance_gdpr": true,
"bp_dsfa_corpus": true,
"bp_dsfa_templates": true,
"bp_dsfa_risks": true,
"bp_legal_templates": true,
"bp_iace_libraries": true,
"bp_iace_accident_stats": true,
"bp_iace_safety_kb": true,
"bp_iace_fmea_kb": true,
}
// SearchRequest represents a RAG search request.
type SearchRequest struct {
Query string `json:"query" binding:"required"`
Collection string `json:"collection,omitempty"`
Regulations []string `json:"regulations,omitempty"`
TopK int `json:"top_k,omitempty"`
}
// Search performs a semantic search across the compliance regulation corpus.
// POST /sdk/v1/rag/search
func (h *RAGHandlers) Search(c *gin.Context) {
var req SearchRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
if req.TopK <= 0 || req.TopK > 20 {
req.TopK = 5
}
// Validate collection if specified
if req.Collection != "" {
if !AllowedCollections[req.Collection] {
c.JSON(http.StatusBadRequest, gin.H{"error": "Unknown collection: " + req.Collection + ". Allowed: bp_compliance_ce, bp_compliance_recht, bp_compliance_gesetze, bp_compliance_datenschutz, bp_dsfa_corpus, bp_legal_templates"})
return
}
}
results, err := h.ragClient.SearchCollection(c.Request.Context(), req.Collection, req.Query, req.Regulations, req.TopK)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "RAG search failed: " + err.Error()})
return
}
c.JSON(http.StatusOK, gin.H{
"query": req.Query,
"results": results,
"count": len(results),
"assessment": ucca.Assess(results),
})
}
// RetrieveRequest is the Authority Router request: a query only, no collection — the router decides
// which collections to query (broad authority base + the in-scope KB-2026.1 slice).
type RetrieveRequest struct {
Query string `json:"query" binding:"required"`
TopK int `json:"top_k,omitempty"`
}
// Retrieve is the Authority Router endpoint. The Advisor calls this with ONLY a query and stays
// collection-agnostic; the router fans out over the authority base + the in-scope slice, merges by
// authority score, and returns the unified top-K. Response shape matches Search (query/results/
// count/assessment) so existing consumers parse it unchanged.
// POST /sdk/v1/rag/retrieve
func (h *RAGHandlers) Retrieve(c *gin.Context) {
var req RetrieveRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
if req.TopK <= 0 || req.TopK > 20 {
req.TopK = 8
}
results, err := h.ragClient.Retrieve(c.Request.Context(), req.Query, req.TopK)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "RAG retrieve failed: " + err.Error()})
return
}
// Evidence-Type-Schicht: die autoritative typisierte Evidence (Fußnoten/Tabellen/Figuren) aus
// dem KB-Wissensraum SEPARAT surfacen, statt sie im Breit-Basis-Text-Merge zu verlieren.
// results[] bleibt der Text-Kontext fürs LLM + die Quellen-Liste.
ev := h.ragClient.RetrieveEvidence(c.Request.Context(), req.Query)
c.JSON(http.StatusOK, gin.H{
"query": req.Query,
"results": results,
"count": len(results),
"assessment": ucca.Assess(results),
"footnotes": footnotesFromEvidence(ev[ucca.EvidenceFootnote]),
"tables": tablesFromEvidence(ev[ucca.EvidenceTable]),
"figures": figuresFromEvidence(ev[ucca.EvidenceFigure]),
})
}
// footnotesFromEvidence maps FOOTNOTE evidence to the Evidence-Workspace RawFootnote shape.
func footnotesFromEvidence(rs []ucca.LegalSearchResult) []gin.H {
out := make([]gin.H, 0, len(rs))
for _, r := range rs {
out = append(out, gin.H{
"id": r.CitationUnit,
"ref": r.CitationUnit,
"number": r.FootnoteLabel,
"regulation_code": r.RegulationCode,
"regulation_short": r.RegulationShort,
"regulation_name": r.RegulationName,
"section": r.RefCitationUnit,
"text": r.FootnoteVerbatim,
})
}
return out
}
// tablesFromEvidence maps TABLE evidence (C6/C9). Key is present so the same Evidence-Type path
// carries tables the moment the UI adds a table section.
func tablesFromEvidence(rs []ucca.LegalSearchResult) []gin.H {
out := make([]gin.H, 0, len(rs))
for _, r := range rs {
out = append(out, gin.H{
"id": r.CitationUnit,
"caption": r.ArticleLabel,
"regulation_code": r.RegulationCode,
"regulation_short": r.RegulationShort,
"regulation_name": r.RegulationName,
"section": r.RefCitationUnit,
"text": r.Text,
})
}
return out
}
// figuresFromEvidence maps FIGURE evidence (C8). Empty until C8 populates figure units; image_url/
// caption/vision_summary get added here when C8 lands — same path, no router change.
func figuresFromEvidence(rs []ucca.LegalSearchResult) []gin.H {
out := make([]gin.H, 0, len(rs))
for _, r := range rs {
out = append(out, gin.H{
"figure_id": r.CitationUnit,
"caption": r.ArticleLabel,
"regulation_code": r.RegulationCode,
"regulation_short": r.RegulationShort,
"regulation_name": r.RegulationName,
"section": r.RefCitationUnit,
})
}
return out
}
// ListRegulations returns the list of available regulations in the corpus.
// GET /sdk/v1/rag/regulations
func (h *RAGHandlers) ListRegulations(c *gin.Context) {
regs := h.ragClient.ListAvailableRegulations()
// Optionally filter by category
category := c.Query("category")
if category != "" {
filtered := make([]ucca.CERegulationInfo, 0)
for _, r := range regs {
if r.Category == category {
filtered = append(filtered, r)
}
}
regs = filtered
}
c.JSON(http.StatusOK, gin.H{
"regulations": regs,
"count": len(regs),
})
}
// CorpusStatus returns the current version status of all RAG collections.
// GET /sdk/v1/rag/corpus-status
func (h *RAGHandlers) CorpusStatus(c *gin.Context) {
if h.corpusVersionStore == nil {
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "corpus version store not configured"})
return
}
versions, err := h.corpusVersionStore.GetAllLatestVersions(c.Request.Context())
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch corpus versions: " + err.Error()})
return
}
collections := make(map[string]gin.H)
for _, v := range versions {
collections[v.CollectionName] = gin.H{
"id": v.ID,
"current_version": v.Version,
"documents_count": v.DocumentsCount,
"chunks_count": v.ChunksCount,
"regulations": v.Regulations,
"last_updated": v.CreatedAt,
"digest": v.Digest,
}
}
c.JSON(http.StatusOK, gin.H{
"collections": collections,
})
}
// CorpusVersionHistory returns the version history for a specific collection.
// GET /sdk/v1/rag/corpus-versions/:collection
func (h *RAGHandlers) CorpusVersionHistory(c *gin.Context) {
if h.corpusVersionStore == nil {
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "corpus version store not configured"})
return
}
collection := c.Param("collection")
if collection == "" {
c.JSON(http.StatusBadRequest, gin.H{"error": "collection name required"})
return
}
versions, err := h.corpusVersionStore.ListCorpusVersions(c.Request.Context(), collection)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to fetch corpus versions: " + err.Error()})
return
}
c.JSON(http.StatusOK, gin.H{
"collection": collection,
"versions": versions,
"count": len(versions),
})
}
// HandleScrollChunks scrolls/lists all chunks in a Qdrant collection with pagination.
// GET /sdk/v1/rag/scroll?collection=...&offset=...&limit=...
func (h *RAGHandlers) HandleScrollChunks(c *gin.Context) {
collection := c.Query("collection")
if collection == "" {
c.JSON(http.StatusBadRequest, gin.H{"error": "query parameter 'collection' is required"})
return
}
if !AllowedCollections[collection] {
c.JSON(http.StatusBadRequest, gin.H{"error": "Unknown collection: " + collection})
return
}
// Parse limit (default 100, max 500)
limit := 100
if limitStr := c.Query("limit"); limitStr != "" {
parsed, err := strconv.Atoi(limitStr)
if err != nil || parsed < 1 {
c.JSON(http.StatusBadRequest, gin.H{"error": "limit must be a positive integer"})
return
}
limit = parsed
}
if limit > 500 {
limit = 500
}
// Offset is optional (empty string = start from beginning)
offset := c.Query("offset")
chunks, nextOffset, err := h.ragClient.ScrollChunks(c.Request.Context(), collection, offset, limit)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "scroll failed: " + err.Error()})
return
}
c.JSON(http.StatusOK, gin.H{
"chunks": chunks,
"next_offset": nextOffset,
"total": len(chunks),
})
}
// LegalCorpusStructure returns the composition (distinct articles, annexes,
// recitals + chunk count) of every ingested eur-lex legal act, so the coverage
// page can show WHAT was ingested instead of just the act name.
// GET /sdk/v1/rag/legal-corpus
func (h *RAGHandlers) LegalCorpusStructure(c *gin.Context) {
acts, err := h.ragClient.CorpusStructure(c.Request.Context())
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to aggregate legal corpus: " + err.Error()})
return
}
arts, anns, recs := 0, 0, 0
for _, a := range acts {
arts += a.Articles
anns += a.Annexes
recs += a.Recitals
}
c.JSON(http.StatusOK, gin.H{
"regulations": acts,
"totals": gin.H{
"regulations": len(acts),
"articles": arts,
"annexes": anns,
"recitals": recs,
},
})
}