All checks were successful
CI/CD / go-lint (push) Has been skipped
CI/CD / python-lint (push) Has been skipped
CI/CD / nodejs-lint (push) Has been skipped
CI/CD / test-go-ai-compliance (push) Successful in 32s
CI/CD / test-python-backend-compliance (push) Successful in 34s
CI/CD / test-python-document-crawler (push) Successful in 23s
CI/CD / test-python-dsms-gateway (push) Successful in 21s
CI/CD / validate-canonical-controls (push) Successful in 11s
CI/CD / Deploy (push) Successful in 2s
Module 2: Extended Compliance Dashboard with roadmap, module-status, next-actions, snapshots, score-history Module 3: 7 German security document templates (IT-Sicherheitskonzept, Datenschutz, Backup, Logging, Incident-Response, Zugriff, Risikomanagement) Module 4: Compliance Process Manager with CRUD, complete/skip/seed, ~50 seed tasks, 3-tab UI Module 5: Evidence Collector Extended with automated checks, control-mapping, coverage report, 4-tab UI Also includes: canonical control library enhancements (verification method, categories, dedup), control generator improvements, RAG client extensions 52 tests pass, frontend builds clean. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
180 lines
5.6 KiB
Python
180 lines
5.6 KiB
Python
"""
|
|
Compliance RAG Client — Proxy to Go SDK RAG Search.
|
|
|
|
Lightweight HTTP client that queries the Go AI Compliance SDK's
|
|
POST /sdk/v1/rag/search endpoint. This avoids needing embedding
|
|
models or direct Qdrant access in Python.
|
|
|
|
Error-tolerant: RAG failures never break the calling function.
|
|
"""
|
|
|
|
import logging
|
|
import os
|
|
from dataclasses import dataclass
|
|
from typing import List, Optional
|
|
|
|
import httpx
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Base URL of the Go AI Compliance SDK, overridable via the SDK_URL
# environment variable. The `or` fallback (instead of a getenv default) also
# covers SDK_URL being set but empty, which would otherwise produce host-less
# request URLs such as "/sdk/v1/rag/search".
SDK_URL = os.getenv("SDK_URL") or "http://ai-compliance-sdk:8090"

# Timeout passed to httpx for RAG search requests.
RAG_SEARCH_TIMEOUT = 15.0  # seconds
|
|
|
|
|
|
@dataclass
class RAGSearchResult:
    """One chunk returned from the compliance corpus.

    Field order is part of the constructor signature; only ``collection``
    is optional and defaults to an empty string.
    """

    # Chunk content.
    text: str

    # Regulation identification as delivered in the response payload.
    regulation_code: str
    regulation_name: str
    regulation_short: str

    # Location / classification metadata of the chunk.
    category: str
    article: str
    paragraph: str
    source_url: str

    # Score reported for the hit.
    score: float

    # Name of the collection that was queried to obtain this chunk.
    collection: str = ""
|
|
|
|
|
|
class ComplianceRAGClient:
    """
    RAG client that proxies search and scroll requests to the Go SDK.

    The network-facing methods (``search`` and ``scroll``) are
    error-tolerant: any failure is logged as a warning and reported to the
    caller as an empty result instead of being raised.

    Usage:
        client = get_rag_client()
        results = await client.search("DSGVO Art. 35", collection="bp_compliance_recht")
        context_str = client.format_for_prompt(results)
    """

    def __init__(self, base_url: str = SDK_URL):
        """
        Create a client for the SDK reachable at ``base_url``.

        Args:
            base_url: Root URL of the SDK, with or without a trailing slash.
        """
        # Strip trailing slashes so the joined endpoints never contain "//".
        rag_root = f"{base_url.rstrip('/')}/sdk/v1/rag"
        self._search_url = f"{rag_root}/search"
        # Built from the root rather than by replacing "/search" inside the
        # search URL, which would also rewrite a "/search" that happens to be
        # part of base_url itself.
        self._scroll_url = f"{rag_root}/scroll"

    @staticmethod
    def _build_result(raw: dict, collection: str, score: float) -> RAGSearchResult:
        """Map one JSON result/chunk object onto a RAGSearchResult."""

        def text_field(key: str) -> str:
            # A JSON null is treated like a missing key so that downstream
            # string handling (e.g. len() in format_for_prompt) stays safe.
            return raw.get(key) or ""

        return RAGSearchResult(
            text=text_field("text"),
            regulation_code=text_field("regulation_code"),
            regulation_name=text_field("regulation_name"),
            regulation_short=text_field("regulation_short"),
            category=text_field("category"),
            article=text_field("article"),
            paragraph=text_field("paragraph"),
            source_url=text_field("source_url"),
            score=score,
            collection=collection,
        )

    async def search(
        self,
        query: str,
        collection: str = "bp_compliance_ce",
        regulations: Optional[List[str]] = None,
        top_k: int = 5,
    ) -> List[RAGSearchResult]:
        """
        Search the RAG corpus via Go SDK.

        Args:
            query: Search text sent to the SDK.
            collection: Collection to search; also stored on every result.
            regulations: Optional regulation filter; only sent when non-empty.
            top_k: Number of results requested from the SDK.

        Returns:
            The parsed results, or an empty list on any error (never raises).
        """
        payload = {
            "query": query,
            "collection": collection,
            "top_k": top_k,
        }
        if regulations:
            payload["regulations"] = regulations

        # Deliberately broad: this method must never break its caller.
        try:
            async with httpx.AsyncClient(timeout=RAG_SEARCH_TIMEOUT) as client:
                resp = await client.post(self._search_url, json=payload)

            if resp.status_code != 200:
                logger.warning(
                    "RAG search returned %d: %s", resp.status_code, resp.text[:200]
                )
                return []

            data = resp.json()
            # `or []` / `or 0.0`: a JSON null is handled like an absent key
            # instead of raising and being logged as a failed search.
            return [
                self._build_result(raw, collection, raw.get("score") or 0.0)
                for raw in data.get("results") or []
            ]

        except Exception as e:
            logger.warning("RAG search failed: %s", e)
            return []

    async def scroll(
        self,
        collection: str,
        offset: Optional[str] = None,
        limit: int = 100,
    ) -> tuple[List[RAGSearchResult], Optional[str]]:
        """
        Scroll through ALL chunks in a collection (paginated).

        Args:
            collection: Collection to page through.
            offset: Pagination cursor from a previous call; omitted from the
                request when empty.
            limit: Page size, sent as a string query parameter.

        Returns:
            (chunks, next_offset). next_offset is None when done. On any
            error the result is ([], None); this method never raises.
        """
        params = {"collection": collection, "limit": str(limit)}
        if offset:
            params["offset"] = offset

        # Deliberately broad: this method must never break its caller.
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                resp = await client.get(self._scroll_url, params=params)

            if resp.status_code != 200:
                logger.warning(
                    "RAG scroll returned %d: %s", resp.status_code, resp.text[:200]
                )
                return [], None

            data = resp.json()
            # Scrolled chunks carry no relevance score, so it is fixed at 0.0.
            # `or []` keeps a JSON null chunk list from discarding next_offset.
            chunks = [
                self._build_result(raw, collection, 0.0)
                for raw in data.get("chunks") or []
            ]
            # Normalise an empty-string cursor to None ("no more pages").
            next_offset = data.get("next_offset") or None
            return chunks, next_offset

        except Exception as e:
            logger.warning("RAG scroll failed: %s", e)
            return [], None

    def format_for_prompt(
        self, results: List[RAGSearchResult], max_results: int = 5
    ) -> str:
        """
        Format search results as Markdown for inclusion in an LLM prompt.

        Args:
            results: Results to render, in the order they should appear.
            max_results: Upper bound on rendered results; negative values are
                treated as 0 rather than trimming entries from the end.

        Returns:
            A Markdown block with a heading and one numbered entry per
            result, or an empty string when ``results`` is empty.
        """
        if not results:
            return ""

        shown = results[: max(max_results, 0)]
        lines = ["## Relevanter Rechtskontext\n"]
        for position, result in enumerate(shown, start=1):
            header = f"{position}. **{result.regulation_short}** ({result.regulation_code})"
            if result.article:
                header += f" — {result.article}"
            lines.append(header)

            # Long chunks are cut at 400 characters and marked with "...".
            text = result.text
            if len(text) > 400:
                text = text[:400] + "..."
            lines.append(f" > {text}\n")

        return "\n".join(lines)
|
|
|
|
|
|
# Module-wide client instance; stays None until the first get_rag_client() call.
_rag_client: Optional[ComplianceRAGClient] = None


def get_rag_client() -> ComplianceRAGClient:
    """Return the shared RAG client, creating it on first use."""
    global _rag_client
    if _rag_client is not None:
        return _rag_client
    _rag_client = ComplianceRAGClient()
    return _rag_client
|