feat: Phase 2 — RAG integration in Requirements + DSFA Draft

Add legal-context enrichment from the Qdrant vector corpus to the two
highest-priority modules: the Requirements AI assistant and the DSFA
drafting engine.

Go SDK:
- Add SearchCollection() with collection override + whitelist validation
- Refactor Search() to delegate to shared searchInternal()
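
The Go SDK diff itself is not shown on this page; as a rough illustration only, the whitelist idea behind SearchCollection() can be sketched in Python (function names, the whitelist contents, and the delegation target are assumptions, not the SDK's actual API):

```python
# Hypothetical sketch of the SearchCollection() whitelist check (the real SDK is Go).
ALLOWED_COLLECTIONS = {"bp_compliance_ce", "bp_compliance_recht"}  # assumed whitelist

def search_internal(query: str, collection: str, top_k: int) -> list:
    # Placeholder for the shared search path (a Qdrant query in the real SDK).
    return []

def search_collection(query: str, collection: str, top_k: int = 3) -> list:
    """Reject collection overrides that are not whitelisted,
    then delegate to the shared search path."""
    if collection not in ALLOWED_COLLECTIONS:
        raise ValueError(f"collection {collection!r} is not whitelisted")
    return search_internal(query, collection, top_k)
```

The point of the whitelist is that callers may override the collection, but only to names the SDK already knows about; anything else fails fast instead of querying an arbitrary collection.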

Python backend:
- New ComplianceRAGClient proxying POST /sdk/v1/rag/search (error-tolerant)
- AI assistant: enrich interpret_requirement() and suggest_controls() with RAG
- Requirements API: add ?include_legal_context=true query parameter
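
The client is described as error-tolerant; a minimal sketch of that contract (the method names search/format_for_prompt match the diff below, but the transport callable and payload shape are assumptions standing in for the POST /sdk/v1/rag/search call):

```python
import logging
from typing import Any, Callable, Dict, List

logger = logging.getLogger(__name__)

class ComplianceRAGClient:
    """Error-tolerant sketch: failures degrade to an empty result,
    so prompt enrichment never breaks the caller."""

    def __init__(self, transport: Callable[[Dict[str, Any]], List[Dict[str, Any]]]):
        # transport stands in for the HTTP call to the gateway (assumed shape)
        self._transport = transport

    async def search(self, query: str, collection: str = "bp_compliance_ce",
                     top_k: int = 3) -> List[Dict[str, Any]]:
        try:
            return self._transport({"query": query, "collection": collection, "top_k": top_k})
        except Exception as e:
            logger.warning("RAG search failed, returning no context: %s", e)
            return []

    def format_for_prompt(self, results: List[Dict[str, Any]]) -> str:
        # Empty input yields an empty string, so callers can skip enrichment.
        if not results:
            return ""
        return "\n".join(r.get("text", "") for r in results)
```

With a failing transport, search() returns an empty list and format_for_prompt() yields an empty string, so the prompt goes out without legal context rather than the request failing.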

Admin (Next.js):
- Extract shared queryRAG() utility from chat route
- Inject RAG legal context into v1 and v2 draft pipelines

Tests for all three layers (Go, Python, TypeScript shared utility).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Benjamin Admin
2026-03-02 08:57:39 +01:00
parent 3d9bc285ac
commit 14a99322eb
12 changed files with 739 additions and 36 deletions


@@ -16,6 +16,7 @@ from typing import List, Optional, Dict, Any
 from enum import Enum
 from .llm_provider import LLMProvider, get_shared_provider, LLMResponse
+from .rag_client import get_rag_client

 logger = logging.getLogger(__name__)
@@ -199,9 +200,23 @@ Bewerte die Abdeckung und identifiziere Lücken im JSON-Format:
 Gib NUR das JSON zurück."""

+    # EU regulation codes → bp_compliance_ce, DE codes → bp_compliance_recht
+    _EU_CODES = {"DSGVO", "GDPR", "AIACT", "AI_ACT", "NIS2", "CRA"}
+    _DE_CODES = {"BDSG", "TDDDG", "DDG", "URHG", "TMG", "TKG"}
+
     def __init__(self, llm_provider: Optional[LLMProvider] = None):
         """Initialize the assistant with an LLM provider."""
         self.llm = llm_provider or get_shared_provider()
+        self.rag = get_rag_client()
+
+    def _collection_for_regulation(self, regulation_code: str) -> str:
+        """Determine the RAG collection based on regulation code."""
+        code_upper = regulation_code.upper()
+        if any(c in code_upper for c in self._EU_CODES):
+            return "bp_compliance_ce"
+        elif any(c in code_upper for c in self._DE_CODES):
+            return "bp_compliance_recht"
+        return "bp_compliance_ce"

     async def interpret_requirement(
         self,
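
Pulled out of the class, the collection routing above can be exercised standalone (a sketch mirroring the diff; note the substring match means a code like "gdpr-2016" still routes to the EU collection):

```python
# Standalone version of the routing shown in the diff above.
EU_CODES = {"DSGVO", "GDPR", "AIACT", "AI_ACT", "NIS2", "CRA"}
DE_CODES = {"BDSG", "TDDDG", "DDG", "URHG", "TMG", "TKG"}

def collection_for_regulation(regulation_code: str) -> str:
    """EU regulations map to bp_compliance_ce, German ones to
    bp_compliance_recht; unknown codes default to the EU collection."""
    code_upper = regulation_code.upper()
    if any(c in code_upper for c in EU_CODES):
        return "bp_compliance_ce"
    if any(c in code_upper for c in DE_CODES):
        return "bp_compliance_recht"
    return "bp_compliance_ce"
```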
@@ -226,6 +241,17 @@ Gib NUR das JSON zurück."""
             requirement_text=requirement_text or "Kein Text verfügbar"
         )

+        # Enrich prompt with RAG legal context
+        try:
+            rag_query = f"{regulation_name} {article} {title}"
+            collection = self._collection_for_regulation(regulation_code)
+            rag_results = await self.rag.search(rag_query, collection=collection, top_k=3)
+            rag_context = self.rag.format_for_prompt(rag_results)
+            if rag_context:
+                prompt += f"\n\n{rag_context}"
+        except Exception as e:
+            logger.warning("RAG enrichment failed for interpret_requirement: %s", e)
+
         try:
             response = await self.llm.complete(
                 prompt=prompt,
@@ -282,6 +308,16 @@ Gib NUR das JSON zurück."""
             affected_modules=", ".join(affected_modules) if affected_modules else "Alle Module"
         )

+        # Enrich prompt with RAG legal context
+        try:
+            rag_query = f"{regulation_name} {requirement_title} Massnahmen Controls"
+            rag_results = await self.rag.search(rag_query, top_k=3)
+            rag_context = self.rag.format_for_prompt(rag_results)
+            if rag_context:
+                prompt += f"\n\n{rag_context}"
+        except Exception as e:
+            logger.warning("RAG enrichment failed for suggest_controls: %s", e)
+
         try:
             response = await self.llm.complete(
                 prompt=prompt,