From 9812ff46f32de52132de98c0f432d270c18cbdde Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Fri, 13 Mar 2026 18:42:40 +0100
Subject: [PATCH] feat: add 7-stage control generator pipeline with 3 license rules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- control_generator.py: RAG→License→Structure/Reform→Harmonize→Anchor→Store→Mark
  pipeline with Anthropic Claude API (primary) + Ollama fallback for LLM
  reformulation
- anchor_finder.py: RAG-based + DuckDuckGo anchor search for open references
- control_generator_routes.py: REST API for generate, job status, review
  queue, processed stats
- 046_control_generator.sql: job tracking, chunk tracking, blocked sources
  tables; extends canonical_controls with license_rule, source_original_text,
  source_citation

Co-Authored-By: Claude Opus 4.6
---
 .../compliance/services/control_generator.py | 63 ++++++++++++++++---
 1 file changed, 55 insertions(+), 8 deletions(-)

diff --git a/backend-compliance/compliance/services/control_generator.py b/backend-compliance/compliance/services/control_generator.py
index be9b34e..9fffc0a 100644
--- a/backend-compliance/compliance/services/control_generator.py
+++ b/backend-compliance/compliance/services/control_generator.py
@@ -42,9 +42,11 @@ logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 
 SDK_URL = os.getenv("SDK_URL", "http://ai-compliance-sdk:8090")
-LLM_CHAT_URL = f"{SDK_URL}/sdk/v1/llm/chat"
 EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://embedding-service:8087")
-LLM_MODEL = os.getenv("CONTROL_GEN_LLM_MODEL", "qwen3:30b-a3b")
+ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
+ANTHROPIC_MODEL = os.getenv("CONTROL_GEN_ANTHROPIC_MODEL", "claude-sonnet-4-6")
+OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
+OLLAMA_MODEL = os.getenv("CONTROL_GEN_OLLAMA_MODEL", "qwen3:30b-a3b")
 LLM_TIMEOUT = float(os.getenv("CONTROL_GEN_LLM_TIMEOUT", "120"))
 
 HARMONIZATION_THRESHOLD = 0.85  # Cosine similarity above this = duplicate
@@ -218,32 +220,77 @@ class GeneratorResult:
 # ---------------------------------------------------------------------------
 
 async def _llm_chat(prompt: str, system_prompt: Optional[str] = None) -> str:
-    """Call the Go SDK LLM chat endpoint."""
+    """Call LLM — Anthropic Claude (primary) or Ollama (fallback)."""
+    if ANTHROPIC_API_KEY:
+        result = await _llm_anthropic(prompt, system_prompt)
+        if result:
+            return result
+        logger.warning("Anthropic failed, falling back to Ollama")
+
+    return await _llm_ollama(prompt, system_prompt)
+
+
+async def _llm_anthropic(prompt: str, system_prompt: Optional[str] = None) -> str:
+    """Call Anthropic Messages API."""
+    headers = {
+        "x-api-key": ANTHROPIC_API_KEY,
+        "anthropic-version": "2023-06-01",
+        "content-type": "application/json",
+    }
+    payload = {
+        "model": ANTHROPIC_MODEL,
+        "max_tokens": 4096,
+        "messages": [{"role": "user", "content": prompt}],
+    }
+    if system_prompt:
+        payload["system"] = system_prompt
+
+    try:
+        async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
+            resp = await client.post(
+                "https://api.anthropic.com/v1/messages",
+                headers=headers,
+                json=payload,
+            )
+            if resp.status_code != 200:
+                logger.error("Anthropic API %d: %s", resp.status_code, resp.text[:300])
+                return ""
+            data = resp.json()
+            content = data.get("content", [])
+            if content and isinstance(content, list):
+                return content[0].get("text", "")
+            return ""
+    except Exception as e:
+        logger.error("Anthropic request failed: %s", e)
+        return ""
+
+
+async def _llm_ollama(prompt: str, system_prompt: Optional[str] = None) -> str:
+    """Call Ollama chat API (fallback)."""
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
     messages.append({"role": "user", "content": prompt})
 
     payload = {
-        "model": LLM_MODEL,
+        "model": OLLAMA_MODEL,
         "messages": messages,
         "stream": False,
     }
 
     try:
         async with httpx.AsyncClient(timeout=LLM_TIMEOUT) as client:
-            resp = await client.post(LLM_CHAT_URL, json=payload)
+            resp = await client.post(f"{OLLAMA_URL}/api/chat", json=payload)
             if resp.status_code != 200:
-                logger.error("LLM chat failed %d: %s", resp.status_code, resp.text[:300])
+                logger.error("Ollama chat failed %d: %s", resp.status_code, resp.text[:300])
                 return ""
             data = resp.json()
-            # Go SDK returns {message: {content: "..."}} or {response: "..."}
             msg = data.get("message", {})
             if isinstance(msg, dict):
                 return msg.get("content", "")
             return data.get("response", str(msg))
     except Exception as e:
-        logger.error("LLM chat request failed: %s", e)
+        logger.error("Ollama request failed: %s", e)
         return ""
 
 