""" Compliance Agent — LLM-agnostic agent with tool calling. Supports three LLM providers: - ollama: Local Ollama instance (default) - openai: OpenAI-compatible API (OVH vLLM, etc.) - anthropic: Anthropic Messages API The agent loop: 1. Send messages + tool definitions to the LLM 2. If the LLM returns tool_calls → execute them → append results 3. Repeat until no more tool_calls or max_iterations reached 4. Return the collected results from submit_results() All tool execution is deterministic (keyword matching, DB queries). The LLM only decides WHICH tools to call and HOW to interpret results. """ import httpx import json import logging import os import re from compliance.services.agent_tools import TOOLS, execute_tool logger = logging.getLogger(__name__) # ═══════════════════════════════════════════════════════════════ # Provider configuration # ═══════════════════════════════════════════════════════════════ OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434") OVH_URL = os.getenv("OVH_LLM_URL", "") ANTHROPIC_KEY = os.getenv("ANTHROPIC_API_KEY", "") AGENT_PROVIDER = os.getenv("COMPLIANCE_AGENT_PROVIDER", "ollama") AGENT_MODEL = os.getenv("COMPLIANCE_AGENT_MODEL", "qwen3.5:35b-a3b") SYSTEM_PROMPT = ( "Du bist ein Datenschutz-Compliance-Analyst. " "Pruefe das Dokument systematisch:\n" "1. Lade alle Master Controls fuer den Dokumenttyp (query_controls)\n" "2. Pruefe die Controls in Batches von 20 (evaluate_controls_batch)\n" "3. Bei unklaren Ergebnissen: suche im Dokument nach " "Schluesselwoertern (search_document)\n" "4. Reiche die finalen Ergebnisse ein (submit_results) " "mit konkreten Empfehlungen\n" "Arbeite gruendlich und pruefe ALLE Controls. " "Antworte immer auf Deutsch." 
# ═══════════════════════════════════════════════════════════════
# Agent class
# ═══════════════════════════════════════════════════════════════


class ComplianceAgent:
    """LLM-agnostic compliance agent with a tool-calling loop.

    The conversation history is kept in OpenAI chat format (assistant
    messages carry ``tool_calls`` whose ``arguments`` are JSON strings, tool
    results are ``role="tool"`` messages). The Ollama and Anthropic adapters
    convert that history into their own wire format right before each request.

    Every adapter returns the same normalized shape::

        {"content": str, "tool_calls": [{"id": str, "name": str, "arguments": dict | str}]}
    """

    # Number of leading document characters embedded in the first user
    # message. Tools receive the full text through ``self.context``.
    DOC_PREVIEW_CHARS = 6000
    # Maximum number of characters of a serialized tool result that is fed
    # back to the LLM.
    TOOL_RESULT_MAX_CHARS = 8000
    # Timeout (seconds) for a single HTTP request to any provider.
    REQUEST_TIMEOUT = 120.0
    # Reasoning block that some models emit inline in their answer.
    _THINK_BLOCK = re.compile(r"<think>.*?</think>", re.DOTALL)

    def __init__(
        self,
        provider_type: str,
        model: str,
        doc_text: str,
        doc_type: str,
        doc_title: str,
        db_url: str = "",
    ):
        """Create an agent for one document.

        Args:
            provider_type: "ollama", "openai" or "anthropic"; an empty value
                selects the module default ``AGENT_PROVIDER``.
            model: Provider-specific model name; empty selects ``AGENT_MODEL``.
            doc_text: Full document text.
            doc_type: Document type identifier.
            doc_title: Human-readable title, used in the prompt only.
            db_url: Connection string handed to the tools via the context.
        """
        self.provider = provider_type or AGENT_PROVIDER
        self.model = model or AGENT_MODEL
        self.doc_text = doc_text
        self.doc_type = doc_type
        self.doc_title = doc_title
        # Shared state passed to every tool call; "results" is what run()
        # finally returns.
        self.context = {
            "doc_text": doc_text,
            "doc_type": doc_type,
            "db_url": db_url,
            "results": [],
        }

    async def run(self, max_iterations: int = 15) -> list[dict]:
        """Run the agent loop until the LLM stops calling tools.

        Args:
            max_iterations: Upper bound on LLM round trips.

        Returns:
            The ``"results"`` list from the tool context (empty if no tool
            stored anything).
        """
        preview = self.doc_text[: self.DOC_PREVIEW_CHARS]
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": (
                    f"Pruefe das folgende Dokument vom Typ '{self.doc_type}' "
                    f"(Titel: '{self.doc_title}').\n\n"
                    f"Dokumenttext ({len(self.doc_text)} Zeichen):\n"
                    f"{preview}"
                ),
            },
        ]
        tools = TOOLS

        for iteration in range(1, max_iterations + 1):
            logger.info(
                "Agent iteration %d/%d (provider=%s, model=%s)",
                iteration, max_iterations, self.provider, self.model,
            )

            response = await self._chat_with_tools(messages, tools)
            content = response.get("content", "")
            tool_calls = response.get("tool_calls", [])

            if content:
                messages.append({"role": "assistant", "content": content})

            if not tool_calls:
                logger.info("Agent finished after %d iterations", iteration)
                break

            # Execute each tool call and append call + result to the history.
            for index, tc in enumerate(tool_calls):
                tool_name = tc.get("name", "")
                tool_args = self._parse_tool_args(tc.get("arguments", {}))
                # `or` (not a .get default) so an empty-string id is replaced
                # too; the index keeps ids unique when one turn calls the
                # same tool more than once.
                call_id = tc.get("id") or f"call_{iteration}_{index}_{tool_name}"

                logger.info("Tool call: %s(%s)", tool_name, list(tool_args))

                result = await self._execute_tool(tc)
                result_str = json.dumps(result, ensure_ascii=False, default=str)

                messages.append({
                    "role": "assistant",
                    "content": None,
                    "tool_calls": [
                        {
                            "id": call_id,
                            "type": "function",
                            "function": {
                                "name": tool_name,
                                "arguments": json.dumps(tool_args),
                            },
                        }
                    ],
                })
                messages.append({
                    "role": "tool",
                    "tool_call_id": call_id,
                    "content": result_str[: self.TOOL_RESULT_MAX_CHARS],
                })
        else:
            # Loop ended without the LLM finishing on its own.
            logger.warning(
                "Agent stopped after reaching max_iterations=%d", max_iterations
            )

        return self.context.get("results", [])

    async def _chat_with_tools(self, messages: list, tools: list) -> dict:
        """Send messages+tools to the configured LLM. Returns {content, tool_calls}.

        Raises:
            ValueError: If ``self.provider`` is not a known provider.
        """
        if self.provider == "ollama":
            return await self._chat_ollama(messages, tools)
        elif self.provider == "openai":
            return await self._chat_openai(messages, tools)
        elif self.provider == "anthropic":
            return await self._chat_anthropic(messages, tools)
        else:
            raise ValueError(f"Unbekannter Provider: {self.provider}")

    # ───────────────────────────────────────────────────────────
    # Shared helpers
    # ───────────────────────────────────────────────────────────

    @staticmethod
    def _parse_tool_args(raw) -> dict:
        """Normalize tool arguments (dict or JSON string) to a dict.

        Invalid JSON and anything that does not decode to a JSON object
        (``null``, lists, scalars) yields an empty dict.
        """
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except json.JSONDecodeError:
                return {}
        return raw if isinstance(raw, dict) else {}

    @classmethod
    def _strip_think(cls, content) -> str:
        """Remove ``<think>…</think>`` blocks and surrounding whitespace."""
        return cls._THINK_BLOCK.sub("", content or "").strip()

    # ───────────────────────────────────────────────────────────
    # Ollama
    # ───────────────────────────────────────────────────────────

    @classmethod
    def _to_ollama_messages(cls, messages: list) -> list:
        """Return a copy of the history suitable for Ollama's /api/chat.

        The history stores tool-call arguments as JSON strings (OpenAI
        convention); Ollama's native API expects them as JSON objects, and
        message content as a string rather than null. The input is not
        mutated.
        """
        converted = []
        for m in messages:
            if m.get("role") == "assistant" and m.get("tool_calls"):
                calls = []
                for tc in m["tool_calls"]:
                    fn = tc.get("function") or {}
                    calls.append({
                        **tc,
                        "function": {
                            **fn,
                            "arguments": cls._parse_tool_args(
                                fn.get("arguments", {})
                            ),
                        },
                    })
                converted.append({
                    **m,
                    "content": m.get("content") or "",
                    "tool_calls": calls,
                })
            else:
                converted.append(m)
        return converted

    async def _chat_ollama(self, messages: list, tools: list) -> dict:
        """Ollama /api/chat with tools support."""
        payload = {
            "model": self.model,
            "messages": self._to_ollama_messages(messages),
            "tools": tools,
            "stream": False,
            "options": {"temperature": 0.1, "num_predict": 4000},
        }

        async with httpx.AsyncClient(timeout=self.REQUEST_TIMEOUT) as client:
            resp = await client.post(f"{OLLAMA_URL}/api/chat", json=payload)
            resp.raise_for_status()
            data = resp.json()

        # `or` guards against explicit nulls in the response.
        msg = data.get("message") or {}
        content = self._strip_think(msg.get("content"))

        tool_calls = []
        for index, tc in enumerate(msg.get("tool_calls") or []):
            fn = tc.get("function") or {}
            name = fn.get("name", "")
            tool_calls.append({
                # Ollama's response carries no id here; synthesize one that
                # stays unique within the turn.
                "id": f"ollama_{index}_{name}",
                "name": name,
                "arguments": fn.get("arguments", {}),
            })

        return {"content": content, "tool_calls": tool_calls}

    # ───────────────────────────────────────────────────────────
    # OpenAI-compatible (OVH vLLM, etc.)
    # ───────────────────────────────────────────────────────────

    async def _chat_openai(self, messages: list, tools: list) -> dict:
        """OpenAI-compatible /v1/chat/completions with tools."""
        base_url = OVH_URL or os.getenv("OPENAI_BASE_URL", "")
        # `or` so that an empty OVH_LLM_KEY still falls back, matching the
        # base_url lookup above.
        api_key = os.getenv("OVH_LLM_KEY") or os.getenv("OPENAI_API_KEY", "")

        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        payload = {
            "model": self.model,
            "messages": messages,
            "tools": tools,
            "temperature": 0.1,
            "max_tokens": 4000,
        }

        async with httpx.AsyncClient(timeout=self.REQUEST_TIMEOUT) as client:
            resp = await client.post(
                f"{base_url.rstrip('/')}/v1/chat/completions",
                json=payload,
                headers=headers,
            )
            resp.raise_for_status()
            data = resp.json()

        # Servers may send "choices": [] or "tool_calls": null; treat both
        # as "nothing returned" instead of raising.
        choices = data.get("choices") or [{}]
        msg = choices[0].get("message") or {}

        tool_calls = []
        for tc in msg.get("tool_calls") or []:
            fn = tc.get("function") or {}
            tool_calls.append({
                "id": tc.get("id", ""),
                "name": fn.get("name", ""),
                "arguments": fn.get("arguments", "{}"),
            })

        return {"content": msg.get("content", "") or "", "tool_calls": tool_calls}

    # ───────────────────────────────────────────────────────────
    # Anthropic
    # ───────────────────────────────────────────────────────────

    @staticmethod
    def _to_anthropic_tools(tools: list) -> list:
        """Convert OpenAI-style function tool definitions to Anthropic format."""
        converted = []
        for t in tools:
            fn = t.get("function", {})
            converted.append({
                "name": fn["name"],
                "description": fn.get("description", ""),
                "input_schema": fn.get(
                    "parameters", {"type": "object", "properties": {}}
                ),
            })
        return converted

    @classmethod
    def _to_anthropic_messages(cls, messages: list) -> tuple:
        """Split the history into ``(system_text, anthropic_messages)``.

        System messages are lifted out (the last one wins), tool results
        become user messages with a ``tool_result`` block, and assistant tool
        calls become ``tool_use`` blocks with object-valued input.
        """
        system_text = ""
        api_messages = []
        for m in messages:
            role = m["role"]
            if role == "system":
                system_text = m["content"]
            elif role == "tool":
                api_messages.append({
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": m.get("tool_call_id", ""),
                            "content": m.get("content", ""),
                        }
                    ],
                })
            elif role == "assistant" and m.get("tool_calls"):
                blocks = []
                if m.get("content"):
                    blocks.append({"type": "text", "text": m["content"]})
                for tc in m["tool_calls"]:
                    # Accept both the nested OpenAI shape and a flat
                    # {"name", "arguments"} dict.
                    fn = tc.get("function", tc)
                    blocks.append({
                        "type": "tool_use",
                        "id": tc.get("id", ""),
                        "name": fn.get("name", tc.get("name", "")),
                        "input": cls._parse_tool_args(fn.get("arguments", {})),
                    })
                api_messages.append({"role": "assistant", "content": blocks})
            else:
                api_messages.append(
                    {"role": role, "content": m.get("content", "")}
                )
        return system_text, api_messages

    async def _chat_anthropic(self, messages: list, tools: list) -> dict:
        """Anthropic Messages API with tools (different format)."""
        api_key = ANTHROPIC_KEY or os.getenv("ANTHROPIC_API_KEY", "")
        system_text, api_messages = self._to_anthropic_messages(messages)

        payload = {
            "model": self.model,
            "max_tokens": 4000,
            "system": system_text,
            "messages": api_messages,
            "tools": self._to_anthropic_tools(tools),
            "temperature": 0.1,
        }
        headers = {
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        }

        async with httpx.AsyncClient(timeout=self.REQUEST_TIMEOUT) as client:
            resp = await client.post(
                "https://api.anthropic.com/v1/messages",
                json=payload,
                headers=headers,
            )
            resp.raise_for_status()
            data = resp.json()

        text_parts = []
        tool_calls = []
        for part in data.get("content") or []:
            kind = part.get("type")
            if kind == "text":
                text_parts.append(part.get("text", ""))
            elif kind == "tool_use":
                tool_calls.append({
                    "id": part.get("id", ""),
                    "name": part.get("name", ""),
                    "arguments": part.get("input", {}),
                })

        return {"content": "".join(text_parts), "tool_calls": tool_calls}

    # ───────────────────────────────────────────────────────────
    # Tool execution
    # ───────────────────────────────────────────────────────────

    async def _execute_tool(self, tool_call: dict) -> dict:
        """Execute a single tool call via agent_tools.execute_tool().

        Args:
            tool_call: Normalized call with "name" and "arguments" (dict or
                JSON string).

        Returns:
            Whatever ``execute_tool`` returns for this call.
        """
        name = tool_call.get("name", "")
        args = self._parse_tool_args(tool_call.get("arguments", {}))
        return await execute_tool(name, args, self.context)


# ═══════════════════════════════════════════════════════════════
# Convenience function
# ═══════════════════════════════════════════════════════════════


async def run_compliance_check(
    text: str,
    doc_type: str,
    doc_title: str,
    provider_type: str = "",
    model: str = "",
    db_url: str = "",
    max_iterations: int = 15,
) -> list[dict]:
    """Run a full compliance check with the agent.

    Args:
        text: Document text to check.
        doc_type: Document type (dse, agb, impressum, etc.).
        doc_title: Human-readable document title.
        provider_type: LLM provider (ollama, openai, anthropic). Empty
            selects the module default ``AGENT_PROVIDER``.
        model: Model name/ID (provider-specific). Empty selects
            ``AGENT_MODEL``.
        db_url: PostgreSQL connection string (optional).
        max_iterations: Upper bound on LLM round trips of the agent loop.
            Defaults to 15, the value that was previously hard-coded.

    Returns:
        List of CheckItem-compatible dicts with pass/fail results.
    """
    # Resolve defaults here as well so the start-up log shows the effective
    # provider and model rather than empty strings.
    provider = provider_type or AGENT_PROVIDER
    mdl = model or AGENT_MODEL

    logger.info(
        "Starting compliance check: doc_type=%s, title='%s', "
        "provider=%s, model=%s, text_len=%d",
        doc_type, doc_title, provider, mdl, len(text),
    )

    agent = ComplianceAgent(
        provider_type=provider,
        model=mdl,
        doc_text=text,
        doc_type=doc_type,
        doc_title=doc_title,
        db_url=db_url,
    )
    results = await agent.run(max_iterations=max_iterations)

    # A result counts as failed when "passed" is missing or falsy, so the two
    # counts always add up to len(results).
    passed = sum(1 for r in results if r.get("passed"))
    logger.info(
        "Compliance check complete: %d results (%d passed, %d failed)",
        len(results), passed, len(results) - passed,
    )
    return results