Files
breakpilot-compliance/backend-compliance/compliance/services/agent_tools.py
T
Benjamin Admin 58f370f4ff feat: LLM-agnostic Compliance Agent with tool calling
New agent architecture for intelligent MC evaluation:

agent_tools.py (367 LOC):
- 5 tools in OpenAI function-calling format
- query_controls: async DB query for MCs by doc_type
- evaluate_controls_batch: deterministic keyword matching
- search_document: text search with context
- get_document_stats: word count, sections, language
- submit_results: finalize check results

compliance_agent.py (398 LOC):
- ComplianceAgent class with agent loop
- 3 LLM providers: Ollama, OpenAI-compatible (OVH), Anthropic
- Tool call dispatch + result collection
- System prompt for systematic compliance analysis
- run_compliance_check() convenience function

Hybrid mode:
- COMPLIANCE_USE_AGENT=false (default): deterministic regex
- COMPLIANCE_USE_AGENT=true: LLM agent with tool calling
- Agent fallback to regex if LLM unavailable

Works with Qwen 35B (Ollama), Qwen 120B (OVH vLLM), Claude.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-10 22:56:09 +02:00

368 lines
14 KiB
Python

"""
Agent Tools — LLM-agnostic tool definitions for the Compliance Agent.
Provides 5 tools in OpenAI function-calling format:
1. query_controls — load Master Controls from DB
2. evaluate_controls_batch — deterministic keyword check against doc text
3. search_document — find keyword in document with context
4. get_document_stats — word count, sections, language
5. submit_results — final submission of pass/fail results
All implementations are deterministic (no LLM). The agent decides
which tools to call and in what order.
"""
import asyncpg
import json
import logging
import os
import re
logger = logging.getLogger(__name__)  # module-level logger; configured by the application

# Default DSN for the compliance Postgres instance. The DATABASE_URL
# environment variable overrides it, and a per-call "db_url" entry in
# the tool context takes precedence over both (see _query_controls).
DATABASE_URL = os.getenv(
    "DATABASE_URL",
    "postgresql://breakpilot:breakpilot@bp-core-postgres:5432/breakpilot",
)
# ═══════════════════════════════════════════════════════════════
# Tool definitions (OpenAI function-calling format)
# ═══════════════════════════════════════════════════════════════
# Tool schema handed to the LLM provider. The descriptions are German
# runtime strings (the agent works on German-language compliance
# documents) — do not translate them.
TOOLS = [
    # 1. query_controls — load Master Controls for a document type from the DB.
    {
        "type": "function",
        "function": {
            "name": "query_controls",
            "description": (
                "Lade Master Controls aus der Datenbank fuer einen Dokumenttyp. "
                "Gibt eine Liste von Pruefpunkten mit check_question, "
                "pass_criteria und fail_criteria zurueck."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "doc_type": {
                        "type": "string",
                        "description": "Dokumenttyp: dse, agb, impressum, cookie, widerruf, avv, dsfa",
                    },
                    "severity": {
                        "type": "string",
                        "description": "Optional: nur Controls mit dieser Severity (HIGH, MEDIUM, LOW)",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Max Anzahl Controls (default: alle)",
                    },
                },
                "required": ["doc_type"],
            },
        },
    },
    # 2. evaluate_controls_batch — deterministic keyword check of a batch
    #    of controls against the document text (max 20 per call).
    {
        "type": "function",
        "function": {
            "name": "evaluate_controls_batch",
            "description": (
                "Pruefe mehrere Master Controls gegen den Dokumenttext. "
                "Deterministisch: Keyword-Matching der pass_criteria. "
                "Gibt fuer jeden Control pass/fail mit Evidence zurueck."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "controls": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "id": {"type": "string"},
                                "check_question": {"type": "string"},
                                "pass_criteria": {
                                    "type": "array",
                                    "items": {"type": "string"},
                                },
                            },
                            "required": ["id", "check_question", "pass_criteria"],
                        },
                        "description": "Liste von Controls zum Pruefen (max 20 pro Batch)",
                    },
                },
                "required": ["controls"],
            },
        },
    },
    # 3. search_document — case-insensitive keyword search with context window.
    {
        "type": "function",
        "function": {
            "name": "search_document",
            "description": (
                "Suche ein Schluesselwort im Dokument und gib die "
                "Fundstelle mit Kontext zurueck."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "keyword": {
                        "type": "string",
                        "description": "Suchbegriff (case-insensitive)",
                    },
                    "context_chars": {
                        "type": "integer",
                        "description": "Zeichen Kontext um die Fundstelle (default: 200)",
                    },
                },
                "required": ["keyword"],
            },
        },
    },
    # 4. get_document_stats — word count, detected sections, language guess.
    #    Takes no arguments (empty properties object).
    {
        "type": "function",
        "function": {
            "name": "get_document_stats",
            "description": (
                "Statistiken zum Dokument: Wortanzahl, erkannte Abschnitte, "
                "Sprache, Laenge."
            ),
            "parameters": {
                "type": "object",
                "properties": {},
            },
        },
    },
    # 5. submit_results — final submission of pass/fail results; ends the run.
    {
        "type": "function",
        "function": {
            "name": "submit_results",
            "description": (
                "Reiche die finalen Pruefergebnisse ein. Jedes Ergebnis "
                "hat id, label, passed, severity und eine optionale Empfehlung."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "results": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "id": {"type": "string"},
                                "label": {"type": "string"},
                                "passed": {"type": "boolean"},
                                "severity": {"type": "string"},
                                "hint": {"type": "string"},
                                "matched_text": {"type": "string"},
                            },
                            "required": ["id", "label", "passed", "severity"],
                        },
                    },
                },
                "required": ["results"],
            },
        },
    },
]
# ═══════════════════════════════════════════════════════════════
# Tool dispatcher
# ═══════════════════════════════════════════════════════════════
async def execute_tool(name: str, args: dict, context: dict) -> dict:
    """Route a named tool call to its implementation.

    Args:
        name: Tool function name as announced in ``TOOLS``.
        args: Parsed argument dict from the LLM tool call.
        context: Shared mutable state (doc_text, doc_type, db_url, results).

    Returns:
        The tool's result dict, or an ``{"error": ...}`` dict for unknown
        tool names or unexpected implementation failures.
    """
    handlers = {
        "query_controls": _query_controls,
        "evaluate_controls_batch": _evaluate_controls_batch,
        "search_document": _search_document,
        "get_document_stats": _get_document_stats,
        "submit_results": _submit_results,
    }
    try:
        handler = handlers[name]
    except KeyError:
        return {"error": f"Unbekanntes Tool: {name}"}
    try:
        return await handler(args, context)
    except Exception as exc:
        # Never propagate — the agent loop expects a result dict per call.
        logger.exception("Tool %s failed", name)
        return {"error": str(exc)}
# ═══════════════════════════════════════════════════════════════
# Tool implementations
# ═══════════════════════════════════════════════════════════════
async def _query_controls(args: dict, ctx: dict) -> dict:
"""Load Master Controls from compliance.doc_check_controls."""
doc_type = args.get("doc_type", ctx.get("doc_type", "dse"))
severity = args.get("severity")
limit = args.get("limit", 0)
db_url = ctx.get("db_url") or DATABASE_URL
try:
conn = await asyncpg.connect(db_url)
except Exception as e:
return {"error": f"DB-Verbindung fehlgeschlagen: {e}", "controls": []}
try:
query = (
"SELECT id, control_id, title, regulation, check_question, "
" pass_criteria, fail_criteria, severity "
"FROM compliance.doc_check_controls "
"WHERE doc_type = $1"
)
params: list = [doc_type]
if severity:
query += " AND UPPER(severity) = $2"
params.append(severity.upper())
query += " ORDER BY severity DESC, title"
if limit and limit > 0:
query += f" LIMIT {int(limit)}"
rows = await conn.fetch(query, *params)
controls = []
for r in rows:
pc = r["pass_criteria"]
if isinstance(pc, str):
try:
pc = json.loads(pc)
except Exception:
pc = [pc] if pc else []
controls.append({
"id": str(r["id"]),
"control_id": r.get("control_id", ""),
"title": r.get("title", ""),
"regulation": r.get("regulation", ""),
"check_question": r.get("check_question", ""),
"pass_criteria": pc if isinstance(pc, list) else [pc],
"severity": r.get("severity", "MEDIUM"),
})
return {"count": len(controls), "controls": controls}
except Exception as e:
return {"error": f"Abfrage fehlgeschlagen: {e}", "controls": []}
finally:
await conn.close()
_STOP = {
"oder", "und", "der", "die", "das", "ein", "eine", "von", "vom",
"zur", "zum", "mit", "auf", "aus", "bei", "nach", "nicht", "kein",
"wird", "werden", "kann", "muss", "ist", "sind", "hat", "dass",
}
async def _evaluate_controls_batch(args: dict, ctx: dict) -> dict:
"""Keyword-match each control's pass_criteria against doc_text."""
controls = args.get("controls", [])[:20]
text_lower = ctx.get("doc_text", "").lower().replace("\xad", "")
results = []
for ctrl in controls:
criteria = ctrl.get("pass_criteria", [])
met = 0
evidence = ""
for crit in criteria:
words = [w for w in re.findall(r"[a-z\u00e4\u00f6\u00fc\u00df]{4,}", crit.lower()) if w not in _STOP]
if not words:
met += 1
continue
matched = sum(1 for w in words if w in text_lower)
if matched >= len(words) * 0.5:
met += 1
if not evidence:
for w in words:
idx = text_lower.find(w)
if idx >= 0:
s = max(0, idx - 30)
e = min(len(text_lower), idx + len(w) + 30)
evidence = text_lower[s:e].strip()
break
passed = met >= len(criteria) * 0.6 if criteria else False
results.append({
"id": ctrl.get("id", ""),
"passed": passed,
"criteria_met": f"{met}/{len(criteria)}",
"matched_text": evidence[:120],
})
return {"evaluated": len(results), "results": results}
async def _search_document(args: dict, ctx: dict) -> dict:
"""Simple keyword search with context window."""
keyword = args.get("keyword", "").lower()
context_chars = args.get("context_chars", 200)
text = ctx.get("doc_text", "")
text_lower = text.lower()
hits = []
start = 0
while len(hits) < 5:
idx = text_lower.find(keyword, start)
if idx < 0:
break
s = max(0, idx - context_chars)
e = min(len(text), idx + len(keyword) + context_chars)
hits.append({"position": idx, "context": text[s:e].strip()})
start = idx + len(keyword)
return {"keyword": keyword, "found": len(hits) > 0, "hits": hits}
async def _get_document_stats(args: dict, ctx: dict) -> dict:
"""Word count, detected sections, language guess."""
text = ctx.get("doc_text", "")
words = text.split()
# Detect sections (lines that look like headings)
sections = []
for line in text.split("\n"):
stripped = line.strip()
if 3 < len(stripped) < 120 and stripped[0].isupper() and not stripped.endswith(","):
if re.match(r"^(\d+\.?\s+|[IVXLC]+\.?\s+|[a-z]\)\s+)?[A-ZAEOEUE\u00c4\u00d6\u00dc]", stripped):
sections.append(stripped[:80])
# Language guess (DE vs EN heuristic)
de_markers = sum(1 for w in ["der", "die", "das", "und", "ist", "werden", "nicht"] if w in text.lower())
en_markers = sum(1 for w in ["the", "and", "is", "are", "not", "with", "for"] if w in text.lower())
lang = "de" if de_markers > en_markers else "en"
return {
"word_count": len(words),
"char_count": len(text),
"sections_detected": len(sections),
"sections": sections[:20],
"language": lang,
}
async def _submit_results(args: dict, ctx: dict) -> dict:
"""Store final results in context for the caller to retrieve."""
results = args.get("results", [])
# Normalise into CheckItem-compatible format
normalised = []
for r in results:
normalised.append({
"id": r.get("id", ""),
"label": r.get("label", ""),
"passed": r.get("passed", False),
"severity": r.get("severity", "MEDIUM"),
"hint": r.get("hint", ""),
"matched_text": r.get("matched_text", ""),
"level": 2,
"parent": None,
"skipped": False,
"source": "agent",
})
ctx["results"] = normalised
passed = sum(1 for r in normalised if r["passed"])
return {
"submitted": len(normalised),
"passed": passed,
"failed": len(normalised) - passed,
}