58f370f4ff
New agent architecture for intelligent MC evaluation: agent_tools.py (367 LOC): - 5 tools in OpenAI function-calling format - query_controls: async DB query for MCs by doc_type - evaluate_controls_batch: deterministic keyword matching - search_document: text search with context - get_document_stats: word count, sections, language - submit_results: finalize check results compliance_agent.py (398 LOC): - ComplianceAgent class with agent loop - 3 LLM providers: Ollama, OpenAI-compatible (OVH), Anthropic - Tool call dispatch + result collection - System prompt for systematic compliance analysis - run_compliance_check() convenience function Hybrid mode: - COMPLIANCE_USE_AGENT=false (default): deterministic regex - COMPLIANCE_USE_AGENT=true: LLM agent with tool calling - Agent fallback to regex if LLM unavailable Works with Qwen 35B (Ollama), Qwen 120B (OVH vLLM), Claude. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
368 lines
14 KiB
Python
368 lines
14 KiB
Python
"""
|
|
Agent Tools — LLM-agnostic tool definitions for the Compliance Agent.
|
|
|
|
Provides 5 tools in OpenAI function-calling format:
|
|
1. query_controls — load Master Controls from DB
|
|
2. evaluate_controls_batch — deterministic keyword check against doc text
|
|
3. search_document — find keyword in document with context
|
|
4. get_document_stats — word count, sections, language
|
|
5. submit_results — final submission of pass/fail results
|
|
|
|
All implementations are deterministic (no LLM). The agent decides
|
|
which tools to call and in what order.
|
|
"""
|
|
|
|
import asyncpg
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
DATABASE_URL = os.getenv(
|
|
"DATABASE_URL",
|
|
"postgresql://breakpilot:breakpilot@bp-core-postgres:5432/breakpilot",
|
|
)
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Tool definitions (OpenAI function-calling format)
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
# Tool schemas handed verbatim to the LLM provider (OpenAI function-calling
# format). Descriptions are intentionally German — they are model-facing
# prompt text, not code comments, and must stay as-is.
TOOLS = [
    # 1. query_controls — load Master Controls for a document type from the DB.
    {
        "type": "function",
        "function": {
            "name": "query_controls",
            "description": (
                "Lade Master Controls aus der Datenbank fuer einen Dokumenttyp. "
                "Gibt eine Liste von Pruefpunkten mit check_question, "
                "pass_criteria und fail_criteria zurueck."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "doc_type": {
                        "type": "string",
                        "description": "Dokumenttyp: dse, agb, impressum, cookie, widerruf, avv, dsfa",
                    },
                    "severity": {
                        "type": "string",
                        "description": "Optional: nur Controls mit dieser Severity (HIGH, MEDIUM, LOW)",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Max Anzahl Controls (default: alle)",
                    },
                },
                "required": ["doc_type"],
            },
        },
    },
    # 2. evaluate_controls_batch — deterministic keyword check of pass_criteria
    #    against the document text (implemented in _evaluate_controls_batch).
    {
        "type": "function",
        "function": {
            "name": "evaluate_controls_batch",
            "description": (
                "Pruefe mehrere Master Controls gegen den Dokumenttext. "
                "Deterministisch: Keyword-Matching der pass_criteria. "
                "Gibt fuer jeden Control pass/fail mit Evidence zurueck."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "controls": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "id": {"type": "string"},
                                "check_question": {"type": "string"},
                                "pass_criteria": {
                                    "type": "array",
                                    "items": {"type": "string"},
                                },
                            },
                            "required": ["id", "check_question", "pass_criteria"],
                        },
                        "description": "Liste von Controls zum Pruefen (max 20 pro Batch)",
                    },
                },
                "required": ["controls"],
            },
        },
    },
    # 3. search_document — case-insensitive keyword search with a context
    #    window around each hit (max 5 hits, see _search_document).
    {
        "type": "function",
        "function": {
            "name": "search_document",
            "description": (
                "Suche ein Schluesselwort im Dokument und gib die "
                "Fundstelle mit Kontext zurueck."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "keyword": {
                        "type": "string",
                        "description": "Suchbegriff (case-insensitive)",
                    },
                    "context_chars": {
                        "type": "integer",
                        "description": "Zeichen Kontext um die Fundstelle (default: 200)",
                    },
                },
                "required": ["keyword"],
            },
        },
    },
    # 4. get_document_stats — word count, heading-like sections, DE/EN guess.
    #    Takes no arguments; operates on the shared context's doc_text.
    {
        "type": "function",
        "function": {
            "name": "get_document_stats",
            "description": (
                "Statistiken zum Dokument: Wortanzahl, erkannte Abschnitte, "
                "Sprache, Laenge."
            ),
            "parameters": {
                "type": "object",
                "properties": {},
            },
        },
    },
    # 5. submit_results — terminal tool: the agent hands back its final
    #    per-control verdicts, which _submit_results stores in the context.
    {
        "type": "function",
        "function": {
            "name": "submit_results",
            "description": (
                "Reiche die finalen Pruefergebnisse ein. Jedes Ergebnis "
                "hat id, label, passed, severity und eine optionale Empfehlung."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "results": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "id": {"type": "string"},
                                "label": {"type": "string"},
                                "passed": {"type": "boolean"},
                                "severity": {"type": "string"},
                                "hint": {"type": "string"},
                                "matched_text": {"type": "string"},
                            },
                            "required": ["id", "label", "passed", "severity"],
                        },
                    },
                },
                "required": ["results"],
            },
        },
    },
]
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Tool dispatcher
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
async def execute_tool(name: str, args: dict, context: dict) -> dict:
    """Dispatch a tool call to its implementation.

    Args:
        name: Tool function name (one of the names declared in TOOLS).
        args: Parsed arguments dict from the model's tool call.
        context: Shared state with doc_text, doc_type, db_url, results.

    Returns:
        The tool's result dict; ``{"error": ...}`` for unknown tool names
        or when an implementation raises unexpectedly.
    """
    handlers = {
        "query_controls": _query_controls,
        "evaluate_controls_batch": _evaluate_controls_batch,
        "search_document": _search_document,
        "get_document_stats": _get_document_stats,
        "submit_results": _submit_results,
    }
    try:
        handler = handlers[name]
    except KeyError:
        return {"error": f"Unbekanntes Tool: {name}"}
    try:
        return await handler(args, context)
    except Exception as exc:
        # Never let a tool failure crash the agent loop — report it back
        # to the model as a structured error instead.
        logger.exception("Tool %s failed", name)
        return {"error": str(exc)}
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Tool implementations
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
async def _query_controls(args: dict, ctx: dict) -> dict:
    """Load Master Controls from compliance.doc_check_controls.

    Args:
        args: Tool arguments — ``doc_type`` (falls back to the context's
            doc_type, then "dse"), optional ``severity`` filter
            (HIGH/MEDIUM/LOW, case-insensitive) and optional ``limit``.
        ctx: Shared agent context; may supply ``doc_type`` and ``db_url``.

    Returns:
        ``{"count": int, "controls": [...]}`` on success, or a dict with
        an ``error`` key and an empty ``controls`` list on DB failure.
    """
    doc_type = args.get("doc_type", ctx.get("doc_type", "dse"))
    severity = args.get("severity")
    limit = args.get("limit", 0)
    db_url = ctx.get("db_url") or DATABASE_URL

    try:
        conn = await asyncpg.connect(db_url)
    except Exception as e:
        return {"error": f"DB-Verbindung fehlgeschlagen: {e}", "controls": []}

    try:
        query = (
            "SELECT id, control_id, title, regulation, check_question, "
            " pass_criteria, fail_criteria, severity "
            "FROM compliance.doc_check_controls "
            "WHERE doc_type = $1"
        )
        params: list = [doc_type]
        if severity:
            params.append(severity.upper())
            query += f" AND UPPER(severity) = ${len(params)}"
        query += " ORDER BY severity DESC, title"
        if limit and int(limit) > 0:
            # Bind LIMIT as a query parameter instead of interpolating it
            # into the SQL string.
            params.append(int(limit))
            query += f" LIMIT ${len(params)}"

        rows = await conn.fetch(query, *params)
        controls = []
        for r in rows:
            # pass_criteria may be stored as a JSON string; normalise to list.
            pc = r["pass_criteria"]
            if isinstance(pc, str):
                try:
                    pc = json.loads(pc)
                except Exception:
                    pc = [pc] if pc else []
            controls.append({
                "id": str(r["id"]),
                # NULL columns come back as None (Record.get's default only
                # covers missing keys) — coerce to "" for the model.
                "control_id": r.get("control_id") or "",
                "title": r.get("title") or "",
                "regulation": r.get("regulation") or "",
                "check_question": r.get("check_question") or "",
                "pass_criteria": pc if isinstance(pc, list) else [pc],
                "severity": r.get("severity") or "MEDIUM",
            })
        return {"count": len(controls), "controls": controls}
    except Exception as e:
        return {"error": f"Abfrage fehlgeschlagen: {e}", "controls": []}
    finally:
        await conn.close()
|
|
|
|
|
|
_STOP = {
|
|
"oder", "und", "der", "die", "das", "ein", "eine", "von", "vom",
|
|
"zur", "zum", "mit", "auf", "aus", "bei", "nach", "nicht", "kein",
|
|
"wird", "werden", "kann", "muss", "ist", "sind", "hat", "dass",
|
|
}
|
|
|
|
|
|
async def _evaluate_controls_batch(args: dict, ctx: dict) -> dict:
|
|
"""Keyword-match each control's pass_criteria against doc_text."""
|
|
controls = args.get("controls", [])[:20]
|
|
text_lower = ctx.get("doc_text", "").lower().replace("\xad", "")
|
|
results = []
|
|
|
|
for ctrl in controls:
|
|
criteria = ctrl.get("pass_criteria", [])
|
|
met = 0
|
|
evidence = ""
|
|
for crit in criteria:
|
|
words = [w for w in re.findall(r"[a-z\u00e4\u00f6\u00fc\u00df]{4,}", crit.lower()) if w not in _STOP]
|
|
if not words:
|
|
met += 1
|
|
continue
|
|
matched = sum(1 for w in words if w in text_lower)
|
|
if matched >= len(words) * 0.5:
|
|
met += 1
|
|
if not evidence:
|
|
for w in words:
|
|
idx = text_lower.find(w)
|
|
if idx >= 0:
|
|
s = max(0, idx - 30)
|
|
e = min(len(text_lower), idx + len(w) + 30)
|
|
evidence = text_lower[s:e].strip()
|
|
break
|
|
passed = met >= len(criteria) * 0.6 if criteria else False
|
|
results.append({
|
|
"id": ctrl.get("id", ""),
|
|
"passed": passed,
|
|
"criteria_met": f"{met}/{len(criteria)}",
|
|
"matched_text": evidence[:120],
|
|
})
|
|
return {"evaluated": len(results), "results": results}
|
|
|
|
|
|
async def _search_document(args: dict, ctx: dict) -> dict:
|
|
"""Simple keyword search with context window."""
|
|
keyword = args.get("keyword", "").lower()
|
|
context_chars = args.get("context_chars", 200)
|
|
text = ctx.get("doc_text", "")
|
|
text_lower = text.lower()
|
|
|
|
hits = []
|
|
start = 0
|
|
while len(hits) < 5:
|
|
idx = text_lower.find(keyword, start)
|
|
if idx < 0:
|
|
break
|
|
s = max(0, idx - context_chars)
|
|
e = min(len(text), idx + len(keyword) + context_chars)
|
|
hits.append({"position": idx, "context": text[s:e].strip()})
|
|
start = idx + len(keyword)
|
|
|
|
return {"keyword": keyword, "found": len(hits) > 0, "hits": hits}
|
|
|
|
|
|
async def _get_document_stats(args: dict, ctx: dict) -> dict:
|
|
"""Word count, detected sections, language guess."""
|
|
text = ctx.get("doc_text", "")
|
|
words = text.split()
|
|
|
|
# Detect sections (lines that look like headings)
|
|
sections = []
|
|
for line in text.split("\n"):
|
|
stripped = line.strip()
|
|
if 3 < len(stripped) < 120 and stripped[0].isupper() and not stripped.endswith(","):
|
|
if re.match(r"^(\d+\.?\s+|[IVXLC]+\.?\s+|[a-z]\)\s+)?[A-ZAEOEUE\u00c4\u00d6\u00dc]", stripped):
|
|
sections.append(stripped[:80])
|
|
|
|
# Language guess (DE vs EN heuristic)
|
|
de_markers = sum(1 for w in ["der", "die", "das", "und", "ist", "werden", "nicht"] if w in text.lower())
|
|
en_markers = sum(1 for w in ["the", "and", "is", "are", "not", "with", "for"] if w in text.lower())
|
|
lang = "de" if de_markers > en_markers else "en"
|
|
|
|
return {
|
|
"word_count": len(words),
|
|
"char_count": len(text),
|
|
"sections_detected": len(sections),
|
|
"sections": sections[:20],
|
|
"language": lang,
|
|
}
|
|
|
|
|
|
async def _submit_results(args: dict, ctx: dict) -> dict:
|
|
"""Store final results in context for the caller to retrieve."""
|
|
results = args.get("results", [])
|
|
# Normalise into CheckItem-compatible format
|
|
normalised = []
|
|
for r in results:
|
|
normalised.append({
|
|
"id": r.get("id", ""),
|
|
"label": r.get("label", ""),
|
|
"passed": r.get("passed", False),
|
|
"severity": r.get("severity", "MEDIUM"),
|
|
"hint": r.get("hint", ""),
|
|
"matched_text": r.get("matched_text", ""),
|
|
"level": 2,
|
|
"parent": None,
|
|
"skipped": False,
|
|
"source": "agent",
|
|
})
|
|
ctx["results"] = normalised
|
|
passed = sum(1 for r in normalised if r["passed"])
|
|
return {
|
|
"submitted": len(normalised),
|
|
"passed": passed,
|
|
"failed": len(normalised) - passed,
|
|
}
|