Files
breakpilot-compliance/backend-compliance/compliance/services/agent_tools.py
T
Benjamin Admin 58f370f4ff feat: LLM-agnostic Compliance Agent with tool calling
New agent architecture for intelligent MC evaluation:

agent_tools.py (367 LOC):
- 5 tools in OpenAI function-calling format
- query_controls: async DB query for MCs by doc_type
- evaluate_controls_batch: deterministic keyword matching
- search_document: text search with context
- get_document_stats: word count, sections, language
- submit_results: finalize check results

compliance_agent.py (398 LOC):
- ComplianceAgent class with agent loop
- 3 LLM providers: Ollama, OpenAI-compatible (OVH), Anthropic
- Tool call dispatch + result collection
- System prompt for systematic compliance analysis
- run_compliance_check() convenience function

Hybrid mode:
- COMPLIANCE_USE_AGENT=false (default): deterministic regex
- COMPLIANCE_USE_AGENT=true: LLM agent with tool calling
- Agent fallback to regex if LLM unavailable

Works with Qwen 35B (Ollama), Qwen 120B (OVH vLLM), Claude.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-10 22:56:09 +02:00

368 lines
14 KiB
Python

"""
Agent Tools — LLM-agnostic tool definitions for the Compliance Agent.
Provides 5 tools in OpenAI function-calling format:
1. query_controls — load Master Controls from DB
2. evaluate_controls_batch — deterministic keyword check against doc text
3. search_document — find keyword in document with context
4. get_document_stats — word count, sections, language
5. submit_results — final submission of pass/fail results
All implementations are deterministic (no LLM). The agent decides
which tools to call and in what order.
"""
import asyncpg
import json
import logging
import os
import re
logger = logging.getLogger(__name__)  # module-level logger; configured by the application

# Default DSN for the compliance Postgres instance. The DATABASE_URL
# environment variable overrides it, and a per-call "db_url" entry in
# the tool context takes precedence over both (see _query_controls).
DATABASE_URL = os.getenv(
    "DATABASE_URL",
    "postgresql://breakpilot:breakpilot@bp-core-postgres:5432/breakpilot",
)
# ═══════════════════════════════════════════════════════════════
# Tool definitions (OpenAI function-calling format)
# ═══════════════════════════════════════════════════════════════
# Tool schema handed to the LLM provider. The descriptions are German
# runtime strings (the agent works on German-language compliance
# documents) — do not translate them.
TOOLS = [
    # 1. query_controls — load Master Controls for a document type from the DB.
    {
        "type": "function",
        "function": {
            "name": "query_controls",
            "description": (
                "Lade Master Controls aus der Datenbank fuer einen Dokumenttyp. "
                "Gibt eine Liste von Pruefpunkten mit check_question, "
                "pass_criteria und fail_criteria zurueck."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "doc_type": {
                        "type": "string",
                        "description": "Dokumenttyp: dse, agb, impressum, cookie, widerruf, avv, dsfa",
                    },
                    "severity": {
                        "type": "string",
                        "description": "Optional: nur Controls mit dieser Severity (HIGH, MEDIUM, LOW)",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Max Anzahl Controls (default: alle)",
                    },
                },
                "required": ["doc_type"],
            },
        },
    },
    # 2. evaluate_controls_batch — deterministic keyword check of a batch
    #    of controls against the document text (max 20 per call).
    {
        "type": "function",
        "function": {
            "name": "evaluate_controls_batch",
            "description": (
                "Pruefe mehrere Master Controls gegen den Dokumenttext. "
                "Deterministisch: Keyword-Matching der pass_criteria. "
                "Gibt fuer jeden Control pass/fail mit Evidence zurueck."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "controls": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "id": {"type": "string"},
                                "check_question": {"type": "string"},
                                "pass_criteria": {
                                    "type": "array",
                                    "items": {"type": "string"},
                                },
                            },
                            "required": ["id", "check_question", "pass_criteria"],
                        },
                        "description": "Liste von Controls zum Pruefen (max 20 pro Batch)",
                    },
                },
                "required": ["controls"],
            },
        },
    },
    # 3. search_document — case-insensitive keyword search with context window.
    {
        "type": "function",
        "function": {
            "name": "search_document",
            "description": (
                "Suche ein Schluesselwort im Dokument und gib die "
                "Fundstelle mit Kontext zurueck."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "keyword": {
                        "type": "string",
                        "description": "Suchbegriff (case-insensitive)",
                    },
                    "context_chars": {
                        "type": "integer",
                        "description": "Zeichen Kontext um die Fundstelle (default: 200)",
                    },
                },
                "required": ["keyword"],
            },
        },
    },
    # 4. get_document_stats — word count, detected sections, language guess.
    #    Takes no arguments (empty properties object).
    {
        "type": "function",
        "function": {
            "name": "get_document_stats",
            "description": (
                "Statistiken zum Dokument: Wortanzahl, erkannte Abschnitte, "
                "Sprache, Laenge."
            ),
            "parameters": {
                "type": "object",
                "properties": {},
            },
        },
    },
    # 5. submit_results — final submission of pass/fail results; ends the run.
    {
        "type": "function",
        "function": {
            "name": "submit_results",
            "description": (
                "Reiche die finalen Pruefergebnisse ein. Jedes Ergebnis "
                "hat id, label, passed, severity und eine optionale Empfehlung."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "results": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "id": {"type": "string"},
                                "label": {"type": "string"},
                                "passed": {"type": "boolean"},
                                "severity": {"type": "string"},
                                "hint": {"type": "string"},
                                "matched_text": {"type": "string"},
                            },
                            "required": ["id", "label", "passed", "severity"],
                        },
                    },
                },
                "required": ["results"],
            },
        },
    },
]
# ═══════════════════════════════════════════════════════════════
# Tool dispatcher
# ═══════════════════════════════════════════════════════════════
async def execute_tool(name: str, args: dict, context: dict) -> dict:
    """Route a named tool call to its implementation.

    Args:
        name: Tool function name as announced in ``TOOLS``.
        args: Parsed argument dict from the LLM tool call.
        context: Shared mutable state (doc_text, doc_type, db_url, results).

    Returns:
        The tool's result dict, or an ``{"error": ...}`` dict for unknown
        tool names or unexpected implementation failures.
    """
    handlers = {
        "query_controls": _query_controls,
        "evaluate_controls_batch": _evaluate_controls_batch,
        "search_document": _search_document,
        "get_document_stats": _get_document_stats,
        "submit_results": _submit_results,
    }
    try:
        handler = handlers[name]
    except KeyError:
        return {"error": f"Unbekanntes Tool: {name}"}
    try:
        return await handler(args, context)
    except Exception as exc:
        # Never propagate — the agent loop expects a result dict per call.
        logger.exception("Tool %s failed", name)
        return {"error": str(exc)}
# ═══════════════════════════════════════════════════════════════
# Tool implementations
# ═══════════════════════════════════════════════════════════════
async def _query_controls(args: dict, ctx: dict) -> dict:
"""Load Master Controls from compliance.doc_check_controls."""
doc_type = args.get("doc_type", ctx.get("doc_type", "dse"))
severity = args.get("severity")
limit = args.get("limit", 0)
db_url = ctx.get("db_url") or DATABASE_URL
try:
conn = await asyncpg.connect(db_url)
except Exception as e:
return {"error": f"DB-Verbindung fehlgeschlagen: {e}", "controls": []}
try:
query = (
"SELECT id, control_id, title, regulation, check_question, "
" pass_criteria, fail_criteria, severity "
"FROM compliance.doc_check_controls "
"WHERE doc_type = $1"
)
params: list = [doc_type]
if severity:
query += " AND UPPER(severity) = $2"
params.append(severity.upper())
query += " ORDER BY severity DESC, title"
if limit and limit > 0:
query += f" LIMIT {int(limit)}"
rows = await conn.fetch(query, *params)
controls = []
for r in rows:
pc = r["pass_criteria"]
if isinstance(pc, str):
try:
pc = json.loads(pc)
except Exception:
pc = [pc] if pc else []
controls.append({
"id": str(r["id"]),
"control_id": r.get("control_id", ""),
"title": r.get("title", ""),
"regulation": r.get("regulation", ""),
"check_question": r.get("check_question", ""),
"pass_criteria": pc if isinstance(pc, list) else [pc],
"severity": r.get("severity", "MEDIUM"),
})
return {"count": len(controls), "controls": controls}
except Exception as e:
return {"error": f"Abfrage fehlgeschlagen: {e}", "controls": []}
finally:
await conn.close()
_STOP = {
"oder", "und", "der", "die", "das", "ein", "eine", "von", "vom",
"zur", "zum", "mit", "auf", "aus", "bei", "nach", "nicht", "kein",
"wird", "werden", "kann", "muss", "ist", "sind", "hat", "dass",
}
async def _evaluate_controls_batch(args: dict, ctx: dict) -> dict:
"""Keyword-match each control's pass_criteria against doc_text."""
controls = args.get("controls", [])[:20]
text_lower = ctx.get("doc_text", "").lower().replace("\xad", "")
results = []
for ctrl in controls:
criteria = ctrl.get("pass_criteria", [])
met = 0
evidence = ""
for crit in criteria:
words = [w for w in re.findall(r"[a-z\u00e4\u00f6\u00fc\u00df]{4,}", crit.lower()) if w not in _STOP]
if not words:
met += 1
continue
matched = sum(1 for w in words if w in text_lower)
if matched >= len(words) * 0.5:
met += 1
if not evidence:
for w in words:
idx = text_lower.find(w)
if idx >= 0:
s = max(0, idx - 30)
e = min(len(text_lower), idx + len(w) + 30)
evidence = text_lower[s:e].strip()
break
passed = met >= len(criteria) * 0.6 if criteria else False
results.append({
"id": ctrl.get("id", ""),
"passed": passed,
"criteria_met": f"{met}/{len(criteria)}",
"matched_text": evidence[:120],
})
return {"evaluated": len(results), "results": results}
async def _search_document(args: dict, ctx: dict) -> dict:
"""Simple keyword search with context window."""
keyword = args.get("keyword", "").lower()
context_chars = args.get("context_chars", 200)
text = ctx.get("doc_text", "")
text_lower = text.lower()
hits = []
start = 0
while len(hits) < 5:
idx = text_lower.find(keyword, start)
if idx < 0:
break
s = max(0, idx - context_chars)
e = min(len(text), idx + len(keyword) + context_chars)
hits.append({"position": idx, "context": text[s:e].strip()})
start = idx + len(keyword)
return {"keyword": keyword, "found": len(hits) > 0, "hits": hits}
async def _get_document_stats(args: dict, ctx: dict) -> dict:
"""Word count, detected sections, language guess."""
text = ctx.get("doc_text", "")
words = text.split()
# Detect sections (lines that look like headings)
sections = []
for line in text.split("\n"):
stripped = line.strip()
if 3 < len(stripped) < 120 and stripped[0].isupper() and not stripped.endswith(","):
if re.match(r"^(\d+\.?\s+|[IVXLC]+\.?\s+|[a-z]\)\s+)?[A-ZAEOEUE\u00c4\u00d6\u00dc]", stripped):
sections.append(stripped[:80])
# Language guess (DE vs EN heuristic)
de_markers = sum(1 for w in ["der", "die", "das", "und", "ist", "werden", "nicht"] if w in text.lower())
en_markers = sum(1 for w in ["the", "and", "is", "are", "not", "with", "for"] if w in text.lower())
lang = "de" if de_markers > en_markers else "en"
return {
"word_count": len(words),
"char_count": len(text),
"sections_detected": len(sections),
"sections": sections[:20],
"language": lang,
}
async def _submit_results(args: dict, ctx: dict) -> dict:
"""Store final results in context for the caller to retrieve."""
results = args.get("results", [])
# Normalise into CheckItem-compatible format
normalised = []
for r in results:
normalised.append({
"id": r.get("id", ""),
"label": r.get("label", ""),
"passed": r.get("passed", False),
"severity": r.get("severity", "MEDIUM"),
"hint": r.get("hint", ""),
"matched_text": r.get("matched_text", ""),
"level": 2,
"parent": None,
"skipped": False,
"source": "agent",
})
ctx["results"] = normalised
passed = sum(1 for r in normalised if r["passed"])
return {
"submitted": len(normalised),
"passed": passed,
"failed": len(normalised) - passed,
}