feat(agent): MC scorecard + audit drill-down + tenant trend (A1-A6)
Now that all 1874 MCs run per check (Task #30 cap removal), the report was about to drown in noise. This commit adds the full aggregation / persistence / drill-down stack so each MC is actionable, not just counted.

A1 mc_scorecard.py (new):
  - build_scorecard(checks) -> per-regulation PASS/FAIL/SKIP + severity
  - top_fails(checks, n) -> N most severe failed MCs
  - full_audit_records(...) -> flat rows ready for sidecar SQLite

A2 Email rendering: agent_doc_check_scorecard.py (new) builds an HTML scorecard table (regulation × passed/failed/HIGH/MEDIUM/score) shown at the top of the email. agent_doc_check_report._render_document now collapses the 500-MC L2 forest into an 'X/Y bestanden (Z Fail)' summary plus a top-10 fails block per doc — the old verbose render is gone.

A3 compliance_audit_log.py (new) — sidecar SQLite at /data/compliance_audits.db (separate from the compliance Postgres schema to comply with the no-new-migrations rule in CLAUDE.md):
  - check_runs(check_id, ts, tenant_id, site_name, base_domain, doc_count, scorecard json, vvt_summary json)
  - mc_results(check_id, doc_type, mc_id, label, passed, skipped, severity, regulation, matched_text, hint)
  The route persists every run after the email is sent. docker-compose.yml adds the compliance-audit volume + env.

A4 backfill_mc_regulation_llm.py (new) — Qwen-tagged backfill for the 1636 MCs the regex pass couldn't classify. Batches of 25, format=json, output constrained to the canonical regulation list. Run manually:
  docker exec bp-compliance-backend python3 \
    /app/scripts/backfill_mc_regulation_llm.py [--dry-run]

A5 Admin audit tab — GET /api/compliance/agent/audit/<check_id>, proxied via /api/sdk/v1/agent/audit/<id>. The new page /sdk/agent/audit/[checkId] renders the scorecard + a filterable MC table (status / doc_type / regulation, expandable rows with matched_text + hint). ComplianceCheckTab now shows a 'Voll-Audit oeffnen' link.

A6 Trend per tenant — GET /api/compliance/agent/audit/tenant/<id> returns recent runs. The email scorecard shows per-regulation delta badges ('(+12%)', '(-3%)') compared with the previous run for the same tenant + base_domain. The lookup is one SQLite query.

Plumbing:
  - rag_document_checker.py — SELECT now includes 'article'; MC results carry 'regulation' + 'article' through to CheckItem.
  - agent_doc_check_routes.CheckItem schema gains regulation + article fields (defaults '') so old clients still parse.
  - agent_compliance_check_routes — response gains 'check_id' so the frontend can build the audit link.
This commit is contained in:
@@ -0,0 +1,196 @@
|
||||
"""
|
||||
Compliance-Check Audit Log — sidecar SQLite persistence.
|
||||
|
||||
Every compliance-check run flattens its MC results into rows here so
|
||||
we have:
|
||||
- per-tenant history of scorecards (Task A6 trend view)
|
||||
- drill-down on individual MCs for the admin frontend (Task A5)
|
||||
- export-ability (DSB receives JSON attachment derived from this)
|
||||
|
||||
Sidecar SQLite (`/data/compliance_audits.db`) instead of a new table in
|
||||
the compliance schema, because the repo policy forbids new migrations
|
||||
without explicit DB-owner sign-off (see CLAUDE.md guardrails).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sqlite3
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Filesystem path of the sidecar SQLite database; the COMPLIANCE_AUDIT_DB
# environment variable overrides the default location.
DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
|
||||
|
||||
|
||||
def _ensure_db() -> None:
    """Create the audit database file, its tables and indexes if missing.

    Safe to call repeatedly: the parent directory is created with
    ``exist_ok=True`` and every DDL statement uses ``IF NOT EXISTS``.

    Raises:
        OSError / sqlite3.Error: if the directory or database cannot be
            created; callers in this module catch and log these.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    # The sqlite3 connection context manager only commits / rolls back;
    # it does NOT close the handle, so close it explicitly.
    conn = sqlite3.connect(DB_PATH)
    try:
        with conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS check_runs (
                    check_id TEXT PRIMARY KEY,
                    ts TEXT NOT NULL,
                    tenant_id TEXT,
                    site_name TEXT,
                    base_domain TEXT,
                    doc_count INTEGER,
                    scorecard TEXT, -- JSON {by_regulation, totals}
                    vvt_summary TEXT -- JSON {total, internal, external, critical}
                );
                CREATE INDEX IF NOT EXISTS idx_runs_tenant ON check_runs(tenant_id, ts);
                CREATE INDEX IF NOT EXISTS idx_runs_domain ON check_runs(base_domain, ts);

                CREATE TABLE IF NOT EXISTS mc_results (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    check_id TEXT NOT NULL,
                    doc_type TEXT,
                    mc_id TEXT,
                    label TEXT,
                    passed INTEGER,
                    skipped INTEGER,
                    severity TEXT,
                    regulation TEXT,
                    matched_text TEXT,
                    hint TEXT
                );
                CREATE INDEX IF NOT EXISTS idx_mc_check ON mc_results(check_id);
                CREATE INDEX IF NOT EXISTS idx_mc_reg ON mc_results(regulation, passed);
            """)
    finally:
        conn.close()
|
||||
|
||||
|
||||
def record_check_run(
    check_id: str,
    tenant_id: str,
    site_name: str,
    base_domain: str,
    doc_count: int,
    scorecard: dict,
    vvt_summary: dict | None = None,
    mc_records: list[dict] | None = None,
) -> None:
    """Persist one check run plus all its MC rows. Idempotent on check_id.

    The run row is written with ``INSERT OR REPLACE`` and any existing
    ``mc_results`` rows for ``check_id`` are deleted before the new ones
    are inserted, all inside a single transaction.

    Args:
        check_id: Primary key of the run; also stamped on every MC row.
        tenant_id, site_name, base_domain, doc_count: Stored verbatim.
        scorecard: Serialised to JSON into the ``scorecard`` column.
        vvt_summary: Serialised to JSON; ``None`` is stored as ``{}``.
        mc_records: Flat MC dicts (keys: doc_type, mc_id, label, passed,
            skipped, severity, regulation, matched_text, hint). ``label``
            is cut to 300 characters, ``matched_text`` and ``hint`` to 500;
            ``severity`` is upper-cased; ``passed`` / ``skipped`` are stored
            as 0/1.

    Persistence is best-effort: any exception is logged as a warning and
    swallowed so the calling route is never broken by an audit failure.
    """
    try:
        _ensure_db()
        ts = datetime.now(timezone.utc).isoformat()
        # Rows are always stored under the run's own check_id. A differing
        # "check_id" inside a record would escape the DELETE below and
        # break idempotency, so it is deliberately ignored.
        mc_rows = [
            (
                check_id,
                r.get("doc_type", ""),
                r.get("mc_id", ""),
                (r.get("label") or "")[:300],
                1 if r.get("passed") else 0,
                1 if r.get("skipped") else 0,
                (r.get("severity") or "").upper(),
                r.get("regulation") or "",
                (r.get("matched_text") or "")[:500],
                (r.get("hint") or "")[:500],
            )
            for r in mc_records or []
        ]
        # `with conn` commits on success / rolls back on error but does not
        # close the connection, hence the explicit close in `finally`.
        conn = sqlite3.connect(DB_PATH)
        try:
            with conn:
                conn.execute(
                    "INSERT OR REPLACE INTO check_runs "
                    "(check_id, ts, tenant_id, site_name, base_domain, doc_count, "
                    " scorecard, vvt_summary) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                    (
                        check_id, ts, tenant_id, site_name, base_domain, doc_count,
                        json.dumps(scorecard, ensure_ascii=False),
                        json.dumps(vvt_summary or {}, ensure_ascii=False),
                    ),
                )
                # Clear old rows for the same check_id before re-inserting
                # (idempotency).
                conn.execute("DELETE FROM mc_results WHERE check_id=?", (check_id,))
                if mc_rows:
                    conn.executemany(
                        "INSERT INTO mc_results "
                        "(check_id, doc_type, mc_id, label, passed, skipped, "
                        " severity, regulation, matched_text, hint) "
                        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                        mc_rows,
                    )
        finally:
            conn.close()
        logger.info("Audit recorded: check_id=%s mc_rows=%d",
                    check_id, len(mc_rows))
    except Exception as e:
        logger.warning("Audit persistence failed for %s: %s", check_id, e)
|
||||
|
||||
|
||||
# ── Read API (used by the admin endpoints + trend view) ─────────────
|
||||
|
||||
def get_check_run(check_id: str) -> dict | None:
    """Return the stored run for ``check_id`` with its JSON columns decoded.

    Args:
        check_id: Primary key of the run in ``check_runs``.

    Returns:
        A dict of all ``check_runs`` columns where ``scorecard`` and
        ``vvt_summary`` are parsed from JSON (empty/NULL becomes ``{}``),
        or ``None`` when the run does not exist or the lookup fails
        (failures are logged as warnings).
    """
    try:
        _ensure_db()
        # The sqlite3 context manager would not close the connection, so
        # manage its lifetime explicitly.
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM check_runs WHERE check_id=?", (check_id,),
            ).fetchone()
            d = dict(row) if row else None
        finally:
            conn.close()
        if d is None:
            return None
        d["scorecard"] = json.loads(d.get("scorecard") or "{}")
        d["vvt_summary"] = json.loads(d.get("vvt_summary") or "{}")
        return d
    except Exception as e:
        logger.warning("get_check_run failed: %s", e)
        return None
|
||||
|
||||
|
||||
def list_mc_results(
    check_id: str,
    doc_type: str | None = None,
    regulation: str | None = None,
    only_failed: bool = False,
) -> list[dict]:
    """Return the MC rows of one check run, optionally filtered.

    Args:
        check_id: Run whose rows are listed.
        doc_type: When truthy, keep only rows with this exact doc_type.
        regulation: When truthy, keep only rows with this exact regulation.
        only_failed: When true, keep only rows that neither passed nor
            were skipped.

    Returns:
        Rows as dicts ordered from most to least severe
        (CRITICAL, HIGH, MEDIUM, LOW, INFO, then anything else) and by
        label within a severity. An empty list on any error (logged as a
        warning).
    """
    try:
        _ensure_db()
        where = ["check_id = ?"]
        params: list = [check_id]
        if doc_type:
            where.append("doc_type = ?")
            params.append(doc_type)
        if regulation:
            where.append("regulation = ?")
            params.append(regulation)
        if only_failed:
            where.append("passed = 0 AND skipped = 0")
        # A plain `ORDER BY severity` is alphabetical (INFO before LOW,
        # MEDIUM last); rank explicitly so the most severe rows come first.
        order_by = (
            " ORDER BY CASE severity"
            " WHEN 'CRITICAL' THEN 0 WHEN 'HIGH' THEN 1 WHEN 'MEDIUM' THEN 2"
            " WHEN 'LOW' THEN 3 WHEN 'INFO' THEN 4 ELSE 5 END, label"
        )
        # Only fixed fragments are concatenated; all values are bound.
        sql = "SELECT * FROM mc_results WHERE " + " AND ".join(where) + order_by
        # The sqlite3 context manager would not close the connection, so
        # manage its lifetime explicitly.
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            return [dict(r) for r in conn.execute(sql, params).fetchall()]
        finally:
            conn.close()
    except Exception as e:
        logger.warning("list_mc_results failed: %s", e)
        return []
|
||||
|
||||
|
||||
def list_runs_for_tenant(
    tenant_id: str,
    base_domain: str | None = None,
    limit: int = 30,
) -> list[dict]:
    """Return the most recent check runs of a tenant, newest first.

    Args:
        tenant_id: Tenant whose runs are listed.
        base_domain: When truthy, keep only runs for this exact domain.
        limit: Maximum number of runs returned (passed to SQL ``LIMIT``).

    Returns:
        ``check_runs`` rows as dicts ordered by ``ts`` descending, with
        ``scorecard`` parsed from JSON (empty/NULL becomes ``{}``);
        ``vvt_summary`` is returned as stored. An empty list on any error
        (logged as a warning).
    """
    try:
        _ensure_db()
        where = ["tenant_id = ?"]
        params: list = [tenant_id]
        if base_domain:
            where.append("base_domain = ?")
            params.append(base_domain)
        sql = ("SELECT * FROM check_runs WHERE " + " AND ".join(where)
               + " ORDER BY ts DESC LIMIT ?")
        params.append(limit)
        # The sqlite3 context manager would not close the connection, so
        # manage its lifetime explicitly.
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            out = [dict(r) for r in conn.execute(sql, params).fetchall()]
        finally:
            conn.close()
        for d in out:
            d["scorecard"] = json.loads(d.get("scorecard") or "{}")
        return out
    except Exception as e:
        logger.warning("list_runs_for_tenant failed: %s", e)
        return []
|
||||
@@ -0,0 +1,151 @@
|
||||
"""
|
||||
Master-Control Scorecard — group + summarise MC results.
|
||||
|
||||
With max_controls=0 (#30 fix) every doc-check now evaluates 75-571 MCs
|
||||
per document. Rendering all of them verbatim makes the email + frontend
|
||||
unreadable. This module produces three structured artefacts:
|
||||
|
||||
1. `build_scorecard(check_results)` — per-regulation aggregate (PASS /
|
||||
FAIL / SKIP counts + severity histogram + compliance %)
|
||||
|
||||
2. `top_fails(check_results, n=10)` — top-N failed MCs ranked by
|
||||
severity, then alphabetically by label
|
||||
|
||||
3. `full_audit_records(check_results, check_id, tenant_id)` — flat
|
||||
list ready for SQLite persistence + JSON export
|
||||
|
||||
The functions are pure — no DB / network — so they're cheap to call
|
||||
from inside the route and unit-testable.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Severity order: CRITICAL > HIGH > MEDIUM > LOW > INFO
|
||||
# Sort rank per severity: a lower number sorts first. top_fails() falls back
# to rank 5 for severities that are missing from this map.
_SEV_RANK = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3, "INFO": 4}
|
||||
|
||||
|
||||
def build_scorecard(check_results: list[dict]) -> dict:
    """Summarise MC results per regulation and overall.

    Each result is counted as exactly one of skipped / passed / failed
    (checked in that order). Results without a regulation are grouped
    under "—". Severities are tallied for failed results only; a failed
    result without a severity counts as MEDIUM. ``pct`` is the rounded
    share of passed results among the non-skipped ones (0 when there are
    none).

    Args:
        check_results: CheckItem-like dicts with the keys ``passed``,
            ``skipped``, ``severity`` and ``regulation``; ``None`` is
            treated as an empty list.

    Returns:
        {
            "by_regulation": [
                {"regulation": "DSGVO", "total": 193, "passed": 167,
                 "failed": 24, "skipped": 2, "pct": 87,
                 "severity": {"HIGH": 22, "MEDIUM": 2}}
            ],
            "totals": {"total": 1874, "passed": 1300, "failed": 540,
                       "skipped": 34, "pct": 70},
        }
        ``by_regulation`` is ordered by failed count (descending), then
        regulation name.
    """
    counters = ("total", "passed", "failed", "skipped")

    def _empty_bucket() -> dict:
        return {"total": 0, "passed": 0, "failed": 0, "skipped": 0,
                "severity": defaultdict(int)}

    def _percent(passed: int, total: int, skipped: int) -> int:
        evaluated = total - skipped
        return round(passed / evaluated * 100) if evaluated else 0

    per_regulation: dict[str, dict] = defaultdict(_empty_bucket)
    for item in check_results or []:
        name = (item.get("regulation") or "—").strip() or "—"
        bucket = per_regulation[name]
        bucket["total"] += 1
        if item.get("skipped"):
            outcome = "skipped"
        elif item.get("passed"):
            outcome = "passed"
        else:
            outcome = "failed"
        bucket[outcome] += 1
        if outcome == "failed":
            level = (item.get("severity") or "MEDIUM").upper()
            bucket["severity"][level] += 1

    table = [
        {
            "regulation": name,
            "total": bucket["total"],
            "passed": bucket["passed"],
            "failed": bucket["failed"],
            "skipped": bucket["skipped"],
            "pct": _percent(bucket["passed"], bucket["total"], bucket["skipped"]),
            # Plain dict so the result is JSON-serialisable.
            "severity": dict(bucket["severity"]),
        }
        for name, bucket in per_regulation.items()
    ]
    table.sort(key=lambda row: (-row["failed"], row["regulation"]))

    overall = {field: sum(row[field] for row in table) for field in counters}
    overall["pct"] = _percent(overall["passed"], overall["total"], overall["skipped"])
    return {"by_regulation": table, "totals": overall}
|
||||
|
||||
|
||||
def top_fails(check_results: list[dict], n: int = 10) -> list[dict]:
    """Return the top-N failing MCs sorted by severity, then label.

    Skipped and passed MCs are excluded. INFO severity is excluded too,
    since those are guidance, not findings. A failed MC without a severity
    is ranked as MEDIUM; a severity unknown to ``_SEV_RANK`` ranks last.

    Args:
        check_results: CheckItem-like dicts; ``None`` is treated as empty.
        n: Maximum number of records returned; values below 0 yield an
            empty list.

    Returns:
        The original result dicts (not copies), most severe first.
    """
    fails = [
        r for r in (check_results or [])
        if not r.get("passed") and not r.get("skipped")
        and (r.get("severity") or "").upper() != "INFO"
    ]
    fails.sort(key=lambda r: (
        _SEV_RANK.get((r.get("severity") or "MEDIUM").upper(), 5),
        # `or ""` instead of a .get default: a label that is present but
        # None would otherwise make the tuple comparison raise TypeError.
        r.get("label") or "",
    ))
    # Clamp so a negative n cannot turn into "all but the last |n|".
    return fails[:max(n, 0)]
|
||||
|
||||
|
||||
def full_audit_records(
    check_results: list[dict],
    check_id: str,
    tenant_id: str = "",
    doc_type: str = "",
) -> list[dict]:
    """Turn check results into flat, persistence-ready audit rows.

    One row is produced per MC. Every row carries the given ``check_id``,
    ``tenant_id`` and ``doc_type`` plus one shared UTC ISO timestamp
    (``ts``). ``label`` is cut to 300 characters, ``matched_text`` and
    ``hint`` to 500; ``severity`` is upper-cased; ``passed`` / ``skipped``
    become booleans; a missing ``level`` defaults to 1.

    Args:
        check_results: CheckItem-like dicts; ``None`` is treated as empty.
        check_id: Identifier of the run the rows belong to.
        tenant_id: Tenant stamped on every row.
        doc_type: Document type stamped on every row.

    Returns:
        A list with one dict per input result, in input order.
    """
    stamp = datetime.now(timezone.utc).isoformat()

    def _text(item: dict, key: str) -> str:
        # Missing and None values both collapse to the empty string.
        return item.get(key) or ""

    return [
        {
            "check_id": check_id,
            "tenant_id": tenant_id,
            "doc_type": doc_type,
            "ts": stamp,
            "mc_id": item.get("id", ""),
            "label": _text(item, "label")[:300],
            "passed": bool(item.get("passed")),
            "skipped": bool(item.get("skipped")),
            "severity": _text(item, "severity").upper(),
            "regulation": _text(item, "regulation"),
            "matched_text": _text(item, "matched_text")[:500],
            "hint": _text(item, "hint")[:500],
            "level": int(item.get("level") or 1),
        }
        for item in check_results or []
    ]
|
||||
@@ -171,6 +171,8 @@ def _check_mc_deterministic(text_lower: str, mc: dict) -> Optional[dict]:
|
||||
"hint": question if not passed else "",
|
||||
"source": "master_control",
|
||||
"criteria_met": f"{criteria_met}/{total_criteria}",
|
||||
"regulation": mc.get("regulation") or "",
|
||||
"article": mc.get("article") or "",
|
||||
}
|
||||
|
||||
|
||||
@@ -282,8 +284,8 @@ async def _load_controls(doc_type: str, db_url: str, limit: int) -> list[dict]:
|
||||
return []
|
||||
|
||||
try:
|
||||
query = """SELECT id, control_id, title, regulation, check_question,
|
||||
pass_criteria, fail_criteria, severity
|
||||
query = """SELECT id, control_id, title, regulation, article,
|
||||
check_question, pass_criteria, fail_criteria, severity
|
||||
FROM compliance.doc_check_controls
|
||||
WHERE doc_type = $1
|
||||
ORDER BY severity DESC, title"""
|
||||
|
||||
Reference in New Issue
Block a user