feat(agent): MC scorecard + audit drill-down + tenant trend (A1-A6)

Now that all 1874 MCs run per check (Task #30 cap removal), the report
was about to drown in noise. This commit adds the full aggregation /
persistence / drill-down stack so each MC is actionable, not just
counted.

A1 mc_scorecard.py (new):
  build_scorecard(checks)    -> per-regulation PASS/FAIL/SKIP + severity
  top_fails(checks, n)       -> N most severe failed MCs
  full_audit_records(...)    -> flat rows ready for sidecar SQLite

A2 Email rendering:
  agent_doc_check_scorecard.py (new) builds an HTML scorecard table
  (regulation × passed/failed/HIGH/MEDIUM/score) shown at the top of
  the email. agent_doc_check_report._render_document now collapses
  the 500-MC L2 forest into 'X/Y bestanden (Z Fail)' summary plus
  a top-10 fails block per doc — old verbose render is gone.

A3 compliance_audit_log.py (new) — sidecar SQLite at
  /data/compliance_audits.db (kept separate from the compliance
  Postgres schema to comply with the no-new-migrations rule in CLAUDE.md):
    check_runs(check_id, ts, tenant_id, site_name, base_domain,
               doc_count, scorecard json, vvt_summary json)
    mc_results(check_id, doc_type, mc_id, label, passed, skipped,
               severity, regulation, matched_text, hint)
  Route persists every run after the email is sent.
  docker-compose.yml adds compliance-audit volume + env.

A4 backfill_mc_regulation_llm.py (new) — Qwen-tagged backfill for
  the 1636 MCs the regex pass couldn't classify. Batches of 25,
  format=json, output constrained to the canonical regulation list.
  Run manually: docker exec bp-compliance-backend python3 \
                 /app/scripts/backfill_mc_regulation_llm.py [--dry-run]

A5 Admin audit tab — GET /api/compliance/agent/audit/<check_id>
  proxied via /api/sdk/v1/agent/audit/<id>. New page
  /sdk/agent/audit/[checkId] renders scorecard + filterable MC table
  (status / doc_type / regulation, expandable rows with matched_text
  + hint). ComplianceCheckTab now shows 'Voll-Audit oeffnen' link.

A6 Trend per tenant — GET /api/compliance/agent/audit/tenant/<id>
  returns recent runs. Email scorecard shows per-regulation delta
  badges ('(+12%)', '(-3%)') compared with the previous run for the
  same tenant + base_domain. Lookup is one SQLite query.

Plumbing:
  rag_document_checker.py — SELECT now includes 'article'; MC results
    carry 'regulation' + 'article' through to CheckItem.
  agent_doc_check_routes.CheckItem schema gains regulation + article
    fields (defaults '') so old clients still parse.
  agent_compliance_check_routes — response gains 'check_id' so the
    frontend can build the audit link.
This commit is contained in:
Benjamin Admin
2026-05-17 13:45:58 +02:00
parent 6d29191e9b
commit 6ed30dae5b
12 changed files with 1159 additions and 10 deletions
@@ -428,10 +428,50 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
scanned_html = build_scanned_urls_html(doc_entries)
providers_html = build_provider_list_html(banner_result, vvt_entries)
vvt_html = build_vvt_table_html(cmp_vendors)
# MC scorecard aggregated across ALL docs in this run (DSGVO/TDDDG/
# BGB/...). Sits at the top so the GF sees the regulation-by-
# regulation view before drilling into per-doc details.
from compliance.services.mc_scorecard import build_scorecard
from .agent_doc_check_scorecard import build_scorecard_html
all_mc_checks: list[dict] = []
for r in results:
for c in r.checks:
if c.id.startswith("mc-"):
all_mc_checks.append({
"id": c.id, "label": c.label, "passed": c.passed,
"severity": c.severity, "skipped": c.skipped,
"regulation": c.regulation,
})
scorecard = build_scorecard(all_mc_checks) if all_mc_checks else {}
# Trend: load previous scorecard for the same tenant + domain so the
# email can show delta indicators (A6).
prev_scorecard: dict | None = None
if scorecard:
try:
from compliance.services.compliance_audit_log import (
list_runs_for_tenant,
)
tenant_id_for_trend = req.recipient or ""
base_domain_for_trend = _extract_domain(doc_entries) or ""
prev_runs = list_runs_for_tenant(
tenant_id_for_trend,
base_domain=base_domain_for_trend,
limit=1,
)
if prev_runs:
prev_scorecard = prev_runs[0].get("scorecard")
except Exception as e:
logger.debug("trend lookup skipped: %s", e)
scorecard_html = (
build_scorecard_html(scorecard, previous_scorecard=prev_scorecard)
if scorecard else ""
)
report_html = build_html_report(results, None)
profile_html = _build_profile_html(profile)
full_html = (
summary_html + scanned_html + profile_html
summary_html + scanned_html + profile_html + scorecard_html
+ providers_html + vvt_html + report_html
)
@@ -452,6 +492,7 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
# Step 7: Store result
response = {
"check_id": check_id,
"results": [_result_to_dict(r) for r in results],
"business_profile": profile_dict,
"extracted_profile": extracted_profile,
@@ -474,6 +515,45 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
_compliance_check_jobs[check_id]["progress"] = "Fertig"
_compliance_check_jobs[check_id]["progress_pct"] = 100
# Persist to sidecar SQLite audit log — enables /audit endpoints
# (A5 admin tab) and trend view (A6). Best-effort; failures here
# do not affect the user-facing response.
try:
from compliance.services.compliance_audit_log import record_check_run
from compliance.services.mc_scorecard import full_audit_records
audit_rows: list[dict] = []
for r in results:
doc_mc = [c for c in r.checks if c.id.startswith("mc-")]
audit_rows.extend(full_audit_records(
[{"id": c.id, "label": c.label, "passed": c.passed,
"severity": c.severity, "skipped": c.skipped,
"regulation": c.regulation, "matched_text": c.matched_text,
"hint": c.hint, "level": c.level}
for c in doc_mc],
check_id=check_id,
doc_type=r.doc_type,
))
record_check_run(
check_id=check_id,
tenant_id=req.recipient or "",
site_name=site_name,
base_domain=domain or "",
doc_count=doc_count,
scorecard=scorecard,
vvt_summary={
"total": len(cmp_vendors),
"internal": sum(1 for v in cmp_vendors
if (v.get("recipient_type") or "").upper()
in ("INTERNAL", "GROUP_COMPANY")),
"external": sum(1 for v in cmp_vendors
if (v.get("recipient_type") or "").upper()
in ("PROCESSOR", "CONTROLLER")),
},
mc_records=audit_rows,
)
except Exception as e:
logger.warning("Audit persistence skipped: %s", e)
except Exception as e:
logger.error("Compliance check %s failed: %s", check_id, e, exc_info=True)
_compliance_check_jobs[check_id]["status"] = "failed"
@@ -1060,3 +1140,51 @@ def _build_profile_html(profile) -> str:
# Cross-check extracted to compliance.services.banner_cookie_cross_check
from compliance.services.banner_cookie_cross_check import cross_check_banner_vs_cookie as _cross_check_banner_vs_cookie
# ── Admin: audit drill-down (A5) + trend view (A6) ──────────────────
@router.get("/audit/{check_id}")
async def audit_drill_down(
    check_id: str,
    doc_type: str = "",
    regulation: str = "",
    only_failed: bool = False,
):
    """Return the stored run plus its (optionally filtered) MC result rows.

    The frontend renders the /sdk/agent/audit/<check_id> view from this
    payload.

    Args:
        check_id: Identifier of the check run to look up.
        doc_type: Optional filter; an empty string is forwarded as ``None``.
        regulation: Optional filter; an empty string is forwarded as ``None``.
        only_failed: Forwarded unchanged to ``list_mc_results``.

    Returns:
        ``{"check_id", "found": False}`` when ``get_check_run`` yields a
        falsy value; otherwise a dict that additionally carries ``run``,
        ``mc_count`` (number of rows returned) and ``results``.
    """
    from compliance.services.compliance_audit_log import get_check_run
    from compliance.services.compliance_audit_log import list_mc_results

    stored_run = get_check_run(check_id)
    payload = {"check_id": check_id, "found": bool(stored_run)}

    if stored_run:
        # Empty query-string values mean "no filter" for the lookup helper.
        filters = {
            "doc_type": doc_type if doc_type else None,
            "regulation": regulation if regulation else None,
            "only_failed": only_failed,
        }
        mc_rows = list_mc_results(check_id, **filters)
        payload.update(
            run=stored_run,
            mc_count=len(mc_rows),
            results=mc_rows,
        )

    return payload
@router.get("/audit/tenant/{tenant_id}")
async def audit_tenant_history(
    tenant_id: str,
    base_domain: str = "",
    limit: int = 30,
):
    """Return the recorded runs of one tenant for the trend view (A6).

    Args:
        tenant_id: Tenant whose runs are listed.
        base_domain: Optional domain filter; an empty string is forwarded
            as ``None``.
        limit: Forwarded unchanged to ``list_runs_for_tenant``.

    Returns:
        A dict with ``tenant_id``, ``count`` (number of runs returned) and
        ``runs``.
    """
    from compliance.services.compliance_audit_log import list_runs_for_tenant

    domain_filter = base_domain if base_domain else None
    # NOTE(review): `limit` is not range-checked here; confirm that
    # list_runs_for_tenant rejects or clamps negative / very large values.
    history = list_runs_for_tenant(
        tenant_id,
        base_domain=domain_filter,
        limit=limit,
    )
    return dict(tenant_id=tenant_id, count=len(history), runs=history)