feat(agent): MC scorecard + audit drill-down + tenant trend (A1-A6)

Now that all 1874 MCs run per check (Task #30 cap removal), the report was about to drown in noise. This commit adds the full aggregation / persistence / drill-down stack so each MC is actionable, not just counted.

A1 mc_scorecard.py (new):
  build_scorecard(checks) -> per-regulation PASS/FAIL/SKIP + severity
  top_fails(checks, n) -> N most severe failed MCs
  full_audit_records(...) -> flat rows ready for sidecar SQLite

A2 Email rendering: agent_doc_check_scorecard.py (new) builds an HTML scorecard table (regulation × passed/failed/HIGH/MEDIUM/score) shown at the top of the email. agent_doc_check_report._render_document now collapses the 500-MC L2 forest into 'X/Y bestanden (Z Fail)' summary plus a top-10 fails block per doc — old verbose render is gone.

A3 compliance_audit_log.py (new) — sidecar SQLite at /data/compliance_audits.db (separate from compliance Postgres schema to comply with the no-new-migrations rule in CLAUDE.md):
  check_runs(check_id, ts, tenant_id, site_name, base_domain, doc_count, scorecard json, vvt_summary json)
  mc_results(check_id, doc_type, mc_id, label, passed, skipped, severity, regulation, matched_text, hint)
Route persists every run after the email is sent. docker-compose.yml adds compliance-audit volume + env.

A4 backfill_mc_regulation_llm.py (new) — Qwen-tagged backfill for the 1636 MCs the regex pass couldn't classify. Batches of 25, format=json, output constrained to the canonical regulation list. Run manually:
  docker exec bp-compliance-backend python3 \
    /app/scripts/backfill_mc_regulation_llm.py [--dry-run]

A5 Admin audit tab — GET /api/compliance/agent/audit/<check_id> proxied via /api/sdk/v1/agent/audit/<id>. New page /sdk/agent/audit/[checkId] renders scorecard + filterable MC table (status / doc_type / regulation, expandable rows with matched_text + hint). ComplianceCheckTab now shows 'Voll-Audit oeffnen' link.

A6 Trend per tenant — GET /api/compliance/agent/audit/tenant/<id> returns recent runs. Email scorecard shows per-regulation delta badges ('(+12%)', '(-3%)') compared with the previous run for the same tenant + base_domain. Lookup is one SQLite query.

Plumbing:
  rag_document_checker.py — SELECT now includes 'article'; MC results carry 'regulation' + 'article' through to CheckItem.
  agent_doc_check_routes.CheckItem schema gains regulation + article fields (defaults '') so old clients still parse.
  agent_compliance_check_routes — response gains 'check_id' so the frontend can build the audit link.
2026-05-17 13:45:58 +02:00
parent 6d29191e9b
commit 6ed30dae5b
12 changed files with 1159 additions and 10 deletions
@@ -428,10 +428,50 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
        scanned_html = build_scanned_urls_html(doc_entries)
        providers_html = build_provider_list_html(banner_result, vvt_entries)
        vvt_html = build_vvt_table_html(cmp_vendors)
+
+        # MC scorecard aggregated across ALL docs in this run (DSGVO/TDDDG/
+        # BGB/...). Sits at the top so the GF sees the regulation-by-
+        # regulation view before drilling into per-doc details.
+        from compliance.services.mc_scorecard import build_scorecard
+        from .agent_doc_check_scorecard import build_scorecard_html
+        all_mc_checks: list[dict] = []
+        for r in results:
+            for c in r.checks:
+                if c.id.startswith("mc-"):
+                    all_mc_checks.append({
+                        "id": c.id, "label": c.label, "passed": c.passed,
+                        "severity": c.severity, "skipped": c.skipped,
+                        "regulation": c.regulation,
+                    })
+        scorecard = build_scorecard(all_mc_checks) if all_mc_checks else {}
+        # Trend: load previous scorecard for the same tenant + domain so the
+        # email can show delta indicators (A6).
+        prev_scorecard: dict | None = None
+        if scorecard:
+            try:
+                from compliance.services.compliance_audit_log import (
+                    list_runs_for_tenant,
+                )
+                tenant_id_for_trend = req.recipient or ""
+                base_domain_for_trend = _extract_domain(doc_entries) or ""
+                prev_runs = list_runs_for_tenant(
+                    tenant_id_for_trend,
+                    base_domain=base_domain_for_trend,
+                    limit=1,
+                )
+                if prev_runs:
+                    prev_scorecard = prev_runs[0].get("scorecard")
+            except Exception as e:
+                logger.debug("trend lookup skipped: %s", e)
+        scorecard_html = (
+            build_scorecard_html(scorecard, previous_scorecard=prev_scorecard)
+            if scorecard else ""
+        )
+
        report_html = build_html_report(results, None)
        profile_html = _build_profile_html(profile)
        full_html = (
-            summary_html + scanned_html + profile_html
+            summary_html + scanned_html + profile_html + scorecard_html
            + providers_html + vvt_html + report_html
        )

@@ -452,6 +492,7 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):

        # Step 7: Store result
        response = {
+            "check_id": check_id,
            "results": [_result_to_dict(r) for r in results],
            "business_profile": profile_dict,
            "extracted_profile": extracted_profile,
@@ -474,6 +515,45 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
        _compliance_check_jobs[check_id]["progress"] = "Fertig"
        _compliance_check_jobs[check_id]["progress_pct"] = 100

+        # Persist to sidecar SQLite audit log — enables /audit endpoints
+        # (A5 admin tab) and trend view (A6). Best-effort; failures here
+        # do not affect the user-facing response.
+        try:
+            from compliance.services.compliance_audit_log import record_check_run
+            from compliance.services.mc_scorecard import full_audit_records
+            audit_rows: list[dict] = []
+            for r in results:
+                doc_mc = [c for c in r.checks if c.id.startswith("mc-")]
+                audit_rows.extend(full_audit_records(
+                    [{"id": c.id, "label": c.label, "passed": c.passed,
+                      "severity": c.severity, "skipped": c.skipped,
+                      "regulation": c.regulation, "matched_text": c.matched_text,
+                      "hint": c.hint, "level": c.level}
+                     for c in doc_mc],
+                    check_id=check_id,
+                    doc_type=r.doc_type,
+                ))
+            record_check_run(
+                check_id=check_id,
+                tenant_id=req.recipient or "",
+                site_name=site_name,
+                base_domain=domain or "",
+                doc_count=doc_count,
+                scorecard=scorecard,
+                vvt_summary={
+                    "total": len(cmp_vendors),
+                    "internal": sum(1 for v in cmp_vendors
+                                    if (v.get("recipient_type") or "").upper()
+                                    in ("INTERNAL", "GROUP_COMPANY")),
+                    "external": sum(1 for v in cmp_vendors
+                                    if (v.get("recipient_type") or "").upper()
+                                    in ("PROCESSOR", "CONTROLLER")),
+                },
+                mc_records=audit_rows,
+            )
+        except Exception as e:
+            logger.warning("Audit persistence skipped: %s", e)
+
    except Exception as e:
        logger.error("Compliance check %s failed: %s", check_id, e, exc_info=True)
        _compliance_check_jobs[check_id]["status"] = "failed"
@@ -1060,3 +1140,51 @@ def _build_profile_html(profile) -> str:

 # Cross-check extracted to compliance.services.banner_cookie_cross_check
 from compliance.services.banner_cookie_cross_check import cross_check_banner_vs_cookie as _cross_check_banner_vs_cookie
+
+
+# ── Admin: audit drill-down (A5) + trend view (A6) ──────────────────
+
+@router.get("/audit/{check_id}")
+async def audit_drill_down(
+    check_id: str,
+    doc_type: str = "",
+    regulation: str = "",
+    only_failed: bool = False,
+):
+    """Return the persisted run record + filterable MC results for a single check run.
+
+    Serves the /sdk/agent/audit/<check_id> frontend view; an unknown check_id yields found=False.
+    """
+    from compliance.services.compliance_audit_log import (
+        get_check_run, list_mc_results,
+    )
+    run = get_check_run(check_id)
+    if not run:  # falsy lookup result: answer with a "not found" payload instead of raising
+        return {"check_id": check_id, "found": False}
+    rows = list_mc_results(
+        check_id,
+        doc_type=doc_type or None,  # "" (the default) is forwarded as None
+        regulation=regulation or None,  # "" (the default) is forwarded as None
+        only_failed=only_failed,
+    )
+    return {
+        "check_id": check_id,
+        "found": True,
+        "run": run,
+        "mc_count": len(rows),  # number of rows returned for the given filters
+        "results": rows,
+    }
+
+
+@router.get("/audit/tenant/{tenant_id}")
+async def audit_tenant_history(
+    tenant_id: str,
+    base_domain: str = "",
+    limit: int = 30,
+):
+    """Tenant-level run history for the trend view (A6), returned as tenant_id / count / runs."""
+    from compliance.services.compliance_audit_log import list_runs_for_tenant
+    runs = list_runs_for_tenant(
+        tenant_id, base_domain=base_domain or None, limit=limit,  # "" -> None; NOTE(review): limit is forwarded unvalidated — confirm the helper bounds it
+    )
+    return {"tenant_id": tenant_id, "count": len(runs), "runs": runs}