Files
breakpilot-compliance/backend-compliance/compliance/services/compliance_audit_log.py
T
Benjamin Admin df7d83134b feat(agent): migrate compliance-check results to banner + documents (M1-M5)
After a compliance-check run finishes, the user can now apply the
extracted vendor inventory directly to their own:

  - CookieBanner config (admin /sdk/einwilligungen)
  - Cookie-Policy / VVT-Register / Privacy-Policy templates
    (admin /sdk/document-generator)

Backend:
  - migration_to_banner.py: vendor list -> CookieBannerConfig with
    ESSENTIAL/PERFORMANCE/PERSONALIZATION/EXTERNAL_MEDIA buckets +
    review flags (broken opt-out URLs, missing expiry, no cookies listed)
  - migration_to_document.py: vendor list -> pre-fills for 3 doc
    templates, recipient-type aware (INTERNAL/GROUP/PROCESSOR/CONTROLLER)
  - agent_migration_routes.py: GET /banner-preview, /document-preview,
    /summary keyed on check_id
  - compliance_audit_log: new check_payloads table persists cmp_vendors +
    extracted_profile so the preview survives an app restart
  - tests: 9 mapper units + 4 endpoint integration tests

Frontend:
  - MigrationPanel.tsx: modal showing banner-config diff + document
    pre-fills, plus links into the existing editors
  - ComplianceCheckTab.tsx: replaces standalone audit link with the
    panel; net -3 lines, stays at the 500-cap

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 14:06:28 +02:00

251 lines
9.0 KiB
Python

"""
Compliance-Check Audit Log — sidecar SQLite persistence.
Every compliance-check run flattens its MC results into rows here so
we have:
- per-tenant history of scorecards (Task A6 trend view)
- drill-down on individual MCs for the admin frontend (Task A5)
- export-ability (DSB receives JSON attachment derived from this)
Sidecar SQLite (`/data/compliance_audits.db`) instead of a new table in
the compliance schema, because the repo policy forbids new migrations
without explicit DB-owner sign-off (see CLAUDE.md guardrails).
"""
from __future__ import annotations
import json
import logging
import os
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Location of the sidecar SQLite file. Set the COMPLIANCE_AUDIT_DB environment
# variable to override the default path; the value is read once at import time.
DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
def _ensure_db() -> None:
    """Create the sidecar SQLite file and its schema if they do not exist yet.

    Creates the parent directory of ``DB_PATH`` when it is missing, then runs
    a ``CREATE ... IF NOT EXISTS`` script for the ``check_runs``,
    ``mc_results`` and ``check_payloads`` tables and their indexes, so calling
    it repeatedly is harmless.

    Raises:
        OSError: If the parent directory cannot be created.
        sqlite3.Error: If the database cannot be opened or the script fails.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    # The sqlite3 connection context manager only commits / rolls back; it
    # never closes the connection, so the handle is closed explicitly here.
    conn = sqlite3.connect(DB_PATH)
    try:
        with conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS check_runs (
                    check_id TEXT PRIMARY KEY,
                    ts TEXT NOT NULL,
                    tenant_id TEXT,
                    site_name TEXT,
                    base_domain TEXT,
                    doc_count INTEGER,
                    scorecard TEXT, -- JSON {by_regulation, totals}
                    vvt_summary TEXT -- JSON {total, internal, external, critical}
                );
                CREATE INDEX IF NOT EXISTS idx_runs_tenant ON check_runs(tenant_id, ts);
                CREATE INDEX IF NOT EXISTS idx_runs_domain ON check_runs(base_domain, ts);
                CREATE TABLE IF NOT EXISTS mc_results (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    check_id TEXT NOT NULL,
                    doc_type TEXT,
                    mc_id TEXT,
                    label TEXT,
                    passed INTEGER,
                    skipped INTEGER,
                    severity TEXT,
                    regulation TEXT,
                    matched_text TEXT,
                    hint TEXT
                );
                CREATE INDEX IF NOT EXISTS idx_mc_check ON mc_results(check_id);
                CREATE INDEX IF NOT EXISTS idx_mc_reg ON mc_results(regulation, passed);
                -- Migration-source payloads (cmp_vendors + extracted_profile),
                -- kept as JSON blobs so the /migration/* endpoints can rebuild
                -- a banner config or document pre-fill after the in-memory
                -- _compliance_check_jobs entry is gone.
                CREATE TABLE IF NOT EXISTS check_payloads (
                    check_id TEXT PRIMARY KEY,
                    vendors TEXT, -- JSON list[dict]
                    profile TEXT -- JSON dict
                );
            """)
    finally:
        conn.close()
def record_check_payload(
    check_id: str,
    vendors: list[dict] | None,
    profile: dict | None,
) -> None:
    """Persist cmp_vendors + extracted_profile for later migration use.

    The payload is stored as two JSON blobs in ``check_payloads``; an existing
    row with the same ``check_id`` is replaced.

    Args:
        check_id: Identifier of the compliance-check run (primary key).
        vendors: Vendor list to store; ``None`` or empty is stored as ``[]``.
        profile: Extracted profile to store; ``None`` or empty is stored as
            ``{}``.

    Persistence is best-effort: any ``Exception`` is caught and logged as a
    warning instead of being propagated to the caller.
    """
    try:
        _ensure_db()
        conn = sqlite3.connect(DB_PATH)
        try:
            # ``with conn`` commits on success and rolls back on error; it
            # does not close the connection, hence the explicit ``finally``.
            with conn:
                conn.execute(
                    "INSERT OR REPLACE INTO check_payloads "
                    "(check_id, vendors, profile) VALUES (?, ?, ?)",
                    (
                        check_id,
                        json.dumps(vendors or [], ensure_ascii=False),
                        json.dumps(profile or {}, ensure_ascii=False),
                    ),
                )
        finally:
            conn.close()
    except Exception as e:
        logger.warning("record_check_payload failed for %s: %s", check_id, e)
def get_check_payload(check_id: str) -> dict | None:
    """Load cmp_vendors + extracted_profile for a previous check.

    Args:
        check_id: Identifier of the compliance-check run to look up.

    Returns:
        ``{"vendors": ..., "profile": ...}`` with both JSON blobs decoded
        (an empty/NULL column decodes to ``[]`` / ``{}``), or ``None`` when no
        row exists for ``check_id`` or when the lookup fails. Failures are
        logged as a warning rather than raised.
    """
    try:
        _ensure_db()
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT vendors, profile FROM check_payloads WHERE check_id=?",
                (check_id,),
            ).fetchone()
            if row is None:
                return None
            return {
                "vendors": json.loads(row["vendors"] or "[]"),
                "profile": json.loads(row["profile"] or "{}"),
            }
        finally:
            # The sqlite3 context manager would not close the connection, so
            # it is released explicitly on every exit path.
            conn.close()
    except Exception as e:
        logger.warning("get_check_payload failed: %s", e)
        return None
def _mc_result_row(record: dict, check_id: str) -> tuple:
    """Flatten one MC record dict into the column tuple for ``mc_results``."""
    return (
        # Fall back to the run's id when the record carries no usable one:
        # the column is NOT NULL, so a None here would abort the whole run.
        record.get("check_id") or check_id,
        record.get("doc_type", ""),
        record.get("mc_id", ""),
        (record.get("label") or "")[:300],
        1 if record.get("passed") else 0,
        1 if record.get("skipped") else 0,
        (record.get("severity") or "").upper(),
        record.get("regulation") or "",
        (record.get("matched_text") or "")[:500],
        (record.get("hint") or "")[:500],
    )


def record_check_run(
    check_id: str,
    tenant_id: str,
    site_name: str,
    base_domain: str,
    doc_count: int,
    scorecard: dict,
    vvt_summary: dict | None = None,
    mc_records: list[dict] | None = None,
) -> None:
    """Persist one check run + all its MC rows. Idempotent on check_id.

    The run row is written with ``INSERT OR REPLACE`` and any ``mc_results``
    rows already stored for ``check_id`` are deleted before the new ones are
    inserted, all inside a single transaction.

    Args:
        check_id: Identifier of the run (primary key of ``check_runs``).
        tenant_id: Tenant the run belongs to.
        site_name: Site name stored with the run.
        base_domain: Base domain stored with the run.
        doc_count: Document count stored with the run.
        scorecard: Scorecard dict, stored as JSON.
        vvt_summary: Optional summary dict, stored as JSON (``{}`` if absent).
        mc_records: Optional list of MC result dicts. ``label`` is truncated
            to 300 characters, ``matched_text`` and ``hint`` to 500, and
            ``severity`` is upper-cased.

    Persistence is best-effort: any ``Exception`` is caught and logged as a
    warning instead of being propagated to the caller.
    """
    try:
        _ensure_db()
        ts = datetime.now(timezone.utc).isoformat()
        conn = sqlite3.connect(DB_PATH)
        try:
            # ``with conn`` commits on success and rolls back on error, so a
            # failing MC insert cannot leave a half-written run behind. It
            # does not close the connection, hence the explicit ``finally``.
            with conn:
                conn.execute(
                    "INSERT OR REPLACE INTO check_runs "
                    "(check_id, ts, tenant_id, site_name, base_domain, doc_count, "
                    " scorecard, vvt_summary) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                    (
                        check_id, ts, tenant_id, site_name, base_domain, doc_count,
                        json.dumps(scorecard, ensure_ascii=False),
                        json.dumps(vvt_summary or {}, ensure_ascii=False),
                    ),
                )
                # Clear old rows for the same check_id before re-inserting
                # (idempotency).
                conn.execute("DELETE FROM mc_results WHERE check_id=?", (check_id,))
                if mc_records:
                    conn.executemany(
                        "INSERT INTO mc_results "
                        "(check_id, doc_type, mc_id, label, passed, skipped, "
                        " severity, regulation, matched_text, hint) "
                        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                        [_mc_result_row(r, check_id) for r in mc_records],
                    )
        finally:
            conn.close()
        logger.info("Audit recorded: check_id=%s mc_rows=%d",
                    check_id, len(mc_records or []))
    except Exception as e:
        logger.warning("Audit persistence failed for %s: %s", check_id, e)
# ── Read API (used by the admin endpoints + trend view) ─────────────
def get_check_run(check_id: str) -> dict | None:
    """Load a single stored check run by its id.

    Args:
        check_id: Identifier of the run to look up.

    Returns:
        The ``check_runs`` row as a dict with ``scorecard`` and
        ``vvt_summary`` decoded from JSON (empty/NULL decodes to ``{}``), or
        ``None`` when no such run exists or the lookup fails. Failures are
        logged as a warning rather than raised.
    """
    try:
        _ensure_db()
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM check_runs WHERE check_id=?", (check_id,),
            ).fetchone()
            if row is None:
                return None
            d = dict(row)
            d["scorecard"] = json.loads(d.get("scorecard") or "{}")
            d["vvt_summary"] = json.loads(d.get("vvt_summary") or "{}")
            return d
        finally:
            # The sqlite3 context manager would not close the connection, so
            # it is released explicitly on every exit path.
            conn.close()
    except Exception as e:
        logger.warning("get_check_run failed: %s", e)
        return None
def list_mc_results(
    check_id: str,
    doc_type: str | None = None,
    regulation: str | None = None,
    only_failed: bool = False,
) -> list[dict]:
    """List the stored MC result rows of one check run.

    Args:
        check_id: Identifier of the run whose rows are returned.
        doc_type: When truthy, only rows with this ``doc_type``.
        regulation: When truthy, only rows with this ``regulation``.
        only_failed: When true, only rows that neither passed nor were
            skipped.

    Returns:
        The matching ``mc_results`` rows as dicts, ordered by ``severity``
        then ``label``. An empty list is returned when nothing matches or
        when the query fails; failures are logged as a warning.
    """
    try:
        _ensure_db()
        where = ["check_id = ?"]
        params: list = [check_id]
        if doc_type:
            where.append("doc_type = ?")
            params.append(doc_type)
        if regulation:
            where.append("regulation = ?")
            params.append(regulation)
        if only_failed:
            where.append("passed = 0 AND skipped = 0")
        # Only fixed SQL fragments are concatenated; every caller-supplied
        # value travels as a bound parameter.
        sql = ("SELECT * FROM mc_results WHERE " + " AND ".join(where)
               + " ORDER BY severity, label")
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(sql, params).fetchall()
            return [dict(r) for r in rows]
        finally:
            # The sqlite3 context manager would not close the connection, so
            # it is released explicitly on every exit path.
            conn.close()
    except Exception as e:
        logger.warning("list_mc_results failed: %s", e)
        return []
def list_runs_for_tenant(
    tenant_id: str,
    base_domain: str | None = None,
    limit: int = 30,
) -> list[dict]:
    """List the most recent check runs of a tenant, newest first.

    Args:
        tenant_id: Tenant whose runs are returned.
        base_domain: When truthy, only runs for this base domain.
        limit: Maximum number of rows, passed to SQL ``LIMIT``.

    Returns:
        ``check_runs`` rows as dicts ordered by ``ts`` descending, with
        ``scorecard`` decoded from JSON (empty/NULL decodes to ``{}``). An
        empty list is returned when nothing matches or when the query fails;
        failures are logged as a warning.
    """
    try:
        _ensure_db()
        where = ["tenant_id = ?"]
        params: list = [tenant_id]
        if base_domain:
            where.append("base_domain = ?")
            params.append(base_domain)
        sql = ("SELECT * FROM check_runs WHERE " + " AND ".join(where)
               + " ORDER BY ts DESC LIMIT ?")
        params.append(limit)
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(sql, params).fetchall()
        finally:
            # The sqlite3 context manager would not close the connection, so
            # it is released explicitly on every exit path.
            conn.close()
        out = []
        for r in rows:
            d = dict(r)
            d["scorecard"] = json.loads(d.get("scorecard") or "{}")
            # NOTE(review): ``vvt_summary`` is left as the raw stored JSON
            # string here; confirm with callers before decoding it as well.
            out.append(d)
        return out
    except Exception as e:
        logger.warning("list_runs_for_tenant failed: %s", e)
        return []