Files
breakpilot-compliance/backend-compliance/compliance/services/compliance_audit_log.py
T
Benjamin Admin 6f16507c5f
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m54s
CI / test-go (push) Has been skipped
CI / detect-changes (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 17s
CI / loc-budget (push) Successful in 17s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 43s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
feat(banner): P19 + P20 — Per-Category-Click-Test + Frontend-Drilldown
P19 (consent-tester):
- dp-cookieconsent (TYPO3, Safetykon-Pattern) als CMP-Profil hinzu —
  Selektoren #dp--cookie-statistics/marketing + a.cc-allow Save-Button
- Neues Signal provider_details_visible: nach Kategorie-Toggle prueft
  Playwright ob im Banner sichtbare Provider-/Cookie-Detail-Elemente
  erscheinen. Bei dp-cookieconsent (Banner ohne Listing) immer False
  -> HIGH-Violation "Kategorie zeigt keine Provider-/Cookie-Details —
  Nutzer kann nicht informiert einwilligen (Art. 7 Abs. 1 DSGVO)"
- main.py serialisiert provider_details_visible + cookies_set pro Kategorie

P20 (Frontend-Drilldown):
- Backend: check_payloads-Tabelle um Spalte 'banner' (JSON) — voller
  banner_result persistiert (vorher nur in-memory). ALTER TABLE
  Migration idempotent.
- Neuer Endpoint GET /api/compliance/agent/banner/<check_id> — liefert
  Quality-Score, Phases, Category-Tests, Banner-Checks, alle 46
  structured_checks.
- Frontend: BannerTab im /sdk/agent/audit/<id> mit Quality-Cards,
  3-Phasen-Cookie-Tabelle, Per-Category-Listing (mit P19-Signal
  rot/gruen), Banner-Verstoesse + Rechtsgrundlagen, 46-Check-Drilldown
  filterbar nach Severity.
- Tab-Switcher in page.tsx um "Cookie-Banner-Analyse" erweitert.
- Bonus: 2 alte route.ts auf Next.js 15 Promise-params umgestellt
  (Build-Fix).

Plus: Critical-Findings-Block nutzt provider_details_visible als
primaeres Signal statt nur tracking_services-Anzahl.

Smoke-Test Safetykon: 4 Critical Findings im Mail, banner-Endpoint
liefert 46 checks + 3 phases + 2 categories mit provider_details_visible=False.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 14:31:13 +02:00

260 lines
9.6 KiB
Python

"""
Compliance-Check Audit Log — sidecar SQLite persistence.
Every compliance-check run flattens its MC results into rows here so
we have:
- per-tenant history of scorecards (Task A6 trend view)
- drill-down on individual MCs for the admin frontend (Task A5)
- export-ability (DSB receives JSON attachment derived from this)
Sidecar SQLite (`/data/compliance_audits.db`) instead of a new table in
the compliance schema, because the repo policy forbids new migrations
without explicit DB-owner sign-off (see CLAUDE.md guardrails).
"""
from __future__ import annotations
import json
import logging
import os
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
logger = logging.getLogger(__name__)
DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
def _ensure_db() -> None:
    """Create the sidecar SQLite database, its tables and indexes if missing.

    Cheap to call before every read or write: all DDL uses
    ``IF NOT EXISTS`` and the ``banner`` column migration is skipped when
    the column is already present.

    Raises:
        OSError: if the parent directory of ``DB_PATH`` cannot be created.
        sqlite3.Error: if the database cannot be opened or the schema
            script fails.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB_PATH)
    try:
        # ``with conn`` only commits / rolls back; the connection itself is
        # closed in the ``finally`` below so no file handle is left open.
        with conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS check_runs (
                    check_id TEXT PRIMARY KEY,
                    ts TEXT NOT NULL,
                    tenant_id TEXT,
                    site_name TEXT,
                    base_domain TEXT,
                    doc_count INTEGER,
                    scorecard TEXT, -- JSON {by_regulation, totals}
                    vvt_summary TEXT -- JSON {total, internal, external, critical}
                );
                CREATE INDEX IF NOT EXISTS idx_runs_tenant ON check_runs(tenant_id, ts);
                CREATE INDEX IF NOT EXISTS idx_runs_domain ON check_runs(base_domain, ts);
                CREATE TABLE IF NOT EXISTS mc_results (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    check_id TEXT NOT NULL,
                    doc_type TEXT,
                    mc_id TEXT,
                    label TEXT,
                    passed INTEGER,
                    skipped INTEGER,
                    severity TEXT,
                    regulation TEXT,
                    matched_text TEXT,
                    hint TEXT
                );
                CREATE INDEX IF NOT EXISTS idx_mc_check ON mc_results(check_id);
                CREATE INDEX IF NOT EXISTS idx_mc_reg ON mc_results(regulation, passed);
                -- Migration-source payloads (cmp_vendors + extracted_profile),
                -- kept as JSON blobs so the /migration/* endpoints can rebuild
                -- a banner config or document pre-fill after the in-memory
                -- _compliance_check_jobs entry is gone.
                CREATE TABLE IF NOT EXISTS check_payloads (
                    check_id TEXT PRIMARY KEY,
                    vendors TEXT, -- JSON list[dict]
                    profile TEXT, -- JSON dict
                    banner TEXT -- P20: JSON dict — full banner_result
                );
            """)
            # P20 migration: databases created before the 'banner' column
            # existed get it added here. Check the live schema first so a
            # genuine failure is not mistaken for "column already exists".
            existing_columns = {
                row[1]  # table_info rows are (cid, name, type, ...)
                for row in conn.execute("PRAGMA table_info(check_payloads)")
            }
            if "banner" not in existing_columns:
                try:
                    conn.execute(
                        "ALTER TABLE check_payloads ADD COLUMN banner TEXT"
                    )
                except sqlite3.OperationalError as exc:
                    # Another process may have added the column between the
                    # check and the ALTER; that outcome is fine. Anything
                    # else is reported instead of being silently dropped.
                    if "duplicate column" not in str(exc).lower():
                        logger.warning(
                            "check_payloads.banner migration failed: %s", exc
                        )
    finally:
        conn.close()
def record_check_payload(
    check_id: str,
    vendors: list[dict] | None,
    profile: dict | None,
    banner: dict | None = None,
) -> None:
    """Persist cmp_vendors + extracted_profile + banner_result (P20).

    Best-effort: any failure is logged as a warning and never raised.

    Args:
        check_id: Primary key of the row in ``check_payloads``.
        vendors: Stored as a JSON list; ``None`` is stored as ``[]``.
        profile: Stored as a JSON object; ``None`` is stored as ``{}``.
        banner: Stored as a JSON object; ``None`` or an empty dict is
            stored as SQL NULL.

    Note:
        The row is written with ``INSERT OR REPLACE``, so a later call for
        the same ``check_id`` without ``banner`` overwrites a previously
        stored banner with NULL.
    """
    try:
        _ensure_db()
        conn = sqlite3.connect(DB_PATH)
        try:
            # ``with conn`` commits on success and rolls back on error;
            # closing is handled separately by the ``finally`` below.
            with conn:
                conn.execute(
                    "INSERT OR REPLACE INTO check_payloads "
                    "(check_id, vendors, profile, banner) VALUES (?, ?, ?, ?)",
                    (
                        check_id,
                        json.dumps(vendors or [], ensure_ascii=False),
                        json.dumps(profile or {}, ensure_ascii=False),
                        json.dumps(banner, ensure_ascii=False) if banner else None,
                    ),
                )
        finally:
            conn.close()
    except Exception as e:
        logger.warning("record_check_payload failed for %s: %s", check_id, e)
def get_check_payload(check_id: str) -> dict | None:
    """Load cmp_vendors + extracted_profile + banner_result for a previous check.

    Args:
        check_id: Primary key of the row in ``check_payloads``.

    Returns:
        A dict with keys ``vendors`` (list), ``profile`` (dict) and
        ``banner`` (dict, or ``None`` when no banner was stored), or
        ``None`` when the row does not exist or loading fails. Failures
        are logged as warnings, never raised.
    """
    try:
        _ensure_db()
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT vendors, profile, banner FROM check_payloads WHERE check_id=?",
                (check_id,),
            ).fetchone()
        finally:
            # The sqlite3 connection context manager does not close the
            # connection, so release it explicitly.
            conn.close()
        if not row:
            return None
        return {
            "vendors": json.loads(row["vendors"] or "[]"),
            "profile": json.loads(row["profile"] or "{}"),
            "banner": json.loads(row["banner"]) if row["banner"] else None,
        }
    except Exception as e:
        logger.warning("get_check_payload failed: %s", e)
        return None
def record_check_run(
    check_id: str,
    tenant_id: str,
    site_name: str,
    base_domain: str,
    doc_count: int,
    scorecard: dict,
    vvt_summary: dict | None = None,
    mc_records: list[dict] | None = None,
) -> None:
    """Persist one check run + all its MC rows. Idempotent on check_id.

    Best-effort: any failure is logged as a warning and never raised. The
    run row and its MC rows are written in a single transaction, so a
    failure leaves the previous state of the database untouched.

    Args:
        check_id: Primary key of the run; an existing run with the same id
            is replaced and its ``mc_results`` rows are deleted first.
        tenant_id: Stored in ``check_runs.tenant_id``.
        site_name: Stored in ``check_runs.site_name``.
        base_domain: Stored in ``check_runs.base_domain``.
        doc_count: Stored in ``check_runs.doc_count``.
        scorecard: Serialized to JSON.
        vvt_summary: Serialized to JSON; ``None`` is stored as ``{}``.
        mc_records: One dict per MC result. Recognized keys: ``check_id``,
            ``doc_type``, ``mc_id``, ``label``, ``passed``, ``skipped``,
            ``severity``, ``regulation``, ``matched_text``, ``hint``.
    """
    try:
        _ensure_db()
        ts = datetime.now(timezone.utc).isoformat()
        mc_rows = [
            (
                # Fall back to the run's id when the record carries no
                # (or an empty/None) check_id; the column is NOT NULL.
                r.get("check_id") or check_id,
                r.get("doc_type", ""),
                r.get("mc_id", ""),
                (r.get("label") or "")[:300],
                1 if r.get("passed") else 0,
                1 if r.get("skipped") else 0,
                (r.get("severity") or "").upper(),
                r.get("regulation") or "",
                (r.get("matched_text") or "")[:500],
                (r.get("hint") or "")[:500],
            )
            for r in (mc_records or [])
        ]
        conn = sqlite3.connect(DB_PATH)
        try:
            # ``with conn`` commits on success and rolls back on error;
            # closing is handled separately by the ``finally`` below.
            with conn:
                conn.execute(
                    "INSERT OR REPLACE INTO check_runs "
                    "(check_id, ts, tenant_id, site_name, base_domain, doc_count, "
                    " scorecard, vvt_summary) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                    (
                        check_id, ts, tenant_id, site_name, base_domain, doc_count,
                        json.dumps(scorecard, ensure_ascii=False),
                        json.dumps(vvt_summary or {}, ensure_ascii=False),
                    ),
                )
                # Clear old rows for the same check_id before re-inserting
                # (idempotency).
                conn.execute("DELETE FROM mc_results WHERE check_id=?", (check_id,))
                if mc_rows:
                    conn.executemany(
                        "INSERT INTO mc_results "
                        "(check_id, doc_type, mc_id, label, passed, skipped, "
                        " severity, regulation, matched_text, hint) "
                        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                        mc_rows,
                    )
        finally:
            conn.close()
        logger.info("Audit recorded: check_id=%s mc_rows=%d",
                    check_id, len(mc_rows))
    except Exception as e:
        logger.warning("Audit persistence failed for %s: %s", check_id, e)
# ── Read API (used by the admin endpoints + trend view) ─────────────
def get_check_run(check_id: str) -> dict | None:
    """Load one row of ``check_runs`` with its JSON columns decoded.

    Args:
        check_id: Primary key of the run.

    Returns:
        The row as a dict with ``scorecard`` and ``vvt_summary`` parsed
        from JSON (empty dict when the column is NULL or empty), or
        ``None`` when the run does not exist or loading fails. Failures
        are logged as warnings, never raised.
    """
    try:
        _ensure_db()
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM check_runs WHERE check_id=?", (check_id,),
            ).fetchone()
        finally:
            # The sqlite3 connection context manager does not close the
            # connection, so release it explicitly.
            conn.close()
        if not row:
            return None
        d = dict(row)
        d["scorecard"] = json.loads(d.get("scorecard") or "{}")
        d["vvt_summary"] = json.loads(d.get("vvt_summary") or "{}")
        return d
    except Exception as e:
        logger.warning("get_check_run failed: %s", e)
        return None
def list_mc_results(
    check_id: str,
    doc_type: str | None = None,
    regulation: str | None = None,
    only_failed: bool = False,
) -> list[dict]:
    """Return the ``mc_results`` rows of one check run, optionally filtered.

    Args:
        check_id: Run whose MC rows are listed.
        doc_type: When truthy, keep only rows with this ``doc_type``.
        regulation: When truthy, keep only rows with this ``regulation``.
        only_failed: When true, keep only rows that neither passed nor
            were skipped.

    Returns:
        Rows as dicts ordered by ``severity`` then ``label``. Severity is a
        TEXT column, so the order is alphabetical rather than by rank.
        Returns ``[]`` when nothing matches or the query fails; failures
        are logged as warnings, never raised.
    """
    try:
        _ensure_db()
        # Only fixed SQL fragments are concatenated; every caller-supplied
        # value is bound through a ``?`` placeholder.
        where = ["check_id = ?"]
        params: list = [check_id]
        if doc_type:
            where.append("doc_type = ?")
            params.append(doc_type)
        if regulation:
            where.append("regulation = ?")
            params.append(regulation)
        if only_failed:
            where.append("passed = 0 AND skipped = 0")
        sql = ("SELECT * FROM mc_results WHERE " + " AND ".join(where)
               + " ORDER BY severity, label")
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(sql, params).fetchall()
        finally:
            # The sqlite3 connection context manager does not close the
            # connection, so release it explicitly.
            conn.close()
        return [dict(r) for r in rows]
    except Exception as e:
        logger.warning("list_mc_results failed: %s", e)
        return []
def list_runs_for_tenant(
    tenant_id: str,
    base_domain: str | None = None,
    limit: int = 30,
) -> list[dict]:
    """Return the most recent check runs of a tenant, newest first.

    Args:
        tenant_id: Tenant whose runs are listed.
        base_domain: When truthy, keep only runs for this ``base_domain``.
        limit: Bound as the SQL ``LIMIT`` value.

    Returns:
        Rows of ``check_runs`` as dicts ordered by ``ts`` descending, with
        ``scorecard`` parsed from JSON. ``vvt_summary`` is left as the
        stored JSON text here (unlike ``get_check_run``). Returns ``[]``
        when nothing matches or the query fails; failures are logged as
        warnings, never raised.
    """
    try:
        _ensure_db()
        # Only fixed SQL fragments are concatenated; every caller-supplied
        # value is bound through a ``?`` placeholder.
        where = ["tenant_id = ?"]
        params: list = [tenant_id]
        if base_domain:
            where.append("base_domain = ?")
            params.append(base_domain)
        sql = ("SELECT * FROM check_runs WHERE " + " AND ".join(where)
               + " ORDER BY ts DESC LIMIT ?")
        params.append(limit)
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(sql, params).fetchall()
        finally:
            # The sqlite3 connection context manager does not close the
            # connection, so release it explicitly.
            conn.close()
        out = []
        for r in rows:
            d = dict(r)
            d["scorecard"] = json.loads(d.get("scorecard") or "{}")
            out.append(d)
        return out
    except Exception as e:
        logger.warning("list_runs_for_tenant failed: %s", e)
        return []