""" Compliance-Check Audit Log — sidecar SQLite persistence. Every compliance-check run flattens its MC results into rows here so we have: - per-tenant history of scorecards (Task A6 trend view) - drill-down on individual MCs for the admin frontend (Task A5) - export-ability (DSB receives JSON attachment derived from this) Sidecar SQLite (`/data/compliance_audits.db`) instead of a new table in the compliance schema, because the repo policy forbids new migrations without explicit DB-owner sign-off (see CLAUDE.md guardrails). """ from __future__ import annotations import json import logging import os import sqlite3 from datetime import datetime, timezone from pathlib import Path logger = logging.getLogger(__name__) DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db") def _ensure_db() -> None: Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True) with sqlite3.connect(DB_PATH) as conn: conn.executescript(""" CREATE TABLE IF NOT EXISTS check_runs ( check_id TEXT PRIMARY KEY, ts TEXT NOT NULL, tenant_id TEXT, site_name TEXT, base_domain TEXT, doc_count INTEGER, scorecard TEXT, -- JSON {by_regulation, totals} vvt_summary TEXT -- JSON {total, internal, external, critical} ); CREATE INDEX IF NOT EXISTS idx_runs_tenant ON check_runs(tenant_id, ts); CREATE INDEX IF NOT EXISTS idx_runs_domain ON check_runs(base_domain, ts); CREATE TABLE IF NOT EXISTS mc_results ( id INTEGER PRIMARY KEY AUTOINCREMENT, check_id TEXT NOT NULL, doc_type TEXT, mc_id TEXT, label TEXT, passed INTEGER, skipped INTEGER, severity TEXT, regulation TEXT, matched_text TEXT, hint TEXT ); CREATE INDEX IF NOT EXISTS idx_mc_check ON mc_results(check_id); CREATE INDEX IF NOT EXISTS idx_mc_reg ON mc_results(regulation, passed); -- Migration-source payloads (cmp_vendors + extracted_profile), -- kept as JSON blobs so the /migration/* endpoints can rebuild -- a banner config or document pre-fill after the in-memory -- _compliance_check_jobs entry is gone. CREATE TABLE IF NOT EXISTS check_payloads ( check_id TEXT PRIMARY KEY, vendors TEXT, -- JSON list[dict] profile TEXT, -- JSON dict banner TEXT -- P20: JSON dict — full banner_result ); """) # P20 migration: spalte 'banner' nachtraeglich anlegen wenn alt try: conn.execute("ALTER TABLE check_payloads ADD COLUMN banner TEXT") except sqlite3.OperationalError: pass def record_check_payload( check_id: str, vendors: list[dict] | None, profile: dict | None, banner: dict | None = None, ) -> None: """Persist cmp_vendors + extracted_profile + banner_result (P20).""" try: _ensure_db() with sqlite3.connect(DB_PATH) as conn: conn.execute( "INSERT OR REPLACE INTO check_payloads " "(check_id, vendors, profile, banner) VALUES (?, ?, ?, ?)", ( check_id, json.dumps(vendors or [], ensure_ascii=False), json.dumps(profile or {}, ensure_ascii=False), json.dumps(banner or {}, ensure_ascii=False) if banner else None, ), ) conn.commit() except Exception as e: logger.warning("record_check_payload failed for %s: %s", check_id, e) def get_check_payload(check_id: str) -> dict | None: """Load cmp_vendors + extracted_profile + banner_result for a previous check.""" try: _ensure_db() with sqlite3.connect(DB_PATH) as conn: conn.row_factory = sqlite3.Row row = conn.execute( "SELECT vendors, profile, banner FROM check_payloads WHERE check_id=?", (check_id,), ).fetchone() if not row: return None return { "vendors": json.loads(row["vendors"] or "[]"), "profile": json.loads(row["profile"] or "{}"), "banner": json.loads(row["banner"]) if row["banner"] else None, } except Exception as e: logger.warning("get_check_payload failed: %s", e) return None def record_check_run( check_id: str, tenant_id: str, site_name: str, base_domain: str, doc_count: int, scorecard: dict, vvt_summary: dict | None = None, mc_records: list[dict] | None = None, ) -> None: """Persist one check run + all its MC rows. Idempotent on check_id.""" try: _ensure_db() ts = datetime.now(timezone.utc).isoformat() with sqlite3.connect(DB_PATH) as conn: conn.execute( "INSERT OR REPLACE INTO check_runs " "(check_id, ts, tenant_id, site_name, base_domain, doc_count, " " scorecard, vvt_summary) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", ( check_id, ts, tenant_id, site_name, base_domain, doc_count, json.dumps(scorecard, ensure_ascii=False), json.dumps(vvt_summary or {}, ensure_ascii=False), ), ) # Clear old rows for the same check_id before re-inserting (idempotency) conn.execute("DELETE FROM mc_results WHERE check_id=?", (check_id,)) if mc_records: conn.executemany( "INSERT INTO mc_results " "(check_id, doc_type, mc_id, label, passed, skipped, " " severity, regulation, matched_text, hint) " "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", [ ( r.get("check_id", check_id), r.get("doc_type", ""), r.get("mc_id", ""), (r.get("label") or "")[:300], 1 if r.get("passed") else 0, 1 if r.get("skipped") else 0, (r.get("severity") or "").upper(), r.get("regulation") or "", (r.get("matched_text") or "")[:500], (r.get("hint") or "")[:500], ) for r in mc_records ], ) conn.commit() logger.info("Audit recorded: check_id=%s mc_rows=%d", check_id, len(mc_records or [])) except Exception as e: logger.warning("Audit persistence failed for %s: %s", check_id, e) # ── Read API (used by the admin endpoints + trend view) ───────────── def get_check_run(check_id: str) -> dict | None: try: _ensure_db() with sqlite3.connect(DB_PATH) as conn: conn.row_factory = sqlite3.Row row = conn.execute( "SELECT * FROM check_runs WHERE check_id=?", (check_id,), ).fetchone() if not row: return None d = dict(row) d["scorecard"] = json.loads(d.get("scorecard") or "{}") d["vvt_summary"] = json.loads(d.get("vvt_summary") or "{}") return d except Exception as e: logger.warning("get_check_run failed: %s", e) return None def list_mc_results( check_id: str, doc_type: str | None = None, regulation: str | None = None, only_failed: bool = False, ) -> list[dict]: try: _ensure_db() where = ["check_id = ?"] params: list = [check_id] if doc_type: where.append("doc_type = ?") params.append(doc_type) if regulation: where.append("regulation = ?") params.append(regulation) if only_failed: where.append("passed = 0 AND skipped = 0") sql = ("SELECT * FROM mc_results WHERE " + " AND ".join(where) + " ORDER BY severity, label") with sqlite3.connect(DB_PATH) as conn: conn.row_factory = sqlite3.Row rows = conn.execute(sql, params).fetchall() return [dict(r) for r in rows] except Exception as e: logger.warning("list_mc_results failed: %s", e) return [] def list_runs_for_tenant( tenant_id: str, base_domain: str | None = None, limit: int = 30, ) -> list[dict]: try: _ensure_db() where = ["tenant_id = ?"] params: list = [tenant_id] if base_domain: where.append("base_domain = ?") params.append(base_domain) sql = ("SELECT * FROM check_runs WHERE " + " AND ".join(where) + " ORDER BY ts DESC LIMIT ?") params.append(limit) with sqlite3.connect(DB_PATH) as conn: conn.row_factory = sqlite3.Row rows = conn.execute(sql, params).fetchall() out = [] for r in rows: d = dict(r) d["scorecard"] = json.loads(d.get("scorecard") or "{}") out.append(d) return out except Exception as e: logger.warning("list_runs_for_tenant failed: %s", e) return []