"""
Unified-Findings sidecar store.

A compliance check produces findings from 4 sources today:
  - Master-Controls (mc_results table — already persisted)
  - Pflichtangaben (L1/L2 doc checks, e.g. Impressum-Vollstaendigkeit)
  - Vendor scans (per cmp_vendor: missing privacy url, no opt-out, ...)
  - Redundancies (multi-vendor in same category)

Previously the DSB had to look in 4 different blocks of the email to find
everything. This store flattens all of them into ONE searchable table so
the /audit/ frontend can show a unified list with source / severity /
status / doc_type filters.

Sidecar SQLite (same DB as compliance_audit_log) — no Postgres migration
needed.
"""

from __future__ import annotations

import json
import logging
import os
import sqlite3
from collections.abc import Iterator
from contextlib import contextmanager
from pathlib import Path

logger = logging.getLogger(__name__)

DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")

# (table column, key in the summary dict) pairs used by findings_summary().
# The column names are interpolated into SQL, so they must stay constants.
_SUMMARY_BUCKETS = (
    ("source_type", "by_source"),
    ("severity", "by_severity"),
    ("status", "by_status"),
    ("doc_type", "by_doc_type"),
)

# Free-text search clause. uf_lower() is a Python-backed SQL function (see
# _sql_lower) and the ESCAPE character lets us match '%' and '_' literally.
_SEARCH_CLAUSE = (
    "(uf_lower(label) LIKE ? ESCAPE '\\' "
    "OR uf_lower(vendor_name) LIKE ? ESCAPE '\\')"
)


@contextmanager
def _connection() -> Iterator[sqlite3.Connection]:
    """Yield a connection to DB_PATH that is committed AND closed on exit.

    ``with sqlite3.connect(...)`` alone only manages the transaction; the
    connection object stays open. This wrapper keeps the commit/rollback
    behaviour and additionally closes the handle.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        with conn:  # commit on success, rollback on exception
            yield conn
    finally:
        conn.close()


def _ensure_table() -> None:
    """Create the DB directory, the unified_findings table and its indexes.

    Safe to call repeatedly: every statement uses IF NOT EXISTS.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    with _connection() as conn:
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS unified_findings (
                id             INTEGER PRIMARY KEY AUTOINCREMENT,
                check_id       TEXT NOT NULL,
                source_type    TEXT NOT NULL,  -- mc|pflichtangabe|vendor|redundanz
                doc_type       TEXT,           -- impressum|dse|cookie|... or '-' for vendor/redundanz
                severity       TEXT,           -- CRITICAL|HIGH|MEDIUM|LOW|INFO
                status         TEXT,           -- failed|passed|skipped|na|info
                regulation     TEXT,
                label          TEXT,
                hint           TEXT,
                action_recipe  TEXT,           -- JSON {what,why,fix_text,where,example}
                anchor_excerpt TEXT,
                anchor_conf    REAL,
                vendor_name    TEXT,
                category       TEXT,
                payload        TEXT            -- JSON extras (matched_text, cookies count, ...)
            );
            CREATE INDEX IF NOT EXISTS idx_uf_check
                ON unified_findings(check_id);
            CREATE INDEX IF NOT EXISTS idx_uf_source
                ON unified_findings(check_id, source_type);
            CREATE INDEX IF NOT EXISTS idx_uf_status
                ON unified_findings(check_id, status);
            CREATE INDEX IF NOT EXISTS idx_uf_severity
                ON unified_findings(check_id, severity);
        """)


def _text(value: object, default: str = "") -> str:
    """Return *value* as ``str``; falsy values become *default*."""
    chosen = value or default
    return chosen if isinstance(chosen, str) else str(chosen)


def _to_float(value: object) -> float:
    """Return *value* as ``float``; falsy or unparsable values become 0.0."""
    try:
        return float(value or 0.0)
    except (TypeError, ValueError):
        return 0.0


def _finding_row(check_id: str, f: dict) -> tuple:
    """Map one finding dict onto the INSERT column order, clipping text."""
    return (
        check_id,
        _text(f.get("source_type"), "mc")[:24],
        _text(f.get("doc_type"))[:32],
        # upper() before slicing, so the stored value never exceeds 16 chars
        _text(f.get("severity"), "MEDIUM").upper()[:16],
        _text(f.get("status"), "failed")[:16],
        _text(f.get("regulation"))[:64],
        _text(f.get("label"))[:400],
        _text(f.get("hint"))[:1200],
        json.dumps(f.get("action_recipe") or {}, ensure_ascii=False),
        _text(f.get("anchor_excerpt"))[:800],
        _to_float(f.get("anchor_conf")),
        _text(f.get("vendor_name"))[:160],
        _text(f.get("category"))[:64],
        json.dumps(f.get("payload") or {}, ensure_ascii=False),
    )


def record_findings(check_id: str, findings: list[dict]) -> int:
    """Bulk-insert all findings for a check. Idempotent on check_id.

    Existing rows for *check_id* are deleted first, so calling this again
    replaces the previous result set. Delete and insert share one
    transaction: if building or inserting the rows fails, the old rows stay.

    Args:
        check_id: Identifier of the compliance check; empty means no-op.
        findings: Finding dicts. Missing keys fall back to defaults
            (source_type "mc", severity "MEDIUM", status "failed").

    Returns:
        Number of rows written; 0 for empty input or on any error (errors
        are logged, never raised).
    """
    if not check_id:
        return 0
    try:
        _ensure_table()
        with _connection() as conn:
            conn.execute(
                "DELETE FROM unified_findings WHERE check_id=?",
                (check_id,),
            )
            if not findings:
                return 0
            rows = [_finding_row(check_id, f) for f in findings]
            conn.executemany(
                "INSERT INTO unified_findings "
                "(check_id, source_type, doc_type, severity, status, regulation, "
                " label, hint, action_recipe, anchor_excerpt, anchor_conf, "
                " vendor_name, category, payload) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                rows,
            )
        # Log the source types as stored: these are always strings, so
        # sorting cannot fail on a finding with source_type=None.
        logger.info(
            "unified_findings: %s rows=%d sources=%s",
            check_id, len(rows), sorted({row[1] for row in rows}),
        )
        return len(rows)
    except Exception as e:
        logger.warning("record_findings failed for %s: %s", check_id, e)
        return 0


def _sql_lower(value: object) -> object:
    """Unicode-aware lower() for SQL; SQLite's LOWER() only folds ASCII."""
    return value.lower() if isinstance(value, str) else value


def _escape_like(term: str) -> str:
    """Escape LIKE wildcards in *term* for use with ``ESCAPE '\\'``."""
    return (
        term.replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )


def _load_json(raw: object) -> object:
    """Decode a JSON cell; empty or undecodable content becomes ``{}``."""
    try:
        return json.loads(raw or "{}")
    except (TypeError, ValueError):
        return {}


def list_findings(
    check_id: str,
    source_type: str | None = None,
    severity: str | None = None,
    doc_type: str | None = None,
    status: str | None = None,
    q: str | None = None,
    limit: int = 1000,
) -> list[dict]:
    """Return filtered findings.

    q matches label OR vendor_name (case-insensitive, literal substring —
    '%' and '_' in q are not treated as wildcards).

    Args:
        check_id: Check whose findings are listed.
        source_type, severity, doc_type, status: Exact-match filters;
            ``None``, empty, or "all" disables the filter. severity is
            upper-cased before comparison.
        q: Optional free-text search term.
        limit: Maximum number of rows returned.

    Returns:
        Row dicts ordered by severity (CRITICAL first), then source_type
        and label, with action_recipe and payload decoded from JSON.
        Returns ``[]`` on any error (logged, never raised).
    """
    try:
        _ensure_table()
        where = ["check_id = ?"]
        params: list = [check_id]
        exact_filters = (
            ("source_type", source_type),
            ("severity", severity),
            ("doc_type", doc_type),
            ("status", status),
        )
        for column, wanted in exact_filters:
            if not wanted or wanted == "all":
                continue
            where.append(f"{column} = ?")
            params.append(wanted.upper() if column == "severity" else wanted)
        if q:
            where.append(_SEARCH_CLAUSE)
            needle = f"%{_escape_like(q.lower())}%"
            params.extend([needle, needle])
        sql = (
            "SELECT * FROM unified_findings WHERE " + " AND ".join(where)
            + " ORDER BY CASE severity "
            "  WHEN 'CRITICAL' THEN 0 WHEN 'HIGH' THEN 1 "
            "  WHEN 'MEDIUM' THEN 2 WHEN 'LOW' THEN 3 "
            "  ELSE 4 END, source_type, label LIMIT ?"
        )
        params.append(int(limit))
        with _connection() as conn:
            conn.row_factory = sqlite3.Row
            conn.create_function("uf_lower", 1, _sql_lower)
            rows = conn.execute(sql, params).fetchall()
        out = []
        for r in rows:
            d = dict(r)
            d["action_recipe"] = _load_json(d.get("action_recipe"))
            d["payload"] = _load_json(d.get("payload"))
            out.append(d)
        return out
    except Exception as e:
        logger.warning("list_findings failed: %s", e)
        return []


def findings_summary(check_id: str) -> dict:
    """Return aggregate counts for the filter UI (source/severity/status).

    Returns:
        Dict with "total" plus "by_source", "by_severity", "by_status" and
        "by_doc_type" mappings of value -> row count. Empty/NULL values are
        reported under "-". On error the (possibly partial) summary built so
        far is returned and the error is logged.
    """
    out = {
        "total": 0,
        "by_source": {},
        "by_severity": {},
        "by_status": {},
        "by_doc_type": {},
    }
    try:
        _ensure_table()
        with _connection() as conn:
            conn.row_factory = sqlite3.Row
            for col, bucket in _SUMMARY_BUCKETS:
                rows = conn.execute(
                    f"SELECT {col} AS k, COUNT(*) AS n FROM unified_findings "
                    f"WHERE check_id=? GROUP BY {col}",
                    (check_id,),
                ).fetchall()
                counts: dict = {}
                for r in rows:
                    # NULL and '' both map to "-": add up, don't overwrite.
                    key = r["k"] or "-"
                    counts[key] = counts.get(key, 0) + r["n"]
                out[bucket] = counts
                out["total"] = max(out["total"], sum(counts.values()))
        return out
    except Exception as e:
        logger.warning("findings_summary failed: %s", e)
        return out