From 6ed30dae5ba1370f190b6bdad98c0d7eb3f4f286 Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sun, 17 May 2026 13:45:58 +0200 Subject: [PATCH] feat(agent): MC scorecard + audit drill-down + tenant trend (A1-A6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that all 1874 MCs run per check (Task #30 cap removal), the report was about to drown in noise. This commit adds the full aggregation / persistence / drill-down stack so each MC is actionable, not just counted. A1 mc_scorecard.py (new): build_scorecard(checks) -> per-regulation PASS/FAIL/SKIP + severity top_fails(checks, n) -> N most severe failed MCs full_audit_records(...) -> flat rows ready for sidecar SQLite A2 Email rendering: agent_doc_check_scorecard.py (new) builds an HTML scorecard table (regulation × passed/failed/HIGH/MEDIUM/score) shown at the top of the email. agent_doc_check_report._render_document now collapses the 500-MC L2 forest into 'X/Y bestanden (Z Fail)' summary plus a top-10 fails block per doc — old verbose render is gone. A3 compliance_audit_log.py (new) — sidecar SQLite at /data/compliance_audits.db (separate from compliance Postgres schema to comply with the no-new-migrations rule in CLAUDE.md): check_runs(check_id, ts, tenant_id, site_name, base_domain, doc_count, scorecard json, vvt_summary json) mc_results(check_id, doc_type, mc_id, label, passed, skipped, severity, regulation, matched_text, hint) Route persists every run after the email is sent. docker-compose.yml adds compliance-audit volume + env. A4 backfill_mc_regulation_llm.py (new) — Qwen-tagged backfill for the 1636 MCs the regex pass couldn't classify. Batches of 25, format=json, output constrained to the canonical regulation list. Run manually: docker exec bp-compliance-backend python3 \ /app/scripts/backfill_mc_regulation_llm.py [--dry-run] A5 Admin audit tab — GET /api/compliance/agent/audit/{check_id} proxied via /api/sdk/v1/agent/audit/{check_id}. 
New page /sdk/agent/audit/[checkId] renders scorecard + filterable MC table (status / doc_type / regulation, expandable rows with matched_text + hint). ComplianceCheckTab now shows 'Voll-Audit oeffnen' link. A6 Trend per tenant — GET /api/compliance/agent/audit/tenant/{tenant_id} returns recent runs. Email scorecard shows per-regulation delta badges ('(+12%)', '(-3%)') compared with the previous run for the same tenant + base_domain. Lookup is one SQLite query. Plumbing: rag_document_checker.py — SELECT now includes 'article'; MC results carry 'regulation' + 'article' through to CheckItem. agent_doc_check_routes.CheckItem schema gains regulation + article fields (defaults '') so old clients still parse. agent_compliance_check_routes — response gains 'check_id' so the frontend can build the audit link. --- .../api/sdk/v1/agent/audit/[checkId]/route.ts | 28 ++ .../agent/_components/ComplianceCheckTab.tsx | 23 +- .../app/sdk/agent/audit/[checkId]/page.tsx | 277 ++++++++++++++++++ .../api/agent_compliance_check_routes.py | 130 +++++++- .../compliance/api/agent_doc_check_report.py | 32 ++ .../compliance/api/agent_doc_check_routes.py | 4 + .../api/agent_doc_check_scorecard.py | 137 +++++++++ .../services/compliance_audit_log.py | 196 +++++++++++++ .../compliance/services/mc_scorecard.py | 151 ++++++++++ .../services/rag_document_checker.py | 6 +- .../scripts/backfill_mc_regulation_llm.py | 181 ++++++++++++ docker-compose.yml | 4 + 12 files changed, 1159 insertions(+), 10 deletions(-) create mode 100644 admin-compliance/app/api/sdk/v1/agent/audit/[checkId]/route.ts create mode 100644 admin-compliance/app/sdk/agent/audit/[checkId]/page.tsx create mode 100644 backend-compliance/compliance/api/agent_doc_check_scorecard.py create mode 100644 backend-compliance/compliance/services/compliance_audit_log.py create mode 100644 backend-compliance/compliance/services/mc_scorecard.py create mode 100644 backend-compliance/scripts/backfill_mc_regulation_llm.py diff --git 
a/admin-compliance/app/api/sdk/v1/agent/audit/[checkId]/route.ts b/admin-compliance/app/api/sdk/v1/agent/audit/[checkId]/route.ts new file mode 100644 index 00000000..5e599ea0 --- /dev/null +++ b/admin-compliance/app/api/sdk/v1/agent/audit/[checkId]/route.ts @@ -0,0 +1,28 @@ +/** + * Proxy: GET /api/sdk/v1/agent/audit/ + * -> backend GET /api/compliance/agent/audit/ + * + * Forwards optional query params (doc_type, regulation, only_failed). + */ +import { NextRequest, NextResponse } from 'next/server' + +const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:8002' + +export async function GET( + request: NextRequest, + { params }: { params: { checkId: string } }, +) { + const checkId = params.checkId + const qs = request.nextUrl.searchParams.toString() + const url = `${BACKEND_URL}/api/compliance/agent/audit/${checkId}${qs ? `?${qs}` : ''}` + try { + const resp = await fetch(url, { signal: AbortSignal.timeout(15000) }) + const data = await resp.json() + return NextResponse.json(data, { status: resp.status }) + } catch { + return NextResponse.json( + { error: 'Audit-Abfrage fehlgeschlagen' }, + { status: 503 }, + ) + } +} diff --git a/admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx b/admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx index 3c041c80..4f67ab23 100644 --- a/admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx +++ b/admin-compliance/app/sdk/agent/_components/ComplianceCheckTab.tsx @@ -66,6 +66,7 @@ interface HistoryEntry { docCount: number findings: number resultKey: string + checkId?: string } export function ComplianceCheckTab() { @@ -454,13 +455,21 @@ export function ComplianceCheckTab() { - {/* Email status */} - {results.email_status && ( -
- - E-Mail: {results.email_status === 'sent' ? 'Gesendet' : results.email_status} -
- )} + {/* Email status + Full-audit link */} +
+ {results.email_status && ( +
+ + E-Mail: {results.email_status === 'sent' ? 'Gesendet' : results.email_status} +
+ )} + {results.check_id && ( + + Voll-Audit oeffnen (alle MCs) → + + )} +
)} diff --git a/admin-compliance/app/sdk/agent/audit/[checkId]/page.tsx b/admin-compliance/app/sdk/agent/audit/[checkId]/page.tsx new file mode 100644 index 00000000..a13c61f7 --- /dev/null +++ b/admin-compliance/app/sdk/agent/audit/[checkId]/page.tsx @@ -0,0 +1,277 @@ +'use client' + +import React, { useEffect, useState, useMemo } from 'react' +import { use as useUnwrap } from 'react' + +type MCRow = { + id: number + doc_type: string + mc_id: string + label: string + passed: number + skipped: number + severity: string + regulation: string + matched_text: string + hint: string +} + +type ScorecardRow = { + regulation: string + total: number + passed: number + failed: number + skipped: number + pct: number + severity: Record +} + +type AuditResponse = { + found: boolean + run?: { + check_id: string + ts: string + site_name: string + base_domain: string + doc_count: number + scorecard: { by_regulation: ScorecardRow[]; totals: any } + vvt_summary: { total?: number; internal?: number; external?: number } + } + mc_count?: number + results?: MCRow[] +} + +const SEVERITY_COLOR: Record = { + CRITICAL: 'bg-red-600 text-white', + HIGH: 'bg-red-100 text-red-800', + MEDIUM: 'bg-amber-100 text-amber-800', + LOW: 'bg-blue-100 text-blue-800', + INFO: 'bg-gray-100 text-gray-600', +} + +const STATUS_FILTERS = [ + { value: 'all', label: 'Alle' }, + { value: 'failed', label: 'Nur Fail' }, + { value: 'passed', label: 'Nur Pass' }, + { value: 'skipped', label: 'Nur Skipped' }, +] as const + +export default function AuditPage( + { params }: { params: Promise<{ checkId: string }> }, +) { + const { checkId } = useUnwrap(params) + const [data, setData] = useState(null) + const [loading, setLoading] = useState(true) + const [error, setError] = useState(null) + const [filterStatus, setFilterStatus] = useState('failed') + const [filterReg, setFilterReg] = useState('') + const [filterDoc, setFilterDoc] = useState('') + const [expanded, setExpanded] = useState(null) + + useEffect(() => { + let 
cancelled = false + setLoading(true) + fetch(`/api/sdk/v1/agent/audit/${checkId}`) + .then(r => r.json()) + .then(d => { if (!cancelled) setData(d) }) + .catch(e => { if (!cancelled) setError(String(e)) }) + .finally(() => { if (!cancelled) setLoading(false) }) + return () => { cancelled = true } + }, [checkId]) + + const allRows = data?.results ?? [] + const docTypes = useMemo( + () => Array.from(new Set(allRows.map(r => r.doc_type))).sort(), + [allRows], + ) + const regulations = useMemo( + () => Array.from(new Set(allRows.map(r => r.regulation).filter(Boolean))).sort(), + [allRows], + ) + + const filtered = allRows.filter(r => { + if (filterStatus === 'failed' && (r.passed || r.skipped)) return false + if (filterStatus === 'passed' && !r.passed) return false + if (filterStatus === 'skipped' && !r.skipped) return false + if (filterReg && r.regulation !== filterReg) return false + if (filterDoc && r.doc_type !== filterDoc) return false + return true + }) + + if (loading) { + return
Lade Audit…
+ } + if (error || !data?.found) { + return ( +
+ Audit nicht gefunden{error ? `: ${error}` : ''}. +
+ ) + } + + const run = data.run! + const scorecard = run.scorecard?.by_regulation ?? [] + const totals = run.scorecard?.totals ?? { total: 0, passed: 0, failed: 0, pct: 0 } + + return ( +
+ {/* Header */} +
+

+ MC-Audit: {run.site_name} +

+

+ check_id {checkId} ·{' '} + {new Date(run.ts).toLocaleString('de-DE')} · {run.doc_count} Dokumente ·{' '} + {data.mc_count} MC-Eintraege +

+
+ + {/* Scorecard */} +
+
+

+ Compliance-Scorecard nach Regulation + + {totals.pct}% + + + ({totals.passed} bestanden, {totals.failed} Fail,{' '} + {totals.skipped} skipped — {totals.total} gesamt) + +

+
+ + + + + + + + + + + + + {scorecard.map(row => ( + setFilterReg(row.regulation === filterReg ? '' : row.regulation)}> + + + + + + + + ))} + +
RegulationPassedFailedHIGHMEDIUMScore
{row.regulation}{row.passed}{row.failed} + {(row.severity.HIGH || 0) + (row.severity.CRITICAL || 0)} + + {row.severity.MEDIUM || 0} + = 80 ? 'text-green-700' : + row.pct >= 50 ? 'text-amber-700' : 'text-red-700' + }`}>{row.pct}%
+
+ + {/* Filters */} +
+
+ {STATUS_FILTERS.map(f => ( + + ))} +
+ + + + {filtered.length} von {allRows.length} + +
+ + {/* Results */} +
+ + + + + + + + + + + + {filtered.map(row => ( + + setExpanded(expanded === row.id ? null : row.id)}> + + + + + + + {expanded === row.id && ( + + + + )} + + ))} + {filtered.length === 0 && ( + + + + )} + +
StatusDocRegulationMCSeverity
+ {row.passed ? ( + + ) : row.skipped ? ( + + ) : ( + + )} + {row.doc_type}{row.regulation || '—'}{row.label} + {row.severity || '—'} +
+
+ MC-ID: {row.mc_id} +
+ {row.matched_text && ( +
+ Treffer: + + "{row.matched_text}" + +
+ )} + {row.hint && ( +
+ {row.hint} +
+ )} +
+ Keine MCs entsprechen den aktuellen Filtern. +
+
+
+ ) +} diff --git a/backend-compliance/compliance/api/agent_compliance_check_routes.py b/backend-compliance/compliance/api/agent_compliance_check_routes.py index 0073ac42..01dd00d7 100644 --- a/backend-compliance/compliance/api/agent_compliance_check_routes.py +++ b/backend-compliance/compliance/api/agent_compliance_check_routes.py @@ -428,10 +428,50 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest): scanned_html = build_scanned_urls_html(doc_entries) providers_html = build_provider_list_html(banner_result, vvt_entries) vvt_html = build_vvt_table_html(cmp_vendors) + + # MC scorecard aggregated across ALL docs in this run (DSGVO/TDDDG/ + # BGB/...). Sits at the top so the GF sees the regulation-by- + # regulation view before drilling into per-doc details. + from compliance.services.mc_scorecard import build_scorecard + from .agent_doc_check_scorecard import build_scorecard_html + all_mc_checks: list[dict] = [] + for r in results: + for c in r.checks: + if c.id.startswith("mc-"): + all_mc_checks.append({ + "id": c.id, "label": c.label, "passed": c.passed, + "severity": c.severity, "skipped": c.skipped, + "regulation": c.regulation, + }) + scorecard = build_scorecard(all_mc_checks) if all_mc_checks else {} + # Trend: load previous scorecard for the same tenant + domain so the + # email can show delta indicators (A6). 
+ prev_scorecard: dict | None = None + if scorecard: + try: + from compliance.services.compliance_audit_log import ( + list_runs_for_tenant, + ) + tenant_id_for_trend = req.recipient or "" + base_domain_for_trend = _extract_domain(doc_entries) or "" + prev_runs = list_runs_for_tenant( + tenant_id_for_trend, + base_domain=base_domain_for_trend, + limit=1, + ) + if prev_runs: + prev_scorecard = prev_runs[0].get("scorecard") + except Exception as e: + logger.debug("trend lookup skipped: %s", e) + scorecard_html = ( + build_scorecard_html(scorecard, previous_scorecard=prev_scorecard) + if scorecard else "" + ) + report_html = build_html_report(results, None) profile_html = _build_profile_html(profile) full_html = ( - summary_html + scanned_html + profile_html + summary_html + scanned_html + profile_html + scorecard_html + providers_html + vvt_html + report_html ) @@ -452,6 +492,7 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest): # Step 7: Store result response = { + "check_id": check_id, "results": [_result_to_dict(r) for r in results], "business_profile": profile_dict, "extracted_profile": extracted_profile, @@ -474,6 +515,45 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest): _compliance_check_jobs[check_id]["progress"] = "Fertig" _compliance_check_jobs[check_id]["progress_pct"] = 100 + # Persist to sidecar SQLite audit log — enables /audit endpoints + # (A5 admin tab) and trend view (A6). Best-effort; failures here + # do not affect the user-facing response. 
+ try: + from compliance.services.compliance_audit_log import record_check_run + from compliance.services.mc_scorecard import full_audit_records + audit_rows: list[dict] = [] + for r in results: + doc_mc = [c for c in r.checks if c.id.startswith("mc-")] + audit_rows.extend(full_audit_records( + [{"id": c.id, "label": c.label, "passed": c.passed, + "severity": c.severity, "skipped": c.skipped, + "regulation": c.regulation, "matched_text": c.matched_text, + "hint": c.hint, "level": c.level} + for c in doc_mc], + check_id=check_id, + doc_type=r.doc_type, + )) + record_check_run( + check_id=check_id, + tenant_id=req.recipient or "", + site_name=site_name, + base_domain=domain or "", + doc_count=doc_count, + scorecard=scorecard, + vvt_summary={ + "total": len(cmp_vendors), + "internal": sum(1 for v in cmp_vendors + if (v.get("recipient_type") or "").upper() + in ("INTERNAL", "GROUP_COMPANY")), + "external": sum(1 for v in cmp_vendors + if (v.get("recipient_type") or "").upper() + in ("PROCESSOR", "CONTROLLER")), + }, + mc_records=audit_rows, + ) + except Exception as e: + logger.warning("Audit persistence skipped: %s", e) + except Exception as e: logger.error("Compliance check %s failed: %s", check_id, e, exc_info=True) _compliance_check_jobs[check_id]["status"] = "failed" @@ -1060,3 +1140,51 @@ def _build_profile_html(profile) -> str: # Cross-check extracted to compliance.services.banner_cookie_cross_check from compliance.services.banner_cookie_cross_check import cross_check_banner_vs_cookie as _cross_check_banner_vs_cookie + + +# ── Admin: audit drill-down (A5) + trend view (A6) ────────────────── + +@router.get("/audit/{check_id}") +async def audit_drill_down( + check_id: str, + doc_type: str = "", + regulation: str = "", + only_failed: bool = False, +): + """Return scorecard + filterable MC results for a single check run. + + Frontend uses this to render the /sdk/agent/audit/ view. 
+ """ + from compliance.services.compliance_audit_log import ( + get_check_run, list_mc_results, + ) + run = get_check_run(check_id) + if not run: + return {"check_id": check_id, "found": False} + rows = list_mc_results( + check_id, + doc_type=doc_type or None, + regulation=regulation or None, + only_failed=only_failed, + ) + return { + "check_id": check_id, + "found": True, + "run": run, + "mc_count": len(rows), + "results": rows, + } + + +@router.get("/audit/tenant/{tenant_id}") +async def audit_tenant_history( + tenant_id: str, + base_domain: str = "", + limit: int = 30, +): + """Tenant-level history for the trend view (A6).""" + from compliance.services.compliance_audit_log import list_runs_for_tenant + runs = list_runs_for_tenant( + tenant_id, base_domain=base_domain or None, limit=limit, + ) + return {"tenant_id": tenant_id, "count": len(runs), "runs": runs} diff --git a/backend-compliance/compliance/api/agent_doc_check_report.py b/backend-compliance/compliance/api/agent_doc_check_report.py index 0435ab0b..257352c3 100644 --- a/backend-compliance/compliance/api/agent_doc_check_report.py +++ b/backend-compliance/compliance/api/agent_doc_check_report.py @@ -245,6 +245,38 @@ def _render_document(html: list[str], r: DocCheckResult) -> None: html.append('
') for c in l1_checks: _render_l1_check(html, c, l2_by_parent.get(c.id, [])) + + # Master-Control aggregation: with 1874 MCs evaluated per run, + # rendering every L2 check inline produces ~600 rows per doc and + # makes the email unreadable. Show only top-N severe fails plus a + # one-line summary. Full results live in /sdk/agent/audit/. + from compliance.api.agent_doc_check_scorecard import build_top_fails_html + from compliance.services.mc_scorecard import top_fails + + mc_results = [ + {"id": c.id, "label": c.label, "passed": c.passed, + "severity": c.severity, "skipped": c.skipped, "hint": c.hint, + "regulation": c.regulation} + for c in r.checks + if c.id.startswith("mc-") + ] + if mc_results: + n_total = len(mc_results) + n_passed = sum(1 for x in mc_results if x["passed"]) + n_skipped = sum(1 for x in mc_results if x["skipped"]) + n_failed = n_total - n_passed - n_skipped + html.append( + f'
' + f'Master-Controls: {n_passed}/' + f'{n_total - n_skipped} bestanden ' + f'({n_failed} Fail)' + f'{f" + {n_skipped} nicht anwendbar" if n_skipped else ""}.' + f'
' + ) + top = top_fails(mc_results, n=10) + html.append(build_top_fails_html(top, r.label)) + if r.word_count: html.append( f'
str: + """Render the MC scorecard as an HTML table. + + Expects the dict returned by mc_scorecard.build_scorecard. When + `previous_scorecard` is passed (the run right before this one for + the same tenant + domain), each row shows a delta indicator + ('+12%', '-3%') so the DSB sees direction-of-travel at a glance. + """ + if not scorecard: + return "" + rows = scorecard.get("by_regulation") or [] + totals = scorecard.get("totals") or {} + if not rows: + return "" + + prev_by_reg: dict[str, int] = {} + prev_total_pct: int | None = None + if previous_scorecard: + prev_total_pct = int((previous_scorecard.get("totals") or {}).get("pct") or 0) + for r in (previous_scorecard.get("by_regulation") or []): + prev_by_reg[r.get("regulation", "")] = int(r.get("pct", 0)) + + overall_pct = int(totals.get("pct", 0)) + overall_color = ("#16a34a" if overall_pct >= 80 else + "#d97706" if overall_pct >= 50 else "#dc2626") + trend_str = _delta_badge(overall_pct, prev_total_pct) if prev_total_pct is not None else "" + + head = ( + '
' + '

' + 'MC-Scorecard (Pflichtangaben aus Master-Controls)

' + f'

' + f'{overall_pct}%{trend_str} ' + f'gesamt · {totals.get("passed", 0)} bestanden, ' + f'{totals.get("failed", 0)} nicht bestanden ' + f'({totals.get("total", 0)} MCs ausgewertet, ' + f'{totals.get("skipped", 0)} nicht anwendbar).

' + ) + + body = [ + '' + '' + '' + '' + '' + '' + '' + '' + '' + ] + for r in rows: + pct = int(r.get("pct", 0)) + color = ("#16a34a" if pct >= 80 else + "#d97706" if pct >= 50 else "#dc2626") + sev = r.get("severity") or {} + prev = prev_by_reg.get(r["regulation"]) + trend = _delta_badge(pct, prev) + body.append( + f'' + f'' + f'' + f'' + f'' + f'' + f'' + f'' + ) + body.append('
RegulationBestandenFailHIGHMEDIUMScore
{r["regulation"]}' + f'{r["passed"]}' + f'{r["failed"]}' + f'{sev.get("HIGH", 0) + sev.get("CRITICAL", 0)}' + f'{sev.get("MEDIUM", 0)}{pct}%{trend}
') + return head + "".join(body) + + +def _delta_badge(current: int, previous: int | None) -> str: + """Render a small ±N% badge next to a percentage when previous is known.""" + if previous is None or previous == current: + return "" + delta = current - previous + if delta > 0: + color, sign = "#16a34a", "+" + else: + color, sign = "#dc2626", "" + return (f' ' + f'({sign}{delta}%)') + + +def build_top_fails_html(fails: list[dict], doc_label: str) -> str: + """Render top-N severe MC fails as a compact card list per document.""" + if not fails: + return "" + out = [ + '
' + f'' + f'Top-Auffaelligkeiten in {doc_label} ({len(fails)})' + '
    ' + ] + for f in fails: + sev = f.get("severity") or "MEDIUM" + reg = f.get("regulation") or "" + reg_str = f' [{reg}]' if reg else "" + label = f.get("label") or "Unnamed" + hint = (f.get("hint") or "")[:200] + out.append( + f'
  • ' + f'{sev}' + f'{reg_str} — {label}' + + (f'
    ' + f'{hint}
    ' if hint else "") + + '
  • ' + ) + out.append('
') + return "".join(out) diff --git a/backend-compliance/compliance/services/compliance_audit_log.py b/backend-compliance/compliance/services/compliance_audit_log.py new file mode 100644 index 00000000..9ce74c02 --- /dev/null +++ b/backend-compliance/compliance/services/compliance_audit_log.py @@ -0,0 +1,196 @@ +""" +Compliance-Check Audit Log — sidecar SQLite persistence. + +Every compliance-check run flattens its MC results into rows here so +we have: + - per-tenant history of scorecards (Task A6 trend view) + - drill-down on individual MCs for the admin frontend (Task A5) + - export-ability (DSB receives JSON attachment derived from this) + +Sidecar SQLite (`/data/compliance_audits.db`) instead of a new table in +the compliance schema, because the repo policy forbids new migrations +without explicit DB-owner sign-off (see CLAUDE.md guardrails). +""" + +from __future__ import annotations + +import json +import logging +import os +import sqlite3 +from datetime import datetime, timezone +from pathlib import Path + +logger = logging.getLogger(__name__) + +DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db") + + +def _ensure_db() -> None: + Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True) + with sqlite3.connect(DB_PATH) as conn: + conn.executescript(""" + CREATE TABLE IF NOT EXISTS check_runs ( + check_id TEXT PRIMARY KEY, + ts TEXT NOT NULL, + tenant_id TEXT, + site_name TEXT, + base_domain TEXT, + doc_count INTEGER, + scorecard TEXT, -- JSON {by_regulation, totals} + vvt_summary TEXT -- JSON {total, internal, external, critical} + ); + CREATE INDEX IF NOT EXISTS idx_runs_tenant ON check_runs(tenant_id, ts); + CREATE INDEX IF NOT EXISTS idx_runs_domain ON check_runs(base_domain, ts); + + CREATE TABLE IF NOT EXISTS mc_results ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + check_id TEXT NOT NULL, + doc_type TEXT, + mc_id TEXT, + label TEXT, + passed INTEGER, + skipped INTEGER, + severity TEXT, + regulation TEXT, + matched_text TEXT, + hint TEXT 
+ ); + CREATE INDEX IF NOT EXISTS idx_mc_check ON mc_results(check_id); + CREATE INDEX IF NOT EXISTS idx_mc_reg ON mc_results(regulation, passed); + """) + + +def record_check_run( + check_id: str, + tenant_id: str, + site_name: str, + base_domain: str, + doc_count: int, + scorecard: dict, + vvt_summary: dict | None = None, + mc_records: list[dict] | None = None, +) -> None: + """Persist one check run + all its MC rows. Idempotent on check_id.""" + try: + _ensure_db() + ts = datetime.now(timezone.utc).isoformat() + with sqlite3.connect(DB_PATH) as conn: + conn.execute( + "INSERT OR REPLACE INTO check_runs " + "(check_id, ts, tenant_id, site_name, base_domain, doc_count, " + " scorecard, vvt_summary) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + ( + check_id, ts, tenant_id, site_name, base_domain, doc_count, + json.dumps(scorecard, ensure_ascii=False), + json.dumps(vvt_summary or {}, ensure_ascii=False), + ), + ) + # Clear old rows for the same check_id before re-inserting (idempotency) + conn.execute("DELETE FROM mc_results WHERE check_id=?", (check_id,)) + if mc_records: + conn.executemany( + "INSERT INTO mc_results " + "(check_id, doc_type, mc_id, label, passed, skipped, " + " severity, regulation, matched_text, hint) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + [ + ( + r.get("check_id", check_id), + r.get("doc_type", ""), + r.get("mc_id", ""), + (r.get("label") or "")[:300], + 1 if r.get("passed") else 0, + 1 if r.get("skipped") else 0, + (r.get("severity") or "").upper(), + r.get("regulation") or "", + (r.get("matched_text") or "")[:500], + (r.get("hint") or "")[:500], + ) + for r in mc_records + ], + ) + conn.commit() + logger.info("Audit recorded: check_id=%s mc_rows=%d", + check_id, len(mc_records or [])) + except Exception as e: + logger.warning("Audit persistence failed for %s: %s", check_id, e) + + +# ── Read API (used by the admin endpoints + trend view) ───────────── + +def get_check_run(check_id: str) -> dict | None: + try: + _ensure_db() + with 
sqlite3.connect(DB_PATH) as conn: + conn.row_factory = sqlite3.Row + row = conn.execute( + "SELECT * FROM check_runs WHERE check_id=?", (check_id,), + ).fetchone() + if not row: + return None + d = dict(row) + d["scorecard"] = json.loads(d.get("scorecard") or "{}") + d["vvt_summary"] = json.loads(d.get("vvt_summary") or "{}") + return d + except Exception as e: + logger.warning("get_check_run failed: %s", e) + return None + + +def list_mc_results( + check_id: str, + doc_type: str | None = None, + regulation: str | None = None, + only_failed: bool = False, +) -> list[dict]: + try: + _ensure_db() + where = ["check_id = ?"] + params: list = [check_id] + if doc_type: + where.append("doc_type = ?") + params.append(doc_type) + if regulation: + where.append("regulation = ?") + params.append(regulation) + if only_failed: + where.append("passed = 0 AND skipped = 0") + sql = ("SELECT * FROM mc_results WHERE " + " AND ".join(where) + + " ORDER BY severity, label") + with sqlite3.connect(DB_PATH) as conn: + conn.row_factory = sqlite3.Row + rows = conn.execute(sql, params).fetchall() + return [dict(r) for r in rows] + except Exception as e: + logger.warning("list_mc_results failed: %s", e) + return [] + + +def list_runs_for_tenant( + tenant_id: str, + base_domain: str | None = None, + limit: int = 30, +) -> list[dict]: + try: + _ensure_db() + where = ["tenant_id = ?"] + params: list = [tenant_id] + if base_domain: + where.append("base_domain = ?") + params.append(base_domain) + sql = ("SELECT * FROM check_runs WHERE " + " AND ".join(where) + + " ORDER BY ts DESC LIMIT ?") + params.append(limit) + with sqlite3.connect(DB_PATH) as conn: + conn.row_factory = sqlite3.Row + rows = conn.execute(sql, params).fetchall() + out = [] + for r in rows: + d = dict(r) + d["scorecard"] = json.loads(d.get("scorecard") or "{}") + out.append(d) + return out + except Exception as e: + logger.warning("list_runs_for_tenant failed: %s", e) + return [] diff --git 
a/backend-compliance/compliance/services/mc_scorecard.py b/backend-compliance/compliance/services/mc_scorecard.py new file mode 100644 index 00000000..b2a01524 --- /dev/null +++ b/backend-compliance/compliance/services/mc_scorecard.py @@ -0,0 +1,151 @@ +""" +Master-Control Scorecard — group + summarise MC results. + +With max_controls=0 (#30 fix) every doc-check now evaluates 75-571 MCs +per document. Rendering all of them verbatim makes the email + frontend +unreadable. This module produces three structured artefacts: + +1. `build_scorecard(check_results)` — per-regulation aggregate (PASS / + FAIL / SKIP counts + severity histogram + compliance %) + +2. `top_fails(check_results, n=10)` — top-N failed MCs ranked by + severity then absence of evidence + +3. `full_audit_records(check_results, check_id, tenant_id)` — flat + list ready for SQLite persistence + JSON export + +The functions are pure — no DB / network — so they're cheap to call +from inside the route and unit-testable. +""" + +from __future__ import annotations + +import logging +from collections import defaultdict +from datetime import datetime, timezone + +logger = logging.getLogger(__name__) + +# Severity order: CRITICAL > HIGH > MEDIUM > LOW > INFO +_SEV_RANK = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3, "INFO": 4} + + +def build_scorecard(check_results: list[dict]) -> dict: + """Aggregate per-regulation pass/fail/skip + severity buckets. + + Args: + check_results: list of dicts, each typically a CheckItem-like + record with keys: id, label, passed, severity, skipped, + regulation, doc_type. 
+ + Returns: + { + "by_regulation": [ + {"regulation": "DSGVO", "total": 193, "passed": 167, + "failed": 24, "skipped": 2, "pct": 87, + "severity": {"HIGH": 22, "MEDIUM": 2}} + ], + "totals": {"total": 1874, "passed": 1300, "failed": 540, + "skipped": 34, "pct": 70}, + } + """ + buckets: dict[str, dict] = defaultdict( + lambda: {"total": 0, "passed": 0, "failed": 0, "skipped": 0, + "severity": defaultdict(int)}, + ) + for r in check_results or []: + reg = (r.get("regulation") or "—").strip() or "—" + b = buckets[reg] + b["total"] += 1 + if r.get("skipped"): + b["skipped"] += 1 + elif r.get("passed"): + b["passed"] += 1 + else: + b["failed"] += 1 + sev = (r.get("severity") or "MEDIUM").upper() + b["severity"][sev] += 1 + + rows = [] + grand_total = grand_passed = grand_failed = grand_skipped = 0 + for reg, b in buckets.items(): + # Convert defaultdict for serialisability + sev_dict = dict(b["severity"]) + active = b["total"] - b["skipped"] + pct = round(b["passed"] / active * 100) if active else 0 + rows.append({ + "regulation": reg, + "total": b["total"], + "passed": b["passed"], + "failed": b["failed"], + "skipped": b["skipped"], + "pct": pct, + "severity": sev_dict, + }) + grand_total += b["total"] + grand_passed += b["passed"] + grand_failed += b["failed"] + grand_skipped += b["skipped"] + + rows.sort(key=lambda r: (-r["failed"], r["regulation"])) + + grand_active = grand_total - grand_skipped + grand_pct = round(grand_passed / grand_active * 100) if grand_active else 0 + return { + "by_regulation": rows, + "totals": { + "total": grand_total, "passed": grand_passed, + "failed": grand_failed, "skipped": grand_skipped, + "pct": grand_pct, + }, + } + + +def top_fails(check_results: list[dict], n: int = 10) -> list[dict]: + """Return top-N failing MCs sorted by severity then label. + + Skipped + passed MCs are excluded. INFO severity is excluded by + default since those are guidance, not findings. 
+ """ + fails = [ + r for r in (check_results or []) + if not r.get("passed") and not r.get("skipped") + and (r.get("severity") or "").upper() != "INFO" + ] + fails.sort(key=lambda r: ( + _SEV_RANK.get((r.get("severity") or "MEDIUM").upper(), 5), + r.get("label", ""), + )) + return fails[:n] + + +def full_audit_records( + check_results: list[dict], + check_id: str, + tenant_id: str = "", + doc_type: str = "", +) -> list[dict]: + """Flatten check results into rows ready for SQLite persistence. + + Returns one record per MC. Keeps the original fields plus + check_id + doc_type + tenant_id + ts. + """ + ts = datetime.now(timezone.utc).isoformat() + out: list[dict] = [] + for r in check_results or []: + out.append({ + "check_id": check_id, + "tenant_id": tenant_id, + "doc_type": doc_type, + "ts": ts, + "mc_id": r.get("id", ""), + "label": (r.get("label") or "")[:300], + "passed": bool(r.get("passed")), + "skipped": bool(r.get("skipped")), + "severity": (r.get("severity") or "").upper(), + "regulation": r.get("regulation") or "", + "matched_text": (r.get("matched_text") or "")[:500], + "hint": (r.get("hint") or "")[:500], + "level": int(r.get("level") or 1), + }) + return out diff --git a/backend-compliance/compliance/services/rag_document_checker.py b/backend-compliance/compliance/services/rag_document_checker.py index 6fc67524..875479f2 100644 --- a/backend-compliance/compliance/services/rag_document_checker.py +++ b/backend-compliance/compliance/services/rag_document_checker.py @@ -171,6 +171,8 @@ def _check_mc_deterministic(text_lower: str, mc: dict) -> Optional[dict]: "hint": question if not passed else "", "source": "master_control", "criteria_met": f"{criteria_met}/{total_criteria}", + "regulation": mc.get("regulation") or "", + "article": mc.get("article") or "", } @@ -282,8 +284,8 @@ async def _load_controls(doc_type: str, db_url: str, limit: int) -> list[dict]: return [] try: - query = """SELECT id, control_id, title, regulation, check_question, - 
pass_criteria, fail_criteria, severity + query = """SELECT id, control_id, title, regulation, article, + check_question, pass_criteria, fail_criteria, severity FROM compliance.doc_check_controls WHERE doc_type = $1 ORDER BY severity DESC, title""" diff --git a/backend-compliance/scripts/backfill_mc_regulation_llm.py b/backend-compliance/scripts/backfill_mc_regulation_llm.py new file mode 100644 index 00000000..e19e1b41 --- /dev/null +++ b/backend-compliance/scripts/backfill_mc_regulation_llm.py @@ -0,0 +1,181 @@ +""" +LLM-tagged regulation backfill v2. + +The regex pass (backfill_mc_regulation.py) covers ~13% of MCs that +quote a norm inline ('Art. X DSGVO', '§Y TDDDG'). The remaining 1636 +are abstract security/process controls that don't cite a norm by name +but DO map to one (e.g. 'Pseudonymisierung' -> DSGVO Art. 32). + +This script asks Qwen (Ollama) in batches of 25 to classify each MC. +Output is constrained: one of DSGVO / TDDDG / BGB / HGB / AO / TMG / +BDSG / MStV / UWG / VSBG / NIS2 / ISO27001 / BSI-GS / NIST / OTHER / +NONE — plus an optional article hint. 
+ +Run inside the backend container: + docker exec bp-compliance-backend python3 \\ + /app/scripts/backfill_mc_regulation_llm.py [--dry-run] [--limit N] +""" + +from __future__ import annotations + +import argparse +import asyncio +import json +import logging +import os +import re +import sys + +import asyncpg +import httpx + +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") +logger = logging.getLogger(__name__) + + +OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434") +MODEL = os.getenv("CMP_LLM_MODEL", "qwen3:30b-a3b") +BATCH = 25 + +_ALLOWED = { + "DSGVO", "TDDDG", "BGB", "HGB", "AO", "TMG", "BDSG", "MStV", "UWG", + "VSBG", "PAngV", "GwG", "NIS2", "ISO27001", "BSI-GS", "NIST", "TKG", + "EU-VO", "OTHER", "NONE", +} + + +_SYSTEM = ( + "Du klassifizierst Compliance Master-Controls nach der wichtigsten " + "rechtlichen oder normativen Grundlage. Antworte AUSSCHLIESSLICH " + 'mit JSON: {"items": [{"id":"", "regulation":"", "article":""}]}. ' + "Eine MC -> EIN Eintrag. Wenn keine Norm passt: NONE. " + "Keine Erklaerung, kein Markdown." 
+) + + +def _build_user_prompt(batch: list[dict]) -> str: + lines = ["Klassifiziere diese Master-Controls:"] + for r in batch: + title = (r["title"] or "")[:120] + q = (r["check_question"] or "")[:200] + lines.append(f'- id={r["id_short"]} | {title}') + if q: + lines.append(f" -> {q}") + return "\n".join(lines) + + +async def _ask_llm(batch: list[dict]) -> dict[str, dict]: + payload = { + "model": MODEL, + "messages": [ + {"role": "system", "content": _SYSTEM}, + {"role": "user", "content": _build_user_prompt(batch)}, + ], + "stream": False, "format": "json", + "options": {"temperature": 0.0, "num_predict": 2500}, + } + try: + async with httpx.AsyncClient(timeout=120.0) as client: + resp = await client.post( + f"{OLLAMA_URL.rstrip('/')}/api/chat", json=payload, + ) + resp.raise_for_status() + content = (resp.json().get("message") or {}).get("content", "") + except Exception as e: + logger.warning("LLM call failed: %s", e) + return {} + + if not content: + return {} + # Strip qwen thinking + content = re.sub(r".*?", "", content, flags=re.DOTALL).strip() + try: + obj = json.loads(content) + except Exception: + return {} + items = obj.get("items") if isinstance(obj, dict) else None + if not isinstance(items, list): + return {} + out: dict[str, dict] = {} + for it in items: + if not isinstance(it, dict): + continue + sid = str(it.get("id", "")).strip() + reg = str(it.get("regulation", "")).strip().upper() + art = str(it.get("article", "")).strip() + if not sid or reg not in _ALLOWED: + continue + if reg == "NONE": + continue + out[sid] = {"regulation": reg, "article": art[:120]} + return out + + +async def main(dry_run: bool, limit: int) -> None: + db = os.getenv("DATABASE_URL") + if not db: + print("DATABASE_URL not set", file=sys.stderr) + sys.exit(1) + conn = await asyncpg.connect(db) + + sql = ("SELECT id, title, check_question FROM compliance.doc_check_controls " + "WHERE regulation IS NULL ORDER BY id") + if limit > 0: + sql += f" LIMIT {limit}" + rows = await 
conn.fetch(sql) + print(f"{len(rows)} MCs without regulation — calling LLM in batches of {BATCH}") + + by_short: dict[str, str] = {} + batches: list[list[dict]] = [] + cur: list[dict] = [] + for r in rows: + sid = str(r["id"])[:8] + by_short[sid] = str(r["id"]) + cur.append({"id_short": sid, "title": r["title"], + "check_question": r["check_question"]}) + if len(cur) >= BATCH: + batches.append(cur); cur = [] + if cur: + batches.append(cur) + + updates: list[tuple[str, str, str]] = [] # (regulation, article, uuid) + hits: dict[str, int] = {} + for i, batch in enumerate(batches, 1): + logger.info("batch %d/%d (%d items)", i, len(batches), len(batch)) + res = await _ask_llm(batch) + for short, m in res.items(): + uuid = by_short.get(short) + if not uuid: + continue + updates.append((m["regulation"], m["article"], uuid)) + hits[m["regulation"]] = hits.get(m["regulation"], 0) + 1 + print(f"\nLLM classified: {sum(hits.values())} / {len(rows)}") + for k, v in sorted(hits.items(), key=lambda x: -x[1]): + print(f" {k:10s} {v:>5}") + + if dry_run: + print("\nDRY RUN — no UPDATE issued.") + await conn.close() + return + + for i in range(0, len(updates), 200): + chunk = updates[i:i + 200] + await conn.executemany( + "UPDATE compliance.doc_check_controls " + "SET regulation = $1, article = $2 WHERE id = $3::uuid", + chunk, + ) + print(f"\nApplied {len(updates)} updates.") + await conn.close() + + +if __name__ == "__main__": + ap = argparse.ArgumentParser() + ap.add_argument("--dry-run", action="store_true") + ap.add_argument("--limit", type=int, default=0, + help="process only N MCs (0 = all)") + args = ap.parse_args() + asyncio.run(main(args.dry_run, args.limit)) diff --git a/docker-compose.yml b/docker-compose.yml index 2e9a4bc7..15a0087f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,6 +13,7 @@ networks: volumes: dsms_data: cmp-data: # consent-tester: CMP discovery log + auto-promoted modules + compliance-audit: # backend-compliance: per-check audit log 
(SQLite) services: @@ -124,6 +125,9 @@ services: OVH_LLM_URL: ${OVH_LLM_URL:-} OVH_LLM_KEY: ${OVH_LLM_KEY:-} OVH_LLM_MODEL: ${OVH_LLM_MODEL:-} + COMPLIANCE_AUDIT_DB: /data/compliance_audits.db + volumes: + - compliance-audit:/data extra_hosts: - "host.docker.internal:host-gateway" depends_on: