feat(agent): MC scorecard + audit drill-down + tenant trend (A1-A6)

Now that all 1874 MCs run per check (Task #30 cap removal), the report
was about to drown in noise. This commit adds the full aggregation /
persistence / drill-down stack so each MC is actionable, not just
counted.

A1 mc_scorecard.py (new):
  build_scorecard(checks)    -> per-regulation PASS/FAIL/SKIP + severity
  top_fails(checks, n)       -> N most severe failed MCs
  full_audit_records(...)    -> flat rows ready for sidecar SQLite

A2 Email rendering:
  agent_doc_check_scorecard.py (new) builds an HTML scorecard table
  (regulation × passed/failed/HIGH/MEDIUM/score) shown at the top of
  the email. agent_doc_check_report._render_document now collapses
  the 500-MC L2 forest into 'X/Y bestanden (Z Fail)' summary plus
  a top-10 fails block per doc — old verbose render is gone.

A3 compliance_audit_log.py (new) — sidecar SQLite at
  /data/compliance_audits.db (separate from compliance Postgres
  schema to comply with the no-new-migrations rule in CLAUDE.md):
    check_runs(check_id, ts, tenant_id, site_name, base_domain,
               doc_count, scorecard json, vvt_summary json)
    mc_results(check_id, doc_type, mc_id, label, passed, skipped,
               severity, regulation, matched_text, hint)
  Route persists every run after the email is sent.
  docker-compose.yml adds compliance-audit volume + env.

A4 backfill_mc_regulation_llm.py (new) — Qwen-tagged backfill for
  the 1636 MCs the regex pass couldn't classify. Batches of 25,
  format=json, output constrained to the canonical regulation list.
  Run manually: docker exec bp-compliance-backend python3 \
                 /app/scripts/backfill_mc_regulation_llm.py [--dry-run]

A5 Admin audit tab — GET /api/compliance/agent/audit/<check_id>
  proxied via /api/sdk/v1/agent/audit/<id>. New page
  /sdk/agent/audit/[checkId] renders scorecard + filterable MC table
  (status / doc_type / regulation, expandable rows with matched_text
  + hint). ComplianceCheckTab now shows 'Voll-Audit oeffnen' link.

A6 Trend per tenant — GET /api/compliance/agent/audit/tenant/<id>
  returns recent runs. Email scorecard shows per-regulation delta
  badges ('(+12%)', '(-3%)') compared with the previous run for the
  same tenant + base_domain. Lookup is one SQLite query.

Plumbing:
  rag_document_checker.py — SELECT now includes 'article'; MC results
    carry 'regulation' + 'article' through to CheckItem.
  agent_doc_check_routes.CheckItem schema gains regulation + article
    fields (defaults '') so old clients still parse.
  agent_compliance_check_routes — response gains 'check_id' so the
    frontend can build the audit link.
This commit is contained in:
Benjamin Admin
2026-05-17 13:45:58 +02:00
parent 6d29191e9b
commit 6ed30dae5b
12 changed files with 1159 additions and 10 deletions
@@ -0,0 +1,28 @@
/**
* Proxy: GET /api/sdk/v1/agent/audit/<checkId>
* -> backend GET /api/compliance/agent/audit/<checkId>
*
* Forwards optional query params (doc_type, regulation, only_failed).
*/
import { NextRequest, NextResponse } from 'next/server'
const BACKEND_URL = process.env.BACKEND_API_URL || 'http://backend-compliance:8002'
/**
 * Proxies an audit drill-down request to the compliance backend.
 *
 * The incoming query string is forwarded unchanged. The backend's JSON body
 * and HTTP status are passed through as-is. Any network error, timeout
 * (15 s) or non-JSON backend body results in a 503 with a generic error
 * payload.
 */
export async function GET(
  request: NextRequest,
  { params }: { params: { checkId: string } },
) {
  // Encode the path segment so that a checkId containing '/', '?' or '#'
  // cannot alter the backend path or smuggle extra query parameters.
  const checkId = encodeURIComponent(params.checkId)
  const qs = request.nextUrl.searchParams.toString()
  const url = `${BACKEND_URL}/api/compliance/agent/audit/${checkId}${qs ? `?${qs}` : ''}`
  try {
    const resp = await fetch(url, { signal: AbortSignal.timeout(15000) })
    const data: unknown = await resp.json()
    return NextResponse.json(data, { status: resp.status })
  } catch {
    return NextResponse.json(
      { error: 'Audit-Abfrage fehlgeschlagen' },
      { status: 503 },
    )
  }
}
@@ -66,6 +66,7 @@ interface HistoryEntry {
docCount: number
findings: number
resultKey: string
checkId?: string
}
export function ComplianceCheckTab() {
@@ -454,13 +455,21 @@ export function ComplianceCheckTab() {
<ChecklistView results={results.results} />
{/* Email status */}
{results.email_status && (
<div className="mt-3 text-xs text-gray-500 flex items-center gap-2">
<span className={`w-2 h-2 rounded-full ${results.email_status === 'sent' ? 'bg-green-400' : 'bg-gray-300'}`} />
E-Mail: {results.email_status === 'sent' ? 'Gesendet' : results.email_status}
</div>
)}
{/* Email status + Full-audit link */}
<div className="mt-3 flex items-center justify-between gap-3">
{results.email_status && (
<div className="text-xs text-gray-500 flex items-center gap-2">
<span className={`w-2 h-2 rounded-full ${results.email_status === 'sent' ? 'bg-green-400' : 'bg-gray-300'}`} />
E-Mail: {results.email_status === 'sent' ? 'Gesendet' : results.email_status}
</div>
)}
{results.check_id && (
<a href={`/sdk/agent/audit/${results.check_id}`} target="_blank" rel="noopener"
className="text-xs text-blue-700 hover:text-blue-900 underline">
Voll-Audit oeffnen (alle MCs) &rarr;
</a>
)}
</div>
</div>
)}
@@ -0,0 +1,277 @@
'use client'
import React, { useEffect, useState, useMemo } from 'react'
import { use as useUnwrap } from 'react'
/** One persisted Master-Control (MC) result row; fields mirror the columns of the `mc_results` audit table. */
type MCRow = {
// Auto-increment primary key of the audit row; used as React key and as the "expanded" marker.
id: number
doc_type: string
mc_id: string
label: string
// Stored as 1/0 by the audit log, hence number rather than boolean.
passed: number
// Stored as 1/0 by the audit log; a row that is neither passed nor skipped counts as failed.
skipped: number
// Upper-cased on write (e.g. 'HIGH'); also the lookup key into SEVERITY_COLOR.
severity: string
// Empty string when the MC has no regulation assigned.
regulation: string
matched_text: string
hint: string
}
/** Per-regulation aggregate as produced by the backend scorecard builder. */
type ScorecardRow = {
regulation: string
total: number
passed: number
failed: number
skipped: number
// Integer percentage shown in the "Score" column.
pct: number
// Count per severity name (e.g. { HIGH: 22, MEDIUM: 2 }).
severity: Record<string, number>
}
/** Response of GET /api/sdk/v1/agent/audit/<checkId>. */
type AuditResponse = {
// false when the backend has no run for this check_id; the optional fields below are then absent.
found: boolean
run?: {
check_id: string
// ISO timestamp string; parsed with `new Date(...)` for display.
ts: string
site_name: string
base_domain: string
doc_count: number
scorecard: { by_regulation: ScorecardRow[]; totals: any }
vvt_summary: { total?: number; internal?: number; external?: number }
}
// Number of rows in `results`.
mc_count?: number
results?: MCRow[]
}
// Tailwind classes for the severity badge, keyed by the severity string of an MC row.
// Severities not listed here fall back to a neutral badge at the call site.
const SEVERITY_COLOR: Record<string, string> = {
CRITICAL: 'bg-red-600 text-white',
HIGH: 'bg-red-100 text-red-800',
MEDIUM: 'bg-amber-100 text-amber-800',
LOW: 'bg-blue-100 text-blue-800',
INFO: 'bg-gray-100 text-gray-600',
}
// Status filter pills. `as const` keeps the `value` literals narrow so the
// filter state can be typed as the union of these values.
const STATUS_FILTERS = [
{ value: 'all', label: 'Alle' },
{ value: 'failed', label: 'Nur Fail' },
{ value: 'passed', label: 'Nur Pass' },
{ value: 'skipped', label: 'Nur Skipped' },
] as const
/**
 * Admin drill-down page for a single compliance-check run.
 *
 * Loads the run via the `/api/sdk/v1/agent/audit/<checkId>` proxy and renders
 * (1) a header, (2) the per-regulation scorecard and (3) a filterable table
 * of all MC rows with expandable details (MC id, matched text, hint).
 * Filtering by status / doc type / regulation happens client-side on the
 * rows that were loaded once.
 */
export default function AuditPage(
  { params }: { params: Promise<{ checkId: string }> },
) {
  const { checkId } = useUnwrap(params)
  const [data, setData] = useState<AuditResponse | null>(null)
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [filterStatus, setFilterStatus] = useState<typeof STATUS_FILTERS[number]['value']>('failed')
  const [filterReg, setFilterReg] = useState<string>('')
  const [filterDoc, setFilterDoc] = useState<string>('')
  const [expanded, setExpanded] = useState<number | null>(null)

  useEffect(() => {
    // `cancelled` guards against state updates after unmount or after the
    // checkId changed while a request was still in flight.
    let cancelled = false
    setLoading(true)
    // Clear a stale error from a previous checkId before loading again.
    setError(null)
    fetch(`/api/sdk/v1/agent/audit/${checkId}`)
      .then(r => {
        // Surface proxy/backend failures as errors instead of treating the
        // error payload as audit data.
        if (!r.ok) throw new Error(`HTTP ${r.status}`)
        return r.json()
      })
      .then(d => { if (!cancelled) setData(d) })
      .catch(e => { if (!cancelled) setError(String(e)) })
      .finally(() => { if (!cancelled) setLoading(false) })
    return () => { cancelled = true }
  }, [checkId])

  // Memoised so the fallback `[]` keeps a stable identity between renders;
  // otherwise the dependent memos below would recompute on every render.
  const allRows = useMemo(() => data?.results ?? [], [data])

  const docTypes = useMemo(
    () => Array.from(new Set(allRows.map(r => r.doc_type))).sort(),
    [allRows],
  )
  const regulations = useMemo(
    () => Array.from(new Set(allRows.map(r => r.regulation).filter(Boolean))).sort(),
    [allRows],
  )
  const filtered = useMemo(
    () => allRows.filter(r => {
      // "failed" means neither passed nor skipped.
      if (filterStatus === 'failed' && (r.passed || r.skipped)) return false
      if (filterStatus === 'passed' && !r.passed) return false
      if (filterStatus === 'skipped' && !r.skipped) return false
      if (filterReg && r.regulation !== filterReg) return false
      if (filterDoc && r.doc_type !== filterDoc) return false
      return true
    }),
    [allRows, filterStatus, filterReg, filterDoc],
  )

  if (loading) {
    return <div className="p-6 text-sm text-gray-500">Lade Audit…</div>
  }
  // Also guard against `found: true` without a `run` object so the render
  // below never dereferences undefined.
  if (error || !data?.found || !data.run) {
    return (
      <div className="p-6 text-sm text-red-600">
        Audit nicht gefunden{error ? `: ${error}` : ''}.
      </div>
    )
  }

  const run = data.run
  const scorecard = run.scorecard?.by_regulation ?? []
  const totals = run.scorecard?.totals ?? { total: 0, passed: 0, failed: 0, skipped: 0, pct: 0 }

  return (
    <div className="space-y-6 p-6 max-w-6xl">
      {/* Header */}
      <div>
        <h1 className="text-xl font-semibold text-gray-900">
          MC-Audit: {run.site_name}
        </h1>
        <p className="text-xs text-gray-500 mt-1">
          check_id <code className="bg-gray-100 px-1 rounded">{checkId}</code> ·{' '}
          {new Date(run.ts).toLocaleString('de-DE')} · {run.doc_count} Dokumente ·{' '}
          {data.mc_count} MC-Eintraege
        </p>
      </div>

      {/* Scorecard */}
      <div className="border rounded-lg overflow-hidden">
        <div className="px-4 py-3 bg-blue-50 border-b border-blue-100">
          <h2 className="text-sm font-medium text-blue-900">
            Compliance-Scorecard nach Regulation
            <span className="ml-2 text-blue-700 font-semibold text-base">
              {totals.pct}%
            </span>
            <span className="ml-2 text-xs text-blue-600">
              ({totals.passed} bestanden, {totals.failed} Fail,{' '}
              {totals.skipped ?? 0} skipped · {totals.total} gesamt)
            </span>
          </h2>
        </div>
        <table className="w-full text-xs">
          <thead className="bg-gray-50 text-gray-600">
            <tr>
              <th className="px-3 py-2 text-left">Regulation</th>
              <th className="px-3 py-2 text-center">Passed</th>
              <th className="px-3 py-2 text-center">Failed</th>
              <th className="px-3 py-2 text-center">HIGH</th>
              <th className="px-3 py-2 text-center">MEDIUM</th>
              <th className="px-3 py-2 text-right">Score</th>
            </tr>
          </thead>
          <tbody>
            {scorecard.map(row => (
              // Clicking a scorecard row toggles the regulation filter of the table below.
              <tr key={row.regulation} className="border-t hover:bg-blue-50/30 cursor-pointer"
                onClick={() => setFilterReg(row.regulation === filterReg ? '' : row.regulation)}>
                <td className="px-3 py-2 font-medium">{row.regulation}</td>
                <td className="px-3 py-2 text-center text-green-700">{row.passed}</td>
                <td className="px-3 py-2 text-center text-red-700">{row.failed}</td>
                <td className="px-3 py-2 text-center text-red-700">
                  {/* CRITICAL is folded into the HIGH column. */}
                  {(row.severity?.HIGH ?? 0) + (row.severity?.CRITICAL ?? 0)}
                </td>
                <td className="px-3 py-2 text-center text-amber-700">
                  {row.severity?.MEDIUM ?? 0}
                </td>
                <td className={`px-3 py-2 text-right font-semibold ${
                  row.pct >= 80 ? 'text-green-700' :
                  row.pct >= 50 ? 'text-amber-700' : 'text-red-700'
                }`}>{row.pct}%</td>
              </tr>
            ))}
          </tbody>
        </table>
      </div>

      {/* Filters */}
      <div className="flex flex-wrap gap-3 items-center text-xs">
        <div className="flex gap-1">
          {STATUS_FILTERS.map(f => (
            <button key={f.value}
              onClick={() => setFilterStatus(f.value)}
              className={`px-2.5 py-1 rounded-full border ${
                filterStatus === f.value
                  ? 'bg-blue-600 text-white border-blue-600'
                  : 'bg-white text-gray-600 border-gray-200 hover:border-gray-300'
              }`}>{f.label}</button>
          ))}
        </div>
        <select value={filterDoc} onChange={e => setFilterDoc(e.target.value)}
          className="border border-gray-200 rounded px-2 py-1">
          <option value="">Alle Doc-Types</option>
          {docTypes.map(d => <option key={d} value={d}>{d}</option>)}
        </select>
        <select value={filterReg} onChange={e => setFilterReg(e.target.value)}
          className="border border-gray-200 rounded px-2 py-1">
          <option value="">Alle Regulations</option>
          {regulations.map(r => <option key={r} value={r}>{r}</option>)}
        </select>
        <span className="text-gray-500">
          {filtered.length} von {allRows.length}
        </span>
      </div>

      {/* Results */}
      <div className="border rounded-lg overflow-hidden">
        <table className="w-full text-xs">
          <thead className="bg-gray-50 text-gray-600">
            <tr>
              <th className="px-3 py-2 text-left">Status</th>
              <th className="px-3 py-2 text-left">Doc</th>
              <th className="px-3 py-2 text-left">Regulation</th>
              <th className="px-3 py-2 text-left">MC</th>
              <th className="px-3 py-2 text-left">Severity</th>
            </tr>
          </thead>
          <tbody>
            {filtered.map(row => (
              <React.Fragment key={row.id}>
                <tr className="border-t cursor-pointer hover:bg-gray-50"
                  onClick={() => setExpanded(expanded === row.id ? null : row.id)}>
                  <td className="px-3 py-2">
                    {/* Visible glyph plus title so the status is not conveyed by colour alone. */}
                    {row.passed ? (
                      <span className="text-green-600" title="Pass">✓</span>
                    ) : row.skipped ? (
                      <span className="text-gray-400" title="Skipped">–</span>
                    ) : (
                      <span className="text-red-600" title="Fail">✗</span>
                    )}
                  </td>
                  <td className="px-3 py-2 text-gray-700">{row.doc_type}</td>
                  <td className="px-3 py-2 text-gray-500">{row.regulation || '—'}</td>
                  <td className="px-3 py-2 text-gray-900">{row.label}</td>
                  <td className="px-3 py-2">
                    <span className={`px-2 py-0.5 rounded text-[10px] font-medium ${
                      SEVERITY_COLOR[row.severity] || 'bg-gray-100'
                    }`}>{row.severity || '—'}</span>
                  </td>
                </tr>
                {expanded === row.id && (
                  <tr className="bg-gray-50/50">
                    <td colSpan={5} className="px-3 py-3 text-xs">
                      <div className="text-gray-500 mb-1">
                        MC-ID: <code>{row.mc_id}</code>
                      </div>
                      {row.matched_text && (
                        <div className="mb-2">
                          <span className="text-green-700 font-medium">Treffer: </span>
                          <span className="font-mono text-gray-700">
                            "{row.matched_text}"
                          </span>
                        </div>
                      )}
                      {row.hint && (
                        <div className="text-amber-700 bg-amber-50 border-l-2 border-amber-200 pl-2 py-1">
                          {row.hint}
                        </div>
                      )}
                    </td>
                  </tr>
                )}
              </React.Fragment>
            ))}
            {filtered.length === 0 && (
              <tr>
                <td colSpan={5} className="px-3 py-6 text-center text-gray-400">
                  Keine MCs entsprechen den aktuellen Filtern.
                </td>
              </tr>
            )}
          </tbody>
        </table>
      </div>
    </div>
  )
}
@@ -428,10 +428,50 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
scanned_html = build_scanned_urls_html(doc_entries)
providers_html = build_provider_list_html(banner_result, vvt_entries)
vvt_html = build_vvt_table_html(cmp_vendors)
# MC scorecard aggregated across ALL docs in this run (DSGVO/TDDDG/
# BGB/...). Sits at the top so the GF sees the regulation-by-
# regulation view before drilling into per-doc details.
from compliance.services.mc_scorecard import build_scorecard
from .agent_doc_check_scorecard import build_scorecard_html
all_mc_checks: list[dict] = []
for r in results:
for c in r.checks:
if c.id.startswith("mc-"):
all_mc_checks.append({
"id": c.id, "label": c.label, "passed": c.passed,
"severity": c.severity, "skipped": c.skipped,
"regulation": c.regulation,
})
scorecard = build_scorecard(all_mc_checks) if all_mc_checks else {}
# Trend: load previous scorecard for the same tenant + domain so the
# email can show delta indicators (A6).
prev_scorecard: dict | None = None
if scorecard:
try:
from compliance.services.compliance_audit_log import (
list_runs_for_tenant,
)
tenant_id_for_trend = req.recipient or ""
base_domain_for_trend = _extract_domain(doc_entries) or ""
prev_runs = list_runs_for_tenant(
tenant_id_for_trend,
base_domain=base_domain_for_trend,
limit=1,
)
if prev_runs:
prev_scorecard = prev_runs[0].get("scorecard")
except Exception as e:
logger.debug("trend lookup skipped: %s", e)
scorecard_html = (
build_scorecard_html(scorecard, previous_scorecard=prev_scorecard)
if scorecard else ""
)
report_html = build_html_report(results, None)
profile_html = _build_profile_html(profile)
full_html = (
summary_html + scanned_html + profile_html
summary_html + scanned_html + profile_html + scorecard_html
+ providers_html + vvt_html + report_html
)
@@ -452,6 +492,7 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
# Step 7: Store result
response = {
"check_id": check_id,
"results": [_result_to_dict(r) for r in results],
"business_profile": profile_dict,
"extracted_profile": extracted_profile,
@@ -474,6 +515,45 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
_compliance_check_jobs[check_id]["progress"] = "Fertig"
_compliance_check_jobs[check_id]["progress_pct"] = 100
# Persist to sidecar SQLite audit log — enables /audit endpoints
# (A5 admin tab) and trend view (A6). Best-effort; failures here
# do not affect the user-facing response.
try:
from compliance.services.compliance_audit_log import record_check_run
from compliance.services.mc_scorecard import full_audit_records
audit_rows: list[dict] = []
for r in results:
doc_mc = [c for c in r.checks if c.id.startswith("mc-")]
audit_rows.extend(full_audit_records(
[{"id": c.id, "label": c.label, "passed": c.passed,
"severity": c.severity, "skipped": c.skipped,
"regulation": c.regulation, "matched_text": c.matched_text,
"hint": c.hint, "level": c.level}
for c in doc_mc],
check_id=check_id,
doc_type=r.doc_type,
))
record_check_run(
check_id=check_id,
tenant_id=req.recipient or "",
site_name=site_name,
base_domain=domain or "",
doc_count=doc_count,
scorecard=scorecard,
vvt_summary={
"total": len(cmp_vendors),
"internal": sum(1 for v in cmp_vendors
if (v.get("recipient_type") or "").upper()
in ("INTERNAL", "GROUP_COMPANY")),
"external": sum(1 for v in cmp_vendors
if (v.get("recipient_type") or "").upper()
in ("PROCESSOR", "CONTROLLER")),
},
mc_records=audit_rows,
)
except Exception as e:
logger.warning("Audit persistence skipped: %s", e)
except Exception as e:
logger.error("Compliance check %s failed: %s", check_id, e, exc_info=True)
_compliance_check_jobs[check_id]["status"] = "failed"
@@ -1060,3 +1140,51 @@ def _build_profile_html(profile) -> str:
# Cross-check extracted to compliance.services.banner_cookie_cross_check
from compliance.services.banner_cookie_cross_check import cross_check_banner_vs_cookie as _cross_check_banner_vs_cookie
# ── Admin: audit drill-down (A5) + trend view (A6) ──────────────────
@router.get("/audit/{check_id}")
async def audit_drill_down(
    check_id: str,
    doc_type: str = "",
    regulation: str = "",
    only_failed: bool = False,
):
    """Serve the scorecard and the (optionally filtered) MC rows of one run.

    Consumed by the /sdk/agent/audit/<check_id> admin page. An unknown
    check_id yields ``{"check_id": ..., "found": False}``; otherwise the
    stored run, the filtered MC rows and their count are returned. Empty
    ``doc_type`` / ``regulation`` values mean "no filter".
    """
    from compliance.services.compliance_audit_log import (
        get_check_run, list_mc_results,
    )

    payload: dict = {"check_id": check_id, "found": False}
    run_record = get_check_run(check_id)
    if run_record:
        mc_rows = list_mc_results(
            check_id,
            doc_type=doc_type if doc_type else None,
            regulation=regulation if regulation else None,
            only_failed=only_failed,
        )
        payload.update(
            found=True,
            run=run_record,
            mc_count=len(mc_rows),
            results=mc_rows,
        )
    return payload
@router.get("/audit/tenant/{tenant_id}")
async def audit_tenant_history(
    tenant_id: str,
    base_domain: str = "",
    limit: int = 30,
):
    """Tenant-level history for the trend view (A6).

    Args:
        tenant_id: Tenant whose runs are listed.
        base_domain: Optional domain filter; empty string means all domains.
        limit: Maximum number of runs to return. Clamped to 1..200 because
            SQLite treats a negative LIMIT as "no limit", which would let a
            client dump the full history in one request.

    Returns:
        ``{"tenant_id", "count", "runs"}`` with the runs as returned by
        ``list_runs_for_tenant``.
    """
    from compliance.services.compliance_audit_log import list_runs_for_tenant

    bounded_limit = max(1, min(limit, 200))
    runs = list_runs_for_tenant(
        tenant_id, base_domain=base_domain or None, limit=bounded_limit,
    )
    return {"tenant_id": tenant_id, "count": len(runs), "runs": runs}
@@ -245,6 +245,38 @@ def _render_document(html: list[str], r: DocCheckResult) -> None:
html.append('<div style="padding:8px 16px 12px">')
for c in l1_checks:
_render_l1_check(html, c, l2_by_parent.get(c.id, []))
# Master-Control aggregation: with 1874 MCs evaluated per run,
# rendering every L2 check inline produces ~600 rows per doc and
# makes the email unreadable. Show only top-N severe fails plus a
# one-line summary. Full results live in /sdk/agent/audit/<id>.
from compliance.api.agent_doc_check_scorecard import build_top_fails_html
from compliance.services.mc_scorecard import top_fails
mc_results = [
{"id": c.id, "label": c.label, "passed": c.passed,
"severity": c.severity, "skipped": c.skipped, "hint": c.hint,
"regulation": c.regulation}
for c in r.checks
if c.id.startswith("mc-")
]
if mc_results:
n_total = len(mc_results)
n_passed = sum(1 for x in mc_results if x["passed"])
n_skipped = sum(1 for x in mc_results if x["skipped"])
n_failed = n_total - n_passed - n_skipped
html.append(
f'<div style="margin-top:12px;padding-top:8px;'
f'border-top:1px solid #e5e7eb;font-size:11px;color:#475569">'
f'<strong>Master-Controls:</strong> {n_passed}/'
f'{n_total - n_skipped} bestanden '
f'<span style="color:#dc2626">({n_failed} Fail)</span>'
f'{f" + {n_skipped} nicht anwendbar" if n_skipped else ""}.'
f'</div>'
)
top = top_fails(mc_results, n=10)
html.append(build_top_fails_html(top, r.label))
if r.word_count:
html.append(
f'<div style="font-size:11px;color:#9ca3af;margin-top:8px;'
@@ -53,6 +53,10 @@ class CheckItem(BaseModel):
parent: str | None = None
skipped: bool = False
hint: str = ""
# Regulation + article are filled for MC-sourced items (e.g. 'DSGVO'
# + 'Art. 13 Abs. 1 lit. a'). Used by the mc_scorecard aggregator.
regulation: str = ""
article: str = ""
class DocCheckResult(BaseModel):
@@ -0,0 +1,137 @@
"""
Email rendering for the Master-Control scorecard + top-fails summary.
With all 1874 MCs now evaluated per run (#30 cap removed), the report
must summarise rather than dump everything. This module produces:
- build_scorecard_html(scorecard): compact regulation-by-regulation
  table at the top of the email
- build_top_fails_html(fails, doc_label): top-N severe MC fails as a
  small cards block underneath
"""
from __future__ import annotations
def build_scorecard_html(
    scorecard: dict,
    previous_scorecard: dict | None = None,
) -> str:
    """Render the MC scorecard as an HTML block for the report email.

    Args:
        scorecard: Dict with ``by_regulation`` (list of row dicts) and
            ``totals``, as returned by ``mc_scorecard.build_scorecard``.
        previous_scorecard: Scorecard of the preceding run for the same
            tenant + domain. When given, the overall score and every
            regulation present in it get a delta badge ('(+12%)', '(-3%)').

    Returns:
        The HTML fragment, or ``""`` when there is nothing to show (empty
        scorecard or no regulation rows).
    """
    from html import escape

    if not scorecard:
        return ""
    rows = scorecard.get("by_regulation") or []
    totals = scorecard.get("totals") or {}
    if not rows:
        return ""

    prev_by_reg: dict[str, int] = {}
    prev_total_pct: int | None = None
    if previous_scorecard:
        prev_total_pct = int((previous_scorecard.get("totals") or {}).get("pct") or 0)
        for prev_row in (previous_scorecard.get("by_regulation") or []):
            prev_by_reg[prev_row.get("regulation") or ""] = int(prev_row.get("pct") or 0)

    # `or 0` tolerates an explicit null pct instead of crashing in int().
    overall_pct = int(totals.get("pct") or 0)
    overall_color = ("#16a34a" if overall_pct >= 80 else
                     "#d97706" if overall_pct >= 50 else "#dc2626")
    trend_str = _delta_badge(overall_pct, prev_total_pct) if prev_total_pct is not None else ""
    head = (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:700px;margin:0 auto 16px;padding:12px 16px;'
        'background:#f0f9ff;border:1px solid #bae6fd;border-radius:8px">'
        '<h3 style="margin:0 0 6px;font-size:14px;color:#0369a1">'
        'MC-Scorecard (Pflichtangaben aus Master-Controls)</h3>'
        f'<p style="margin:0 0 10px;font-size:11px;color:#475569">'
        f'<strong style="color:{overall_color};font-size:13px">{overall_pct}%</strong>{trend_str} '
        f'gesamt &middot; {totals.get("passed", 0)} bestanden, '
        f'{totals.get("failed", 0)} nicht bestanden '
        f'({totals.get("total", 0)} MCs ausgewertet, '
        f'{totals.get("skipped", 0)} nicht anwendbar).</p>'
    )
    body = [
        '<table style="width:100%;border-collapse:collapse;font-size:11px">'
        '<thead><tr style="background:#dbeafe;color:#1e40af;text-align:left">'
        '<th style="padding:5px 8px">Regulation</th>'
        '<th style="padding:5px 8px;text-align:center">Bestanden</th>'
        '<th style="padding:5px 8px;text-align:center">Fail</th>'
        '<th style="padding:5px 8px;text-align:center">HIGH</th>'
        '<th style="padding:5px 8px;text-align:center">MEDIUM</th>'
        '<th style="padding:5px 8px;text-align:right">Score</th>'
        '</tr></thead><tbody>'
    ]
    for r in rows:
        pct = int(r.get("pct") or 0)
        color = ("#16a34a" if pct >= 80 else
                 "#d97706" if pct >= 50 else "#dc2626")
        sev = r.get("severity") or {}
        regulation = r.get("regulation") or ""
        trend = _delta_badge(pct, prev_by_reg.get(regulation))
        # Regulation names come from stored/classified data, so escape them
        # before they reach the email HTML. Unclassified MCs have an empty
        # name; show a dash instead of a blank cell.
        regulation_html = escape(str(regulation)) or "&mdash;"
        body.append(
            f'<tr style="border-top:1px solid #bfdbfe">'
            f'<td style="padding:5px 8px;color:#1e293b">{regulation_html}</td>'
            f'<td style="padding:5px 8px;text-align:center;color:#16a34a">'
            f'{r.get("passed", 0)}</td>'
            f'<td style="padding:5px 8px;text-align:center;color:#dc2626">'
            f'{r.get("failed", 0)}</td>'
            # CRITICAL is folded into the HIGH column.
            f'<td style="padding:5px 8px;text-align:center;color:#dc2626">'
            f'{sev.get("HIGH", 0) + sev.get("CRITICAL", 0)}</td>'
            f'<td style="padding:5px 8px;text-align:center;color:#d97706">'
            f'{sev.get("MEDIUM", 0)}</td>'
            f'<td style="padding:5px 8px;text-align:right;font-weight:600;'
            f'color:{color}">{pct}%{trend}</td>'
            f'</tr>'
        )
    body.append('</tbody></table></div>')
    return head + "".join(body)


def _delta_badge(current: int, previous: int | None) -> str:
    """Render a small ±N% badge next to a percentage when previous is known.

    Returns ``""`` when there is no previous value or the value did not
    change. Positive deltas are green with an explicit '+', negative deltas
    are red (the minus sign comes from the number itself).
    """
    if previous is None or previous == current:
        return ""
    delta = current - previous
    if delta > 0:
        color, sign = "#16a34a", "+"
    else:
        color, sign = "#dc2626", ""
    return (f' <span style="font-size:10px;color:{color};font-weight:500">'
            f'({sign}{delta}%)</span>')
def build_top_fails_html(fails: list[dict], doc_label: str) -> str:
    """Render top-N severe MC fails as a compact card list per document.

    Args:
        fails: Failed MC dicts; the keys read are ``severity``,
            ``regulation``, ``label`` and ``hint``. Missing or empty values
            fall back to 'MEDIUM', no regulation tag, 'Unnamed' and no hint
            line respectively.
        doc_label: Human-readable document name shown in the heading.

    Returns:
        The HTML fragment, or ``""`` when ``fails`` is empty.

    All dynamic text is HTML-escaped because labels, hints and document
    names are data, not markup. Hints are cut to 200 characters before
    escaping so an entity is never split in half.
    """
    from html import escape

    if not fails:
        return ""
    out = [
        '<div style="margin:10px 0 8px;padding:10px 12px;'
        'background:#fef2f2;border-left:3px solid #fca5a5;border-radius:4px">'
        f'<strong style="font-size:12px;color:#991b1b">'
        f'Top-Auffaelligkeiten in {escape(str(doc_label))} ({len(fails)})</strong>'
        '<ul style="margin:6px 0 0 18px;padding:0;font-size:11px;'
        'color:#7f1d1d">'
    ]
    for fail in fails:
        sev = escape(str(fail.get("severity") or "MEDIUM"))
        reg = escape(str(fail.get("regulation") or ""))
        reg_str = f' <span style="color:#94a3b8">[{reg}]</span>' if reg else ""
        label = escape(str(fail.get("label") or "Unnamed"))
        hint = escape(str(fail.get("hint") or "")[:200])
        out.append(
            f'<li style="margin-bottom:4px">'
            f'<span style="color:#dc2626;font-weight:600">{sev}</span>'
            f'{reg_str} &mdash; {label}'
            + (f'<div style="font-size:10px;color:#94a3b8;margin-top:1px">'
               f'{hint}</div>' if hint else "")
            + '</li>'
        )
    out.append('</ul></div>')
    return "".join(out)
@@ -0,0 +1,196 @@
"""
Compliance-Check Audit Log — sidecar SQLite persistence.
Every compliance-check run flattens its MC results into rows here so
we have:
- per-tenant history of scorecards (Task A6 trend view)
- drill-down on individual MCs for the admin frontend (Task A5)
- export-ability (DSB receives JSON attachment derived from this)
Sidecar SQLite (`/data/compliance_audits.db`) instead of a new table in
the compliance schema, because the repo policy forbids new migrations
without explicit DB-owner sign-off (see CLAUDE.md guardrails).
"""
from __future__ import annotations
import json
import logging
import os
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
logger = logging.getLogger(__name__)
DB_PATH = os.getenv("COMPLIANCE_AUDIT_DB", "/data/compliance_audits.db")
def _ensure_db() -> None:
    """Create the audit database file, tables and indexes if missing.

    Safe to call repeatedly: the parent directory is created with
    ``exist_ok`` and all DDL statements use ``IF NOT EXISTS``.
    """
    from contextlib import closing

    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    # sqlite3's own context manager only scopes the transaction and does not
    # close the connection, so wrap it in closing() to avoid leaking a
    # handle on every call.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS check_runs (
                check_id TEXT PRIMARY KEY,
                ts TEXT NOT NULL,
                tenant_id TEXT,
                site_name TEXT,
                base_domain TEXT,
                doc_count INTEGER,
                scorecard TEXT, -- JSON {by_regulation, totals}
                vvt_summary TEXT -- JSON {total, internal, external, critical}
            );
            CREATE INDEX IF NOT EXISTS idx_runs_tenant ON check_runs(tenant_id, ts);
            CREATE INDEX IF NOT EXISTS idx_runs_domain ON check_runs(base_domain, ts);
            CREATE TABLE IF NOT EXISTS mc_results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                check_id TEXT NOT NULL,
                doc_type TEXT,
                mc_id TEXT,
                label TEXT,
                passed INTEGER,
                skipped INTEGER,
                severity TEXT,
                regulation TEXT,
                matched_text TEXT,
                hint TEXT
            );
            CREATE INDEX IF NOT EXISTS idx_mc_check ON mc_results(check_id);
            CREATE INDEX IF NOT EXISTS idx_mc_reg ON mc_results(regulation, passed);
        """)
def record_check_run(
    check_id: str,
    tenant_id: str,
    site_name: str,
    base_domain: str,
    doc_count: int,
    scorecard: dict,
    vvt_summary: dict | None = None,
    mc_records: list[dict] | None = None,
) -> None:
    """Persist one check run + all its MC rows. Idempotent on check_id.

    The run row is upserted and the MC rows for ``check_id`` are replaced
    inside one transaction, so a failed write leaves the previous state
    intact. ``label`` is truncated to 300 characters, ``matched_text`` and
    ``hint`` to 500; ``severity`` is stored upper-cased.

    Best-effort by design: any exception is logged as a warning and
    swallowed so that audit persistence can never fail the check itself.
    """
    from contextlib import closing

    try:
        _ensure_db()
        ts = datetime.now(timezone.utc).isoformat()
        # closing() releases the connection (sqlite3's context manager alone
        # does not); the inner `conn` context commits on success and rolls
        # back on error.
        with closing(sqlite3.connect(DB_PATH)) as conn, conn:
            conn.execute(
                "INSERT OR REPLACE INTO check_runs "
                "(check_id, ts, tenant_id, site_name, base_domain, doc_count, "
                " scorecard, vvt_summary) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    check_id, ts, tenant_id, site_name, base_domain, doc_count,
                    json.dumps(scorecard, ensure_ascii=False),
                    json.dumps(vvt_summary or {}, ensure_ascii=False),
                ),
            )
            # Clear old rows for the same check_id before re-inserting (idempotency)
            conn.execute("DELETE FROM mc_results WHERE check_id=?", (check_id,))
            if mc_records:
                conn.executemany(
                    "INSERT INTO mc_results "
                    "(check_id, doc_type, mc_id, label, passed, skipped, "
                    " severity, regulation, matched_text, hint) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                    [
                        (
                            r.get("check_id", check_id),
                            r.get("doc_type", ""),
                            r.get("mc_id", ""),
                            (r.get("label") or "")[:300],
                            1 if r.get("passed") else 0,
                            1 if r.get("skipped") else 0,
                            (r.get("severity") or "").upper(),
                            r.get("regulation") or "",
                            (r.get("matched_text") or "")[:500],
                            (r.get("hint") or "")[:500],
                        )
                        for r in mc_records
                    ],
                )
        logger.info("Audit recorded: check_id=%s mc_rows=%d",
                    check_id, len(mc_records or []))
    except Exception as e:
        logger.warning("Audit persistence failed for %s: %s", check_id, e)
# ── Read API (used by the admin endpoints + trend view) ─────────────
def get_check_run(check_id: str) -> dict | None:
    """Load one stored check run by id.

    Returns:
        The ``check_runs`` row as a dict with ``scorecard`` and
        ``vvt_summary`` decoded from JSON, or ``None`` when the id is
        unknown or the lookup fails (failures are logged as warnings).
    """
    from contextlib import closing

    try:
        _ensure_db()
        # closing() is needed because sqlite3's context manager does not
        # close the connection.
        with closing(sqlite3.connect(DB_PATH)) as conn:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM check_runs WHERE check_id=?", (check_id,),
            ).fetchone()
        if not row:
            return None
        d = dict(row)
        d["scorecard"] = json.loads(d.get("scorecard") or "{}")
        d["vvt_summary"] = json.loads(d.get("vvt_summary") or "{}")
        return d
    except Exception as e:
        logger.warning("get_check_run failed: %s", e)
        return None
def list_mc_results(
    check_id: str,
    doc_type: str | None = None,
    regulation: str | None = None,
    only_failed: bool = False,
) -> list[dict]:
    """List the MC rows of one check run, most severe first.

    Args:
        check_id: Run whose rows are returned.
        doc_type: Optional exact-match filter on the document type.
        regulation: Optional exact-match filter on the regulation.
        only_failed: When true, return only rows that are neither passed
            nor skipped.

    Returns:
        Row dicts ordered CRITICAL > HIGH > MEDIUM > LOW > INFO > other,
        then by label. An empty list is returned on any error (logged as a
        warning).
    """
    from contextlib import closing

    try:
        _ensure_db()
        where = ["check_id = ?"]
        params: list = [check_id]
        if doc_type:
            where.append("doc_type = ?")
            params.append(doc_type)
        if regulation:
            where.append("regulation = ?")
            params.append(regulation)
        if only_failed:
            where.append("passed = 0 AND skipped = 0")
        # A plain ORDER BY severity sorts alphabetically (INFO and LOW would
        # come before MEDIUM), so rank the severities explicitly.
        severity_rank = (
            "CASE severity WHEN 'CRITICAL' THEN 0 WHEN 'HIGH' THEN 1 "
            "WHEN 'MEDIUM' THEN 2 WHEN 'LOW' THEN 3 WHEN 'INFO' THEN 4 "
            "ELSE 5 END"
        )
        sql = ("SELECT * FROM mc_results WHERE " + " AND ".join(where)
               + " ORDER BY " + severity_rank + ", label")
        # closing() is needed because sqlite3's context manager does not
        # close the connection.
        with closing(sqlite3.connect(DB_PATH)) as conn:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(sql, params).fetchall()
        return [dict(r) for r in rows]
    except Exception as e:
        logger.warning("list_mc_results failed: %s", e)
        return []
def list_runs_for_tenant(
    tenant_id: str,
    base_domain: str | None = None,
    limit: int = 30,
) -> list[dict]:
    """List the most recent check runs of a tenant, newest first.

    Args:
        tenant_id: Tenant whose runs are listed.
        base_domain: Optional exact-match filter on the base domain; a
            falsy value means all domains.
        limit: Maximum number of runs. Negative values are treated as 0,
            since SQLite would otherwise interpret them as "no limit".

    Returns:
        ``check_runs`` rows as dicts with ``scorecard`` and ``vvt_summary``
        decoded from JSON (same shape as ``get_check_run``). An empty list
        is returned on any error (logged as a warning).
    """
    from contextlib import closing

    try:
        _ensure_db()
        where = ["tenant_id = ?"]
        params: list = [tenant_id]
        if base_domain:
            where.append("base_domain = ?")
            params.append(base_domain)
        sql = ("SELECT * FROM check_runs WHERE " + " AND ".join(where)
               + " ORDER BY ts DESC LIMIT ?")
        params.append(max(0, limit))
        # closing() is needed because sqlite3's context manager does not
        # close the connection.
        with closing(sqlite3.connect(DB_PATH)) as conn:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(sql, params).fetchall()
        out = []
        for r in rows:
            d = dict(r)
            d["scorecard"] = json.loads(d.get("scorecard") or "{}")
            # Decode vvt_summary as well so list and single-run reads agree.
            d["vvt_summary"] = json.loads(d.get("vvt_summary") or "{}")
            out.append(d)
        return out
    except Exception as e:
        logger.warning("list_runs_for_tenant failed: %s", e)
        return []
@@ -0,0 +1,151 @@
"""
Master-Control Scorecard — group + summarise MC results.
With max_controls=0 (#30 fix) every doc-check now evaluates 75-571 MCs
per document. Rendering all of them verbatim makes the email + frontend
unreadable. This module produces three structured artefacts:
1. `build_scorecard(check_results)`: per-regulation aggregate (PASS /
   FAIL / SKIP counts + severity histogram + compliance %)
2. `top_fails(check_results, n=10)`: top-N failed MCs ranked by
   severity then absence of evidence
3. `full_audit_records(check_results, check_id, tenant_id)`: flat
   list ready for SQLite persistence + JSON export
The functions are pure (no DB / network), so they're cheap to call
from inside the route and unit-testable.
"""
from __future__ import annotations
import logging
from collections import defaultdict
from datetime import datetime, timezone
logger = logging.getLogger(__name__)
# Severity order: CRITICAL > HIGH > MEDIUM > LOW > INFO
_SEV_RANK = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3, "INFO": 4}
def build_scorecard(check_results: list[dict]) -> dict:
    """Summarise MC results per regulation and overall.

    Each result is counted under its stripped ``regulation`` value;
    missing or blank values share the ``""`` bucket. A result counts as
    skipped if ``skipped`` is truthy, otherwise as passed if ``passed``
    is truthy, otherwise as failed. Only failed results feed the
    severity histogram (severity is upper-cased, ``MEDIUM`` when absent).

    Args:
        check_results: CheckItem-like dicts. The keys read here are
            ``regulation``, ``skipped``, ``passed`` and ``severity``.
            ``None`` is handled like an empty list.

    Returns:
        {
          "by_regulation": [
            {"regulation": "DSGVO", "total": 193, "passed": 167,
             "failed": 24, "skipped": 2, "pct": 87,
             "severity": {"HIGH": 22, "MEDIUM": 2}}
          ],
          "totals": {"total": 1874, "passed": 1300, "failed": 540,
                     "skipped": 34, "pct": 70},
        }
        ``by_regulation`` is ordered by failed count (descending), then
        regulation name. ``pct`` is passed / (total - skipped), rounded,
        and 0 when nothing was evaluated.
    """
    def _share(passed: int, total: int, skipped: int) -> int:
        # Skipped MCs are left out of the denominator.
        evaluated = total - skipped
        return round(passed / evaluated * 100) if evaluated else 0

    tally: dict[str, dict] = {}
    for item in check_results or []:
        name = (item.get("regulation") or "").strip()
        entry = tally.setdefault(
            name,
            {"total": 0, "passed": 0, "failed": 0, "skipped": 0,
             "severity": {}},
        )
        entry["total"] += 1
        if item.get("skipped"):
            entry["skipped"] += 1
            continue
        if item.get("passed"):
            entry["passed"] += 1
            continue
        entry["failed"] += 1
        level = (item.get("severity") or "MEDIUM").upper()
        entry["severity"][level] = entry["severity"].get(level, 0) + 1

    table = [
        {
            "regulation": name,
            "total": entry["total"],
            "passed": entry["passed"],
            "failed": entry["failed"],
            "skipped": entry["skipped"],
            "pct": _share(entry["passed"], entry["total"], entry["skipped"]),
            "severity": dict(entry["severity"]),
        }
        for name, entry in tally.items()
    ]
    table.sort(key=lambda row: (-row["failed"], row["regulation"]))

    overall = {
        field: sum(row[field] for row in table)
        for field in ("total", "passed", "failed", "skipped")
    }
    overall["pct"] = _share(
        overall["passed"], overall["total"], overall["skipped"],
    )
    return {"by_regulation": table, "totals": overall}
def top_fails(check_results: list[dict], n: int = 10) -> list[dict]:
    """Return the top-N failing MCs, most severe first.

    Passed and skipped MCs are excluded, as are failures whose severity
    is INFO (those are guidance, not findings). The remainder is sorted
    by severity rank (a missing severity counts as MEDIUM, severities
    absent from ``_SEV_RANK`` sort last) and then by label.

    Args:
        check_results: CheckItem-like dicts; ``None`` is treated as empty.
        n: Maximum number of records to return. Zero or a negative value
            yields an empty list.

    Returns:
        At most ``n`` of the original record dicts (not copies).
    """
    if n <= 0:
        return []
    fails = [
        r for r in (check_results or [])
        if not r.get("passed") and not r.get("skipped")
        and (r.get("severity") or "").upper() != "INFO"
    ]
    fails.sort(key=lambda r: (
        _SEV_RANK.get((r.get("severity") or "MEDIUM").upper(), 5),
        # `or ""` also covers an explicit None label, which would
        # otherwise make the tuple comparison raise TypeError.
        str(r.get("label") or ""),
    ))
    return fails[:n]
def full_audit_records(
    check_results: list[dict],
    check_id: str,
    tenant_id: str = "",
    doc_type: str = "",
) -> list[dict]:
    """Turn check results into flat rows, one per MC.

    Every row carries the shared ``check_id`` / ``tenant_id`` /
    ``doc_type`` and a single UTC ISO-8601 timestamp taken once per
    call, followed by the per-MC fields. ``label`` is cut to 300
    characters, ``matched_text`` and ``hint`` to 500; ``severity`` is
    upper-cased; ``level`` falls back to 1 when missing or falsy.

    Args:
        check_results: CheckItem-like dicts; ``None`` is treated as empty.
        check_id: Identifier of the check run the rows belong to.
        tenant_id: Tenant the run belongs to.
        doc_type: Document type the results were produced for.

    Returns:
        A list with one dict per input record, in input order.
    """
    stamp = datetime.now(timezone.utc).isoformat()
    shared = {
        "check_id": check_id,
        "tenant_id": tenant_id,
        "doc_type": doc_type,
        "ts": stamp,
    }
    return [
        {
            **shared,
            "mc_id": item.get("id", ""),
            "label": (item.get("label") or "")[:300],
            "passed": bool(item.get("passed")),
            "skipped": bool(item.get("skipped")),
            "severity": (item.get("severity") or "").upper(),
            "regulation": item.get("regulation") or "",
            "matched_text": (item.get("matched_text") or "")[:500],
            "hint": (item.get("hint") or "")[:500],
            "level": int(item.get("level") or 1),
        }
        for item in (check_results or [])
    ]
@@ -171,6 +171,8 @@ def _check_mc_deterministic(text_lower: str, mc: dict) -> Optional[dict]:
"hint": question if not passed else "",
"source": "master_control",
"criteria_met": f"{criteria_met}/{total_criteria}",
"regulation": mc.get("regulation") or "",
"article": mc.get("article") or "",
}
@@ -282,8 +284,8 @@ async def _load_controls(doc_type: str, db_url: str, limit: int) -> list[dict]:
return []
try:
query = """SELECT id, control_id, title, regulation, check_question,
pass_criteria, fail_criteria, severity
query = """SELECT id, control_id, title, regulation, article,
check_question, pass_criteria, fail_criteria, severity
FROM compliance.doc_check_controls
WHERE doc_type = $1
ORDER BY severity DESC, title"""
@@ -0,0 +1,181 @@
"""
LLM-tagged regulation backfill v2.
The regex pass (backfill_mc_regulation.py) covers ~13% of MCs that
quote a norm inline ('Art. X DSGVO', '§Y TDDDG'). The remaining 1636
are abstract security/process controls that don't cite a norm by name
but DO map to one (e.g. 'Pseudonymisierung' -> DSGVO Art. 32).
This script asks Qwen (Ollama) in batches of 25 to classify each MC.
Output is constrained: one of DSGVO / TDDDG / BGB / HGB / AO / TMG /
BDSG / MStV / UWG / VSBG / PAngV / GwG / NIS2 / ISO27001 / BSI-GS /
NIST / TKG / EU-VO / OTHER / NONE, plus an optional article hint.
Run inside the backend container:
docker exec bp-compliance-backend python3 \\
/app/scripts/backfill_mc_regulation_llm.py [--dry-run] [--limit N]
"""
from __future__ import annotations
import argparse
import asyncio
import json
import logging
import os
import re
import sys
import asyncpg
import httpx
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
# Base URL of the Ollama server; overridable via the OLLAMA_URL env var.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
# Model name sent in the chat payload; overridable via CMP_LLM_MODEL.
MODEL = os.getenv("CMP_LLM_MODEL", "qwen3:30b-a3b")
# Number of MCs grouped into a single LLM request.
BATCH = 25
# Regulation tags accepted from the LLM reply. Items with any other tag are
# discarded by _ask_llm, and NONE items are dropped as well.
_ALLOWED = {
    "DSGVO", "TDDDG", "BGB", "HGB", "AO", "TMG", "BDSG", "MStV", "UWG",
    "VSBG", "PAngV", "GwG", "NIS2", "ISO27001", "BSI-GS", "NIST", "TKG",
    "EU-VO", "OTHER", "NONE",
}
# System prompt (German). It tells the model to classify each Master-Control
# by its most important legal/normative basis and to answer with JSON only:
# {"items": [{"id", "regulation", "article"}]}, one entry per MC, NONE when
# no norm fits, no explanation and no Markdown.
_SYSTEM = (
    "Du klassifizierst Compliance Master-Controls nach der wichtigsten "
    "rechtlichen oder normativen Grundlage. Antworte AUSSCHLIESSLICH "
    'mit JSON: {"items": [{"id":"<id>", "regulation":"<one of '
    'DSGVO|TDDDG|BGB|HGB|AO|TMG|BDSG|MStV|UWG|VSBG|PAngV|GwG|NIS2|'
    'ISO27001|BSI-GS|NIST|TKG|EU-VO|OTHER|NONE>", "article":"<optional '
    'kurze Norm-Referenz oder leer>"}]}. '
    "Eine MC -> EIN Eintrag. Wenn keine Norm passt: NONE. "
    "Keine Erklaerung, kein Markdown."
)
def _build_user_prompt(batch: list[dict]) -> str:
    """Render one batch of MCs as the user message for the classifier.

    After a fixed heading line, every MC contributes a
    ``- id=<id_short> | <title>`` line (title cut to 120 characters)
    and, when its check question is non-empty, a `` -> <question>``
    line (cut to 200 characters). Lines are joined with newlines.

    Args:
        batch: dicts with the keys ``id_short``, ``title`` and
            ``check_question``; the latter two may be ``None``.
    """
    def _render(mc: dict) -> list[str]:
        heading = (mc["title"] or "")[:120]
        question = (mc["check_question"] or "")[:200]
        rendered = [f'- id={mc["id_short"]} | {heading}']
        if question:
            rendered.append(f" -> {question}")
        return rendered

    body = [line for mc in batch for line in _render(mc)]
    return "\n".join(["Klassifiziere diese Master-Controls:", *body])
async def _ask_llm(batch: list[dict]) -> dict[str, dict]:
    """Ask the LLM to classify one batch of MCs.

    Posts a non-streaming chat request (``format=json``, temperature 0)
    to ``{OLLAMA_URL}/api/chat`` and parses the ``items`` list of the
    JSON reply.

    Args:
        batch: MC dicts as consumed by ``_build_user_prompt``.

    Returns:
        Mapping of the id echoed by the model to
        ``{"regulation": <tag>, "article": <str, max 120 chars>}``.
        The regulation is matched case-insensitively against
        ``_ALLOWED`` and returned in the spelling used there. Items
        without an id, with an unknown tag or tagged ``NONE`` are left
        out. Request failures and unparseable replies are logged and
        yield ``{}``; this function does not raise for them.
    """
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": _SYSTEM},
            {"role": "user", "content": _build_user_prompt(batch)},
        ],
        "stream": False, "format": "json",
        "options": {"temperature": 0.0, "num_predict": 2500},
    }
    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.post(
                f"{OLLAMA_URL.rstrip('/')}/api/chat", json=payload,
            )
            resp.raise_for_status()
            content = (resp.json().get("message") or {}).get("content", "")
    except Exception as e:
        logger.warning("LLM call failed: %s", e)
        return {}
    # Guard the regex below against a non-string payload.
    if not content or not isinstance(content, str):
        return {}
    # Strip qwen thinking
    content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
    try:
        obj = json.loads(content)
    except Exception as e:
        logger.warning("LLM reply is not valid JSON: %s", e)
        return {}
    items = obj.get("items") if isinstance(obj, dict) else None
    if not isinstance(items, list):
        return {}
    # _ALLOWED holds mixed-case tags (MStV, PAngV, GwG). Comparing the
    # upper-cased reply against it directly could never match those, so
    # look up by upper-case key and hand back the canonical spelling.
    canonical = {name.upper(): name for name in _ALLOWED}
    out: dict[str, dict] = {}
    for it in items:
        if not isinstance(it, dict):
            continue
        sid = str(it.get("id") or "").strip()
        reg = canonical.get(str(it.get("regulation") or "").strip().upper())
        # `or ""` keeps a JSON null from being stored as the text "None".
        art = str(it.get("article") or "").strip()
        if not sid or reg is None or reg == "NONE":
            continue
        out[sid] = {"regulation": reg, "article": art[:120]}
    return out
async def main(dry_run: bool, limit: int) -> None:
    """Backfill ``regulation`` / ``article`` for MCs that have none.

    Selects the rows of ``compliance.doc_check_controls`` whose
    ``regulation`` is NULL, sends them to the LLM in batches of
    ``BATCH``, prints a per-regulation summary and writes the accepted
    classifications back in chunks of 200.

    Args:
        dry_run: When true, print the summary but issue no UPDATE.
        limit: Process at most this many MCs; ``0`` or less means all.

    Exits with status 1 when ``DATABASE_URL`` is not set. The database
    connection is closed on every path, including errors.
    """
    db = os.getenv("DATABASE_URL")
    if not db:
        print("DATABASE_URL not set", file=sys.stderr)
        sys.exit(1)
    conn = await asyncpg.connect(db)
    try:
        sql = ("SELECT id, title, check_question FROM compliance.doc_check_controls "
               "WHERE regulation IS NULL ORDER BY id")
        if limit > 0:
            # int() keeps the interpolated value numeric whatever the caller passed.
            sql += f" LIMIT {int(limit)}"
        rows = await conn.fetch(sql)
        print(f"{len(rows)} MCs without regulation — calling LLM in batches of {BATCH}")
        by_short: dict[str, str] = {}
        batches: list[list[dict]] = []
        cur: list[dict] = []
        for r in rows:
            full_id = str(r["id"])
            sid = full_id[:8]
            if sid in by_short:
                # Prefix collision: fall back to the full id so the two
                # MCs cannot be confused when the answer comes back.
                sid = full_id
            by_short[sid] = full_id
            cur.append({"id_short": sid, "title": r["title"],
                        "check_question": r["check_question"]})
            if len(cur) >= BATCH:
                batches.append(cur)
                cur = []
        if cur:
            batches.append(cur)
        updates: list[tuple[str, str, str]] = []  # (regulation, article, uuid)
        hits: dict[str, int] = {}
        for i, batch in enumerate(batches, 1):
            logger.info("batch %d/%d (%d items)", i, len(batches), len(batch))
            res = await _ask_llm(batch)
            asked = {item["id_short"] for item in batch}
            for short, m in res.items():
                # Only accept ids that were part of this request; anything
                # else is an id the model made up or carried over.
                if short not in asked:
                    continue
                updates.append((m["regulation"], m["article"], by_short[short]))
                hits[m["regulation"]] = hits.get(m["regulation"], 0) + 1
        print(f"\nLLM classified: {sum(hits.values())} / {len(rows)}")
        for k, v in sorted(hits.items(), key=lambda x: -x[1]):
            print(f" {k:10s} {v:>5}")
        if dry_run:
            print("\nDRY RUN — no UPDATE issued.")
            return
        for i in range(0, len(updates), 200):
            chunk = updates[i:i + 200]
            await conn.executemany(
                "UPDATE compliance.doc_check_controls "
                "SET regulation = $1, article = $2 WHERE id = $3::uuid",
                chunk,
            )
        print(f"\nApplied {len(updates)} updates.")
    finally:
        await conn.close()
if __name__ == "__main__":
    # CLI entry point: parse --dry-run / --limit and run the backfill.
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument(
        "--limit",
        type=int,
        default=0,
        help="process only N MCs (0 = all)",
    )
    cli = parser.parse_args()
    asyncio.run(main(cli.dry_run, cli.limit))
+4
View File
@@ -13,6 +13,7 @@ networks:
volumes:
dsms_data:
cmp-data: # consent-tester: CMP discovery log + auto-promoted modules
compliance-audit: # backend-compliance: per-check audit log (SQLite)
services:
@@ -124,6 +125,9 @@ services:
OVH_LLM_URL: ${OVH_LLM_URL:-}
OVH_LLM_KEY: ${OVH_LLM_KEY:-}
OVH_LLM_MODEL: ${OVH_LLM_MODEL:-}
COMPLIANCE_AUDIT_DB: /data/compliance_audits.db
volumes:
- compliance-audit:/data
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: