Merge feat/zeroclaw-compliance-agent into main
Brings all compliance doc-check features: - 162 regex checks + 1874 Master Controls - LLM-agnostic agent with tool calling - Banner check (46 checks, 30 CMPs, stealth, Shadow DOM) - Impressum check (24 checks) - Deep consent verification (DataLayer, GCM, TCF) - CMP E2E tests (39 tests) - HTML email reports, FAQ, persistent history Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -63,6 +63,13 @@ _ROUTER_MODULES = [
|
||||
"tom_mapping_routes",
|
||||
"llm_audit_routes",
|
||||
"assertion_routes",
|
||||
"org_role_routes",
|
||||
"document_review_routes",
|
||||
"banner_analytics_routes",
|
||||
"banner_ab_routes",
|
||||
"compliance_report_routes",
|
||||
"whistleblower_routes",
|
||||
"tcf_routes",
|
||||
]
|
||||
|
||||
_loaded_count = 0
|
||||
|
||||
@@ -15,6 +15,14 @@ from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
from compliance.services.smtp_sender import send_email
|
||||
from compliance.services.intake_extractor import extract_intake_flags_from_services, flags_to_ucca_intake
|
||||
from compliance.services.relevance_filter import filter_controls
|
||||
from compliance.services.website_compliance_checks import (
|
||||
check_website_compliance as _check_website_compliance,
|
||||
FollowUpQuestion,
|
||||
to_string_list as _to_string_list,
|
||||
risk_to_escalation as _risk_to_escalation,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -77,21 +85,32 @@ async def analyze_url(req: AnalyzeRequest):
|
||||
# Step 2: Classify via SDK LLM
|
||||
classification = await _classify(client, text)
|
||||
|
||||
# Step 3: Assess via UCCA
|
||||
assessment = await _assess(client, text, classification)
|
||||
# Step 3: Detect services from HTML (deterministic, no LLM needed)
|
||||
from compliance.services.service_registry import SERVICE_REGISTRY
|
||||
detected_services = []
|
||||
html_lower = raw_html.lower()
|
||||
for pattern, meta in SERVICE_REGISTRY.items():
|
||||
if re.search(pattern, html_lower):
|
||||
detected_services.append(meta)
|
||||
|
||||
# Step 4: Determine role
|
||||
# Step 4: Derive intake flags from DETECTED SERVICES (not from text!)
|
||||
intake_flags = extract_intake_flags_from_services(detected_services)
|
||||
|
||||
# Step 5: Assess via UCCA with service-derived flags
|
||||
assessment = await _assess(client, text, classification, intake_flags)
|
||||
|
||||
# Step 5: Determine role
|
||||
esc_level = assessment.get("escalation_level", "E0")
|
||||
role = ESCALATION_ROLES.get(esc_level, ESCALATION_ROLES["E0"])
|
||||
|
||||
# Step 5: Website compliance checks (§312k BGB etc.)
|
||||
# Step 6: Website compliance checks (§312k BGB etc.)
|
||||
site_findings, follow_ups = await _check_website_compliance(client, req.url, raw_html)
|
||||
|
||||
# Step 6: Merge findings
|
||||
# Step 7: Merge and filter findings/controls
|
||||
findings = assessment.get("triggered_rules", [])
|
||||
controls = assessment.get("required_controls", [])
|
||||
findings_str = _to_string_list(findings) + site_findings
|
||||
controls_str = _to_string_list(controls)
|
||||
controls_str = filter_controls(_to_string_list(controls), text, intake_flags)
|
||||
|
||||
# Escalate if website checks found issues
|
||||
if site_findings and esc_level == "E0":
|
||||
@@ -105,7 +124,7 @@ async def analyze_url(req: AnalyzeRequest):
|
||||
email_result = send_email(
|
||||
recipient=req.recipient,
|
||||
subject=f"[{mode_label}] Compliance-Finding: {classification} — {req.url[:60]}",
|
||||
body_html=f"<div>{summary}</div>",
|
||||
body_html=summary,
|
||||
)
|
||||
|
||||
return AnalyzeResponse(
|
||||
@@ -179,34 +198,24 @@ async def _classify(client: httpx.AsyncClient, text: str) -> str:
|
||||
return "other"
|
||||
|
||||
|
||||
async def _assess(client: httpx.AsyncClient, text: str, classification: str) -> dict:
|
||||
async def _assess(client: httpx.AsyncClient, text: str, classification: str, intake_flags: dict | None = None) -> dict:
|
||||
"""Run UCCA assessment via SDK. Returns flattened result dict."""
|
||||
try:
|
||||
# UCCA expects boolean intake flags, not string categories
|
||||
# Use LLM-extracted flags if available, otherwise minimal defaults
|
||||
if intake_flags:
|
||||
ucca_intake = flags_to_ucca_intake(intake_flags)
|
||||
else:
|
||||
ucca_intake = {
|
||||
"data_types": {"personal_data": True},
|
||||
"purpose": {},
|
||||
"automation": "manual",
|
||||
"outputs": {},
|
||||
}
|
||||
|
||||
resp = await client.post(f"{SDK_URL}/sdk/v1/ucca/assess", headers=SDK_HEADERS, json={
|
||||
"use_case_text": text[:3000],
|
||||
"domain": classification,
|
||||
"data_types": {
|
||||
"personal_data": True,
|
||||
"customer_data": True,
|
||||
"location_data": "tracking" in text.lower() or "standort" in text.lower(),
|
||||
"images": False,
|
||||
"biometric_data": "biometrisch" in text.lower(),
|
||||
"minor_data": "kinder" in text.lower() or "minderjährig" in text.lower(),
|
||||
},
|
||||
"purpose": {
|
||||
"marketing": "werbung" in text.lower() or "marketing" in text.lower(),
|
||||
"analytics": "analyse" in text.lower() or "analytics" in text.lower(),
|
||||
"profiling": "profil" in text.lower() or "personalis" in text.lower(),
|
||||
"automation": False,
|
||||
"customer_support": False,
|
||||
},
|
||||
"automation": "partially_automated",
|
||||
"outputs": {
|
||||
"content_generation": False,
|
||||
"recommendations_to_users": "empfehl" in text.lower(),
|
||||
"data_export": "export" in text.lower() or "uebertrag" in text.lower(),
|
||||
},
|
||||
**ucca_intake,
|
||||
})
|
||||
data = resp.json()
|
||||
# Flatten: UCCA wraps result under "assessment" and "result"
|
||||
@@ -227,126 +236,27 @@ async def _assess(client: httpx.AsyncClient, text: str, classification: str) ->
|
||||
return {"risk_level": "unknown", "risk_score": 0, "escalation_level": "E0"}
|
||||
|
||||
|
||||
async def _check_website_compliance(
    client: httpx.AsyncClient, url: str, html: str,
) -> tuple[list[str], list[FollowUpQuestion]]:
    """Scan public website for consumer protection compliance (§312k BGB etc.).

    Runs four heuristic checks against the lower-cased page HTML: a
    cancellation link/button, an imprint link, a privacy-policy link and a
    cookie-consent marker. Only the cancellation check issues network
    requests (HEAD probes of a few conventional cancel paths).

    Args:
        client: HTTP client used for the HEAD probes.
        url: URL of the scanned page; its host part is used to build the
            probe URLs.
        html: Raw HTML of the page.

    Returns:
        ``(findings, follow_ups)`` — finding texts for checks that failed
        outright, and follow-up questions for checks that cannot be decided
        from the public HTML alone.
    """
    findings: list[str] = []
    follow_ups: list[FollowUpQuestion] = []
    # All patterns below are written in lower case and matched against this.
    html_lower = html.lower()
    # Keeps only the host part; if url does not start with http(s):// the
    # pattern does not match and url is used unchanged.
    base_domain = re.sub(r"https?://([^/]+).*", r"\1", url)

    # --- §312k BGB: cancellation button ---
    cancel_patterns = [
        r'href="[^"]*(?:kuendig|kündig|cancel|vertrag.?beenden|abo.?beenden|mitgliedschaft.?beenden)[^"]*"',
        r'(?:kündigen|kuendigen|vertrag beenden|abo beenden|mitgliedschaft kündigen)',
    ]
    has_cancel_link = any(re.search(p, html_lower) for p in cancel_patterns)

    # Also check common cancel URLs
    cancel_urls_to_probe = [
        f"https://{base_domain}/kuendigen",
        f"https://{base_domain}/cancel",
        f"https://{base_domain}/vertrag-kuendigen",
        f"https://{base_domain}/abo-kuendigen",
        f"https://{base_domain}/account/cancel",
    ]
    if not has_cancel_link:
        for probe_url in cancel_urls_to_probe:
            try:
                probe = await client.head(probe_url, follow_redirects=True, timeout=5.0)
                # Any non-error status counts as "a cancel page exists".
                if probe.status_code < 400:
                    has_cancel_link = True
                    break
            except Exception:
                # Best effort: an unreachable probe URL is simply skipped.
                continue

    if not has_cancel_link:
        findings.append(
            "[§312k BGB] Kein oeffentlich sichtbarer Kuendigungsbutton gefunden. "
            "Seit 01.07.2022 muessen online geschlossene Vertraege mit max. 2 Klicks kuendbar sein."
        )
        # The button may only exist behind a login, so ask the user as well.
        follow_ups.append(FollowUpQuestion(
            id="cancel_button_312k",
            question="Koennen Sie nach Login im Kundenbereich innerhalb von 2 Klicks Ihren Vertrag kuendigen?",
            legal_basis="§ 312k BGB (Kuendigungsbutton), Omnibus-Richtlinie (EU) 2019/2161",
            severity="high",
            finding_if_no=(
                "[§312k BGB] VERSTOSS: Kein funktionaler Kuendigungsbutton vorhanden. "
                "Der Anbieter ist verpflichtet, einen leicht auffindbaren Kuendigungsbutton "
                "bereitzustellen (max. 2 Klicks). Ein Zwang zur telefonischen Kuendigung "
                "oder Kuendigung per Brief ist rechtswidrig."
            ),
        ))

    # --- Imprint obligation (§5 TMG / §18 MStV) ---
    imprint_patterns = [
        r'href="[^"]*(?:impressum|imprint|legal.?notice|about.?us/legal)[^"]*"',
        r'>impressum<',
    ]
    has_imprint = any(re.search(p, html_lower) for p in imprint_patterns)
    if not has_imprint:
        findings.append(
            "[§5 TMG] Kein Impressum-Link auf der Seite gefunden. "
            "Geschaeftsmaessige Online-Dienste muessen ein leicht erreichbares Impressum bereitstellen."
        )

    # --- Is a privacy policy linked? ---
    privacy_patterns = [
        r'href="[^"]*(?:datenschutz|privacy|dsgvo)[^"]*"',
        r'>datenschutz<',
    ]
    has_privacy = any(re.search(p, html_lower) for p in privacy_patterns)
    if not has_privacy:
        findings.append(
            "[Art. 13 DSGVO] Kein Link zur Datenschutzerklaerung gefunden. "
            "Nutzer muessen ueber die Verarbeitung personenbezogener Daten informiert werden."
        )

    # --- Cookie consent banner ---
    cookie_patterns = [
        r'(?:cookie.?consent|cookie.?banner|consent.?manager|didomi|cookiebot|onetrust|usercentrics)',
        r'(?:gdpr|dsgvo).?(?:consent|einwilligung)',
    ]
    has_cookie_consent = any(re.search(p, html_lower) for p in cookie_patterns)
    if not has_cookie_consent:
        # No finding is recorded here, only a follow-up question.
        follow_ups.append(FollowUpQuestion(
            id="cookie_consent",
            question="Wird beim ersten Besuch der Website ein Cookie-Consent-Banner angezeigt?",
            legal_basis="§ 25 TDDDG (ehem. TTDSG), Art. 5(3) ePrivacy-Richtlinie",
            severity="medium",
            finding_if_no=(
                "[§25 TDDDG] Kein Cookie-Consent-Banner erkannt. "
                "Vor dem Setzen nicht-essentieller Cookies ist eine Einwilligung erforderlich."
            ),
        ))

    return findings, follow_ups
|
||||
# _check_website_compliance, _to_string_list, _risk_to_escalation
|
||||
# → extracted to compliance/services/website_compliance_checks.py
|
||||
|
||||
|
||||
def _to_string_list(items: list) -> list[str]:
|
||||
"""Convert list of dicts or strings to list of strings."""
|
||||
result = []
|
||||
for item in (items or []):
|
||||
if isinstance(item, dict):
|
||||
# UCCA returns {code, category, description} or {id, name, description}
|
||||
desc = item.get("description", item.get("name", item.get("code", str(item))))
|
||||
code = item.get("code", item.get("id", ""))
|
||||
result.append(f"[{code}] {desc}" if code else str(desc))
|
||||
else:
|
||||
result.append(str(item))
|
||||
return result
|
||||
# Maps a classification key to the German label shown in the summary;
# _build_summary falls back to the raw key when it is not listed here.
DOC_TYPE_LABELS = {
    "privacy_policy": "Datenschutzerklaerung",
    "cookie_banner": "Cookie-Banner",
    "terms_of_service": "AGB",
    "imprint": "Impressum",
    "dpa": "Auftragsverarbeitung (AVV)",
    "other": "Sonstiges",
}
|
||||
|
||||
|
||||
def _risk_to_escalation(risk_level: str) -> str:
|
||||
"""Map UCCA risk level to escalation level."""
|
||||
mapping = {
|
||||
"MINIMAL": "E0",
|
||||
"LIMITED": "E1",
|
||||
"HIGH": "E2",
|
||||
"UNACCEPTABLE": "E3",
|
||||
}
|
||||
return mapping.get(risk_level.upper() if risk_level else "", "E0")
|
||||
# Maps an upper-case risk level to (badge background colour, German label).
# _build_summary looks the level up as-is and falls back to a grey badge
# labelled with the raw level when it is not listed here.
RISK_COLORS = {
    "MINIMAL": ("#16a34a", "Niedrig"),
    "LOW": ("#ca8a04", "Gering"),
    "LIMITED": ("#ea580c", "Mittel"),
    "HIGH": ("#dc2626", "Hoch"),
    "UNACCEPTABLE": ("#991b1b", "Kritisch"),
}
|
||||
|
||||
|
||||
def _build_summary(
|
||||
@@ -354,48 +264,54 @@ def _build_summary(
|
||||
findings_str: list[str], controls_str: list[str],
|
||||
mode: str = "post_launch",
|
||||
) -> str:
|
||||
"""Build a German manager summary, adapted to pre/post-launch context."""
|
||||
"""Build HTML summary for email and frontend."""
|
||||
risk = assessment.get("risk_level", "unbekannt")
|
||||
score = assessment.get("risk_score", 0)
|
||||
recommendation = assessment.get("recommendation", "")
|
||||
dsfa = assessment.get("dsfa_recommended", False)
|
||||
is_live = mode == "post_launch"
|
||||
risk_color, risk_label = RISK_COLORS.get(risk, ("#6b7280", risk))
|
||||
doc_label = DOC_TYPE_LABELS.get(classification, classification)
|
||||
|
||||
findings_text = "\n".join(f"- {f}" for f in findings_str[:5]) if findings_str else "Keine"
|
||||
controls_text = "\n".join(f"- {c}" for c in controls_str[:5]) if controls_str else "Keine"
|
||||
|
||||
mode_header = (
|
||||
"PRUEFUNG LIVE-WEBSITE — Das Dokument ist bereits oeffentlich zugaenglich."
|
||||
mode_banner = (
|
||||
'<div style="background:#fef2f2;border-left:4px solid #dc2626;padding:12px 16px;margin-bottom:16px;">'
|
||||
'<strong style="color:#991b1b;">LIVE-WEBSITE</strong> — Das Dokument ist bereits oeffentlich zugaenglich.</div>'
|
||||
if is_live else
|
||||
"INTERNE PRUEFUNG — Das Dokument ist noch nicht veroeffentlicht."
|
||||
'<div style="background:#eff6ff;border-left:4px solid #3b82f6;padding:12px 16px;margin-bottom:16px;">'
|
||||
'<strong style="color:#1e40af;">INTERNE PRUEFUNG</strong> — Dokument noch nicht veroeffentlicht.</div>'
|
||||
)
|
||||
|
||||
parts = [
|
||||
mode_header,
|
||||
"",
|
||||
f"Dokumenttyp: {classification}",
|
||||
f"Quelle: {url}",
|
||||
f"Risikobewertung: {risk} ({score}/100)",
|
||||
f"Zustaendig: {role}",
|
||||
f"DSFA empfohlen: {'Ja' if dsfa else 'Nein'}",
|
||||
"",
|
||||
f"Findings:\n{findings_text}",
|
||||
"",
|
||||
f"Erforderliche Massnahmen:\n{controls_text}",
|
||||
]
|
||||
findings_html = "".join(f'<li style="margin-bottom:4px;">{f}</li>' for f in findings_str[:8]) if findings_str else '<li style="color:#6b7280;">Keine</li>'
|
||||
controls_html = "".join(f'<li style="margin-bottom:4px;">{c}</li>' for c in controls_str[:8]) if controls_str else '<li style="color:#6b7280;">Keine</li>'
|
||||
|
||||
warning = ""
|
||||
if is_live and findings_str:
|
||||
parts.extend([
|
||||
"",
|
||||
"ACHTUNG: Diese Maengel sind bereits oeffentlich sichtbar. "
|
||||
"Sofortige Nachbesserung empfohlen um Abmahnrisiken zu minimieren.",
|
||||
])
|
||||
warning = (
|
||||
'<div style="background:#fef2f2;border:1px solid #fecaca;border-radius:8px;padding:12px 16px;margin-top:16px;">'
|
||||
'<strong style="color:#dc2626;">⚠ ACHTUNG:</strong> Diese Maengel sind bereits oeffentlich sichtbar. '
|
||||
'Sofortige Nachbesserung empfohlen um Abmahnrisiken zu minimieren.</div>'
|
||||
)
|
||||
elif not is_live and controls_str:
|
||||
parts.extend([
|
||||
"",
|
||||
"Empfehlung: Implementieren Sie die erforderlichen Kontrollen vor der Veroeffentlichung.",
|
||||
])
|
||||
warning = (
|
||||
'<div style="background:#f0fdf4;border:1px solid #bbf7d0;border-radius:8px;padding:12px 16px;margin-top:16px;">'
|
||||
'Empfehlung: Implementieren Sie die erforderlichen Kontrollen vor der Veroeffentlichung.</div>'
|
||||
)
|
||||
|
||||
if recommendation:
|
||||
parts.extend(["", f"Weitere Empfehlung: {recommendation}"])
|
||||
return "\n".join(parts)
|
||||
rec_html = f'<p style="color:#475569;margin-top:12px;"><em>{recommendation}</em></p>' if recommendation else ""
|
||||
|
||||
return f"""
|
||||
{mode_banner}
|
||||
<table style="width:100%;border-collapse:collapse;margin-bottom:16px;">
|
||||
<tr><td style="padding:6px 0;color:#64748b;width:180px;">Dokumenttyp</td><td style="padding:6px 0;font-weight:600;">{doc_label}</td></tr>
|
||||
<tr><td style="padding:6px 0;color:#64748b;">Quelle</td><td style="padding:6px 0;"><a href="{url}" style="color:#6366f1;">{url}</a></td></tr>
|
||||
<tr><td style="padding:6px 0;color:#64748b;">Risikobewertung</td><td style="padding:6px 0;"><span style="background:{risk_color};color:white;padding:2px 8px;border-radius:4px;font-size:13px;">{risk_label} ({score}/100)</span></td></tr>
|
||||
<tr><td style="padding:6px 0;color:#64748b;">Zustaendig</td><td style="padding:6px 0;font-weight:600;">{role}</td></tr>
|
||||
<tr><td style="padding:6px 0;color:#64748b;">DSFA empfohlen</td><td style="padding:6px 0;">{'Ja' if dsfa else 'Nein'}</td></tr>
|
||||
</table>
|
||||
<h3 style="color:#1e293b;font-size:15px;margin:16px 0 8px;">Findings</h3>
|
||||
<ul style="margin:0;padding-left:20px;color:#334155;">{findings_html}</ul>
|
||||
<h3 style="color:#1e293b;font-size:15px;margin:16px 0 8px;">Erforderliche Massnahmen</h3>
|
||||
<ul style="margin:0;padding-left:20px;color:#334155;">{controls_html}</ul>
|
||||
{warning}
|
||||
{rec_html}
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
"""
|
||||
Agent Compare Routes — scan multiple websites and compare compliance posture.
|
||||
|
||||
POST /api/compliance/agent/compare
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/compliance/agent", tags=["agent"])
|
||||
|
||||
|
||||
class CompareRequest(BaseModel):
    """Request body for POST /compare."""

    # Intended to hold 2-5 URLs; the handler only scans the first five.
    urls: list[str]  # 2-5 URLs to compare
    # Passed through unchanged as "mode" to the scan endpoint.
    mode: str = "post_launch"
|
||||
|
||||
|
||||
class SiteResult(BaseModel):
    """Condensed scan outcome for one website in a comparison."""

    url: str
    domain: str
    risk_level: str = ""
    risk_score: float = 0
    findings_count: int = 0
    services_count: int = 0
    # True when no finding code contains "IMPRESSUM" / "DATENSCHUTZ".
    has_impressum: bool = False
    has_datenschutz: bool = False
    has_cookie_banner: bool = False
    has_google_fonts: bool = False
    # NOTE(review): the compare handler never sets this field, so it keeps
    # its default there — confirm whether that is intended.
    tracking_before_consent: int = 0
    classification: str = ""
    # The compare handler sets "completed", "failed" (non-200 response) or
    # "error" (exception); "pending" is only the default.
    scan_status: str = "pending"
|
||||
|
||||
|
||||
class CompareResponse(BaseModel):
    """Response body for POST /compare."""

    # One entry per scanned URL.
    sites: list[SiteResult]
    # ISO-8601 timestamp (UTC) taken when the comparison finished.
    compared_at: str
|
||||
|
||||
|
||||
@router.post("/compare", response_model=CompareResponse)
async def compare_websites(req: CompareRequest):
    """Scan multiple websites and compare their compliance posture.

    Each of the first five URLs in ``req.urls`` is posted to the local
    ``/api/compliance/agent/scan`` endpoint. The scans run concurrently over
    one shared HTTP client and each response is condensed into a
    ``SiteResult``.

    Returns:
        CompareResponse with one entry per scanned URL, in request order, and
        a UTC ISO-8601 timestamp. A scan answering with a non-200 status is
        reported as ``scan_status="failed"`` and one that raises as
        ``"error"``; neither aborts the other scans.
    """
    from urllib.parse import urlsplit

    urls = req.urls[:5]  # Max 5

    def domain_of(url: str) -> str:
        """Return the host part of *url*, also for URLs without a scheme."""
        try:
            netloc = urlsplit(url).netloc
        except ValueError:
            # e.g. malformed IPv6 literal; fall back to plain splitting below.
            netloc = ""
        # Without "scheme://" urlsplit finds no netloc; the host is then the
        # text before the first "/" (the old split("/")[2] returned a path
        # segment in that case).
        return netloc or url.split("/", 1)[0] or url

    def has_code(findings: list, marker: str) -> bool:
        """Tell whether any dict finding has *marker* in its ``code``."""
        # `or ""` guards against an explicit null code in the scan response.
        return any(
            marker in str(f.get("code") or "")
            for f in findings
            if isinstance(f, dict)
        )

    def has_service(services: list, service_id: str) -> bool:
        """Tell whether any dict service entry has the given ``id``."""
        return any(
            s.get("id") == service_id for s in services if isinstance(s, dict)
        )

    async def scan_one(client: httpx.AsyncClient, url: str) -> SiteResult:
        domain = domain_of(url)
        try:
            resp = await client.post(
                "http://localhost:8002/api/compliance/agent/scan",
                json={"url": url, "mode": req.mode},
            )
            if resp.status_code != 200:
                return SiteResult(url=url, domain=domain, scan_status="failed")

            data = resp.json()
            services = data.get("services", [])
            findings = data.get("findings", [])

            return SiteResult(
                url=url,
                domain=domain,
                risk_level=data.get("risk_level", ""),
                risk_score=data.get("risk_score", 0),
                findings_count=len(findings),
                services_count=len(services),
                # A page "has" the document when no finding complains about it.
                has_impressum=not has_code(findings, "IMPRESSUM"),
                has_datenschutz=not has_code(findings, "DATENSCHUTZ"),
                # NOTE(review): "chatbot_detected" feeding has_cookie_banner
                # looks like a copy/paste slip — confirm the intended field.
                has_cookie_banner=data.get("chatbot_detected", False)
                or has_service(services, "cmp"),
                has_google_fonts=has_service(services, "google_fonts"),
                classification=data.get("classification", ""),
                scan_status="completed",
            )
        except Exception as e:
            logger.error("Compare scan failed for %s: %s", url, e)
            return SiteResult(url=url, domain=domain, scan_status="error")

    # Scan all in parallel; every request targets the same backend, so a
    # single client is shared instead of creating one per URL.
    async with httpx.AsyncClient(timeout=120.0) as client:
        results = await asyncio.gather(*(scan_one(client, u) for u in urls))

    return CompareResponse(
        sites=list(results),
        compared_at=datetime.now(timezone.utc).isoformat(),
    )
|
||||
@@ -0,0 +1,220 @@
|
||||
"""
|
||||
Agent History Routes — persist and retrieve scan results.
|
||||
|
||||
GET /api/compliance/agent/scans — list recent scans
|
||||
GET /api/compliance/agent/scans/{id} — get single scan
|
||||
POST /api/compliance/agent/scans — save a scan result
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, Query
|
||||
from fastapi.responses import Response
|
||||
from pydantic import BaseModel
|
||||
|
||||
from compliance.services.agent_pdf_export import generate_scan_pdf
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/compliance/agent", tags=["agent"])
|
||||
|
||||
DATABASE_URL = os.environ.get(
|
||||
"COMPLIANCE_DATABASE_URL",
|
||||
os.environ.get("DATABASE_URL", ""),
|
||||
)
|
||||
|
||||
|
||||
class SaveScanRequest(BaseModel):
    """Request body for saving a scan and for exporting it as PDF."""

    url: str
    scan_type: str = "scan"
    analysis_mode: str = "post_launch"
    # Individual keys (classification, risk_level, findings, ...) are read
    # from this dict with defaults when the scan is stored.
    result: dict  # Full scan result JSON
|
||||
|
||||
|
||||
class ScanHistoryItem(BaseModel):
    """Summary row returned by the scan list endpoint."""

    id: str
    url: str
    scan_type: str
    analysis_mode: str
    risk_level: str | None = None
    risk_score: float = 0
    # Number of entries in the stored findings JSON array.
    findings_count: int = 0
    pages_scanned: int = 0
    email_sent: bool = False
    # ISO-8601 timestamp; empty string when the row has no created_at.
    created_at: str
|
||||
|
||||
|
||||
class ScanDetail(BaseModel):
    """Full stored scan as returned by the single-scan endpoint."""

    id: str
    url: str
    scan_type: str
    analysis_mode: str
    # Rebuilt from the stored columns; empty dict when the scan was not found.
    result: dict
    # ISO-8601 timestamp; empty string when unavailable.
    created_at: str
|
||||
|
||||
|
||||
async def _get_pool():
    """Create a new asyncpg connection pool for ``DATABASE_URL``.

    No pool is cached: every call builds a fresh pool (1-3 connections) and
    the route handlers in this module close it again when they are done.

    Returns:
        The new pool, or ``None`` when no database URL is configured, the
        asyncpg driver cannot be imported, or the connection attempt fails.
        Failures are logged as warnings, never raised.
    """
    if not DATABASE_URL:
        return None
    try:
        # Imported only once a database is actually configured, and inside
        # the try so that a missing driver degrades to "no database" like
        # every other connection problem instead of failing the request.
        import asyncpg

        return await asyncpg.create_pool(DATABASE_URL, min_size=1, max_size=3)
    except Exception as e:
        logger.warning("DB connection failed: %s", e)
        return None
|
||||
|
||||
|
||||
@router.post("/scans")
async def save_scan(req: SaveScanRequest):
    """Store one scan result as a row in ``compliance_agent_scans``.

    Returns:
        ``{"status": "saved", "id": ...}`` on success,
        ``{"status": "skipped", "reason": "no database"}`` when no pool is
        available, or ``{"status": "error", "error": ...}`` when the insert
        fails. The pool is closed in every case.
    """
    pool = await _get_pool()
    if not pool:
        return {"status": "skipped", "reason": "no database"}

    scan_id = str(uuid.uuid4())
    payload = req.result

    insert_sql = """
        INSERT INTO compliance_agent_scans
        (id, url, scan_type, analysis_mode, classification, risk_level,
        risk_score, escalation_level, responsible_role, services,
        findings, summary_html, pages_scanned, pages_list, email_sent,
        created_at)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)
    """

    try:
        async with pool.acquire() as conn:
            # Positional values, in the column order of the statement above.
            row_values = (
                uuid.UUID(scan_id),
                req.url,
                req.scan_type,
                req.analysis_mode,
                payload.get("classification", ""),
                payload.get("risk_level", ""),
                payload.get("risk_score", 0),
                payload.get("escalation_level", ""),
                payload.get("responsible_role", ""),
                # List-valued fields are stored as JSON text.
                json.dumps(payload.get("services", [])),
                json.dumps(payload.get("findings", [])),
                # "summary" wins; "summary_html" is only the fallback key.
                payload.get("summary", payload.get("summary_html", "")),
                payload.get("pages_scanned", 0),
                json.dumps(payload.get("pages_list", [])),
                payload.get("email_status") == "sent",
                datetime.now(timezone.utc),
            )
            await conn.execute(insert_sql, *row_values)
        return {"status": "saved", "id": scan_id}
    except Exception as e:
        logger.error("Failed to save scan: %s", e)
        return {"status": "error", "error": str(e)}
    finally:
        await pool.close()
|
||||
|
||||
|
||||
@router.get("/scans", response_model=list[ScanHistoryItem])
async def list_scans(
    limit: int = Query(20, le=100),
    scan_type: str | None = None,
):
    """List recent scans, newest first.

    Args:
        limit: Maximum number of rows to return (at most 100).
        scan_type: When given, only scans of this type are returned.

    Returns:
        A list of ``ScanHistoryItem``. An empty list is returned when no
        database is available or the query fails (the error is logged).
    """
    pool = await _get_pool()
    if not pool:
        return []

    try:
        async with pool.acquire() as conn:
            query = """
                SELECT id, url, scan_type, analysis_mode, risk_level, risk_score,
                findings, pages_scanned, email_sent, created_at
                FROM compliance_agent_scans
            """
            params: list = []
            if scan_type:
                params.append(scan_type)
                query += f" WHERE scan_type = ${len(params)}"
            # LIMIT is passed as a bind parameter as well, so no request
            # value is ever concatenated into the SQL text.
            params.append(limit)
            query += f" ORDER BY created_at DESC LIMIT ${len(params)}"

            rows = await conn.fetch(query, *params)
            return [
                ScanHistoryItem(
                    id=str(r["id"]),
                    url=r["url"],
                    scan_type=r["scan_type"],
                    analysis_mode=r["analysis_mode"],
                    risk_level=r["risk_level"],
                    risk_score=r["risk_score"] or 0,
                    # findings is stored as JSON text; only its length is needed.
                    findings_count=len(json.loads(r["findings"] or "[]")),
                    pages_scanned=r["pages_scanned"] or 0,
                    email_sent=r["email_sent"] or False,
                    created_at=r["created_at"].isoformat() if r["created_at"] else "",
                )
                for r in rows
            ]
    except Exception as e:
        logger.error("Failed to list scans: %s", e)
        return []
    finally:
        await pool.close()
|
||||
|
||||
|
||||
@router.get("/scans/{scan_id}", response_model=ScanDetail)
async def get_scan(scan_id: str):
    """Fetch one stored scan by its id.

    Returns:
        The stored scan as ``ScanDetail``. When no database is available, the
        id is unknown or the lookup fails, a placeholder ``ScanDetail`` with
        the requested id and otherwise empty fields is returned instead.
    """

    def placeholder() -> ScanDetail:
        # Same "nothing found" shape for every failure path.
        return ScanDetail(id=scan_id, url="", scan_type="", analysis_mode="", result={}, created_at="")

    pool = await _get_pool()
    if not pool:
        return placeholder()

    try:
        async with pool.acquire() as conn:
            row = await conn.fetchrow("""
                SELECT * FROM compliance_agent_scans WHERE id = $1
            """, uuid.UUID(scan_id))

        if not row:
            return placeholder()

        created = row["created_at"]
        # JSON-text columns are decoded back into Python lists.
        stored_result = {
            "classification": row["classification"],
            "risk_level": row["risk_level"],
            "risk_score": row["risk_score"],
            "services": json.loads(row["services"] or "[]"),
            "findings": json.loads(row["findings"] or "[]"),
            "summary": row["summary_html"],
            "pages_scanned": row["pages_scanned"],
            "pages_list": json.loads(row["pages_list"] or "[]"),
        }
        return ScanDetail(
            id=str(row["id"]),
            url=row["url"],
            scan_type=row["scan_type"],
            analysis_mode=row["analysis_mode"],
            result=stored_result,
            created_at=created.isoformat() if created else "",
        )
    except Exception as e:
        logger.error("Failed to get scan: %s", e)
        return placeholder()
    finally:
        await pool.close()
|
||||
|
||||
|
||||
@router.post("/scans/pdf")
async def export_scan_pdf(req: SaveScanRequest):
    """Generate a PDF report from scan results (no DB required).

    Returns:
        An ``application/pdf`` attachment named after the scanned host, or
        ``{"error": ...}`` when PDF generation fails (the error is logged).
    """
    import re
    from urllib.parse import urlsplit

    try:
        pdf_bytes = generate_scan_pdf({
            "url": req.url,
            "scan_type": req.scan_type,
            "analysis_mode": req.analysis_mode,
            # Keys in the result dict override the three above on collision.
            **req.result,
        })

        # Derive the host for the filename. URLs without "scheme://" have no
        # netloc, so fall back to the text before the first "/" instead of
        # failing with IndexError after the PDF was already built.
        try:
            host = urlsplit(req.url).netloc
        except ValueError:
            host = ""
        host = host or req.url.split("/", 1)[0]
        # The value ends up inside a quoted header parameter: keep only
        # characters that are safe there and in file names.
        safe_host = re.sub(r"[^A-Za-z0-9._-]", "_", host)[:30] or "scan"

        return Response(
            content=pdf_bytes,
            media_type="application/pdf",
            headers={"Content-Disposition": f'attachment; filename="compliance-report-{safe_host}.pdf"'},
        )
    except Exception as e:
        logger.error("PDF generation failed: %s", e)
        return {"error": str(e)}
|
||||
@@ -0,0 +1,111 @@
|
||||
"""
|
||||
Agent Recurring Scan Routes — schedule and run automated periodic scans.
|
||||
|
||||
POST /api/compliance/agent/monitored-urls — add URL to monitoring
|
||||
GET /api/compliance/agent/monitored-urls — list monitored URLs
|
||||
POST /api/compliance/agent/run-scheduled — trigger all scheduled scans
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/compliance/agent", tags=["agent"])
|
||||
|
||||
DATABASE_URL = os.environ.get(
|
||||
"COMPLIANCE_DATABASE_URL",
|
||||
os.environ.get("DATABASE_URL", ""),
|
||||
)
|
||||
|
||||
# In-memory fallback when no DB available
|
||||
_monitored_urls: list[dict] = []
|
||||
|
||||
|
||||
class MonitoredURL(BaseModel):
    """Request body describing one URL to scan on a schedule."""

    url: str
    # "consent_test" is sent to the consent tester service; any other value
    # goes to the regular scan endpoint.
    scan_type: str = "scan"  # scan, consent_test
    # NOTE(review): stored but not evaluated by run_scheduled_scans, which
    # scans every enabled entry on each call — confirm the caller schedules it.
    frequency: str = "weekly"  # daily, weekly, monthly
    recipient: str = "dsb@breakpilot.local"
    # Disabled entries are skipped by the scheduled run.
    enabled: bool = True
|
||||
|
||||
|
||||
@router.post("/monitored-urls")
async def add_monitored_url(req: MonitoredURL):
    """Register a URL for monitoring in the in-memory list.

    Returns:
        The stored entry (including its generated id) with ``status`` set to
        ``"added"``.
    """
    entry = dict(
        id=str(uuid.uuid4()),
        url=req.url,
        scan_type=req.scan_type,
        frequency=req.frequency,
        recipient=req.recipient,
        enabled=req.enabled,
        created_at=datetime.now(timezone.utc).isoformat(),
        # Filled in by the scheduled run once the entry has been scanned.
        last_scan_at=None,
    )
    _monitored_urls.append(entry)
    logger.info("Added monitored URL: %s (%s)", req.url, req.frequency)

    response = {"status": "added"}
    response.update(entry)
    return response
|
||||
|
||||
|
||||
@router.get("/monitored-urls")
async def list_monitored_urls():
    """Return every monitored URL entry under the ``urls`` key."""
    return dict(urls=_monitored_urls)
|
||||
|
||||
|
||||
@router.delete("/monitored-urls/{url_id}")
async def remove_monitored_url(url_id: str):
    """Drop the entry with the given id from the monitoring list.

    Always answers ``{"status": "removed"}``, whether or not an entry with
    that id existed.
    """
    global _monitored_urls
    remaining = []
    for entry in _monitored_urls:
        if entry["id"] != url_id:
            remaining.append(entry)
    # Rebind the module-level list to the filtered copy.
    _monitored_urls = remaining
    return {"status": "removed"}
|
||||
|
||||
|
||||
@router.post("/run-scheduled")
async def run_scheduled_scans():
    """Trigger all enabled scheduled scans. Called by cron/ZeroClaw.

    Every enabled entry of the in-memory monitoring list is scanned in turn:
    ``consent_test`` entries are posted to the consent tester service, all
    others to the regular scan endpoint of this backend.

    Returns:
        ``{"scans_triggered": <count>, "results": [...]}`` with one result
        per attempted scan. Its status is ``"completed"`` (HTTP 200),
        ``"failed"`` (other status code) or ``"error"`` (exception). One
        failing scan does not stop the remaining ones.
    """
    import httpx

    results = []
    backend_url = "http://localhost:8002"

    # One client serves all requests of this run.
    async with httpx.AsyncClient(timeout=300.0) as client:
        # Iterate over a snapshot: other handlers may append to the list
        # while this coroutine is suspended in an await. The entries are the
        # same dict objects, so last_scan_at still updates the stored entry.
        for entry in list(_monitored_urls):
            if not entry["enabled"]:
                continue

            url = entry["url"]
            scan_type = entry["scan_type"]
            logger.info("Running scheduled %s for %s", scan_type, url)

            try:
                if scan_type == "consent_test":
                    resp = await client.post(
                        "http://bp-compliance-consent-tester:8094/scan",
                        json={"url": url},
                    )
                else:
                    resp = await client.post(
                        f"{backend_url}/api/compliance/agent/scan",
                        json={"url": url, "mode": "post_launch", "recipient": entry["recipient"]},
                    )

                # Recorded whenever a response arrived, whatever its status.
                entry["last_scan_at"] = datetime.now(timezone.utc).isoformat()
                results.append({
                    "url": url,
                    "scan_type": scan_type,
                    "status": "completed" if resp.status_code == 200 else "failed",
                    "status_code": resp.status_code,
                })
            except Exception as e:
                logger.error("Scheduled scan failed for %s: %s", url, e)
                results.append({"url": url, "scan_type": scan_type, "status": "error", "error": str(e)})

    return {"scans_triggered": len(results), "results": results}
|
||||
@@ -73,6 +73,7 @@ def build_scan_summary(
|
||||
f"Findings: {n_findings} ({high} mit hoher Prioritaet)",
|
||||
])
|
||||
|
||||
<<<<<<< HEAD
|
||||
# DSI Documents section — grouped with their findings
|
||||
if discovered_docs:
|
||||
parts.extend(["", f"Rechtliche Dokumente ({len(discovered_docs)})"])
|
||||
@@ -108,6 +109,27 @@ def build_scan_summary(
|
||||
marker = "!!" if sev == "HIGH" else "!" if sev == "MEDIUM" else "i"
|
||||
parts.append(f" [{marker}] {txt}")
|
||||
elif findings:
|
||||
=======
|
||||
# DSI Documents section
|
||||
if discovered_docs:
|
||||
parts.extend([
|
||||
"",
|
||||
f"Rechtliche Dokumente gefunden: {len(discovered_docs)}",
|
||||
])
|
||||
for doc in discovered_docs:
|
||||
pct = doc.completeness_pct if hasattr(doc, 'completeness_pct') else 0
|
||||
fc = doc.findings_count if hasattr(doc, 'findings_count') else 0
|
||||
wc = doc.word_count if hasattr(doc, 'word_count') else 0
|
||||
status = "OK" if pct >= 80 else "LUECKENHAFT" if pct >= 50 else "MANGELHAFT"
|
||||
dt = doc.doc_type if hasattr(doc, 'doc_type') else "unknown"
|
||||
title = doc.title if hasattr(doc, 'title') else "?"
|
||||
parts.append(
|
||||
f" [{status}] {title} ({dt}, {wc} Woerter, "
|
||||
f"{pct}% vollstaendig, {fc} Maengel)"
|
||||
)
|
||||
|
||||
if findings:
|
||||
>>>>>>> feat/zeroclaw-compliance-agent
|
||||
parts.append("")
|
||||
for f in findings[:20]:
|
||||
sev = f.severity if hasattr(f, 'severity') else "?"
|
||||
@@ -123,6 +145,7 @@ def build_scan_summary(
|
||||
])
|
||||
|
||||
return "\n".join(parts)
|
||||
<<<<<<< HEAD
|
||||
|
||||
|
||||
async def fetch_dse_text(url: str, scanned_pages: list[str]) -> str:
|
||||
@@ -161,3 +184,5 @@ async def fetch_dse_html(url: str, scanned_pages: list[str]) -> str:
|
||||
return resp.text
|
||||
except Exception:
|
||||
return ""
|
||||
=======
|
||||
>>>>>>> feat/zeroclaw-compliance-agent
|
||||
|
||||
@@ -23,9 +23,13 @@ from compliance.services.mandatory_content_checker import (
|
||||
check_mandatory_documents, check_dse_mandatory_content, MandatoryFinding,
|
||||
)
|
||||
from compliance.services.legal_basis_validator import validate_legal_bases
|
||||
<<<<<<< HEAD
|
||||
from compliance.api.agent_scan_helpers import (
|
||||
add_corrections, build_scan_summary, fetch_dse_text, fetch_dse_html,
|
||||
)
|
||||
=======
|
||||
from compliance.api.agent_scan_helpers import add_corrections, build_scan_summary
|
||||
>>>>>>> feat/zeroclaw-compliance-agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -79,7 +83,10 @@ class ScanFinding(BaseModel):
|
||||
severity: str
|
||||
text: str
|
||||
correction: str = ""
|
||||
<<<<<<< HEAD
|
||||
doc_title: str = ""
|
||||
=======
|
||||
>>>>>>> feat/zeroclaw-compliance-agent
|
||||
text_reference: TextReferenceModel | None = None
|
||||
|
||||
|
||||
@@ -219,17 +226,69 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
|
||||
else:
|
||||
scan = await scan_website(req.url)
|
||||
|
||||
<<<<<<< HEAD
|
||||
logger.info("Scanned %d pages, found %d services", len(scan.pages_scanned), len(scan.detected_services))
|
||||
|
||||
_progress(f"Schritt 2/7: Rechtliche Dokumente suchen... ({len(scan.pages_scanned)} Seiten gescannt)")
|
||||
=======
|
||||
# Step 1: Scan website — try Playwright first (JS-rendered), fallback to httpx
|
||||
playwright_htmls: dict[str, str] = {}
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=120.0) as pw_client:
|
||||
pw_resp = await pw_client.post(
|
||||
"http://bp-compliance-consent-tester:8094/website-scan",
|
||||
json={"url": req.url, "max_pages": 15, "click_nav": True},
|
||||
)
|
||||
if pw_resp.status_code == 200:
|
||||
pw_data = pw_resp.json()
|
||||
playwright_htmls = pw_data.get("page_htmls", {})
|
||||
logger.info("Playwright scan: %d pages, %d scripts",
|
||||
pw_data.get("pages_count", 0), len(pw_data.get("external_scripts", [])))
|
||||
except Exception as e:
|
||||
logger.warning("Playwright scanner unavailable, falling back to httpx: %s", e)
|
||||
|
||||
# Use Playwright results if available, otherwise fall back to httpx scanner
|
||||
if playwright_htmls:
|
||||
# Build ScanResult from Playwright data
|
||||
from compliance.services.website_scanner import ScanResult, DetectedService, _detect_services, _detect_ai_mentions
|
||||
from compliance.services.service_registry import SERVICE_REGISTRY
|
||||
scan = ScanResult()
|
||||
scan.pages_scanned = list(playwright_htmls.keys())
|
||||
for page_url, html in playwright_htmls.items():
|
||||
_detect_services(html, page_url, scan)
|
||||
_detect_ai_mentions(html, page_url, scan)
|
||||
# Deduplicate
|
||||
seen = set()
|
||||
unique = []
|
||||
for svc in scan.detected_services:
|
||||
if svc.id not in seen:
|
||||
seen.add(svc.id)
|
||||
unique.append(svc)
|
||||
scan.detected_services = unique
|
||||
scan.chatbot_detected = any(s.category == "chatbot" for s in scan.detected_services)
|
||||
if scan.chatbot_detected:
|
||||
scan.chatbot_provider = next(s.name for s in scan.detected_services if s.category == "chatbot")
|
||||
else:
|
||||
scan = await scan_website(req.url)
|
||||
|
||||
logger.info("Scanned %d pages, found %d services", len(scan.pages_scanned), len(scan.detected_services))
|
||||
|
||||
>>>>>>> feat/zeroclaw-compliance-agent
|
||||
# Step 1b: DSI Discovery — find all legal documents on the website
|
||||
discovered_docs: list[DiscoveredDocument] = []
|
||||
dsi_findings: list[ScanFinding] = []
|
||||
try:
|
||||
<<<<<<< HEAD
|
||||
async with httpx.AsyncClient(timeout=300.0) as dsi_client:
|
||||
dsi_resp = await dsi_client.post(
|
||||
"http://bp-compliance-consent-tester:8094/dsi-discovery",
|
||||
json={"url": req.url, "max_documents": 30},
|
||||
=======
|
||||
async with httpx.AsyncClient(timeout=180.0) as dsi_client:
|
||||
dsi_resp = await dsi_client.post(
|
||||
"http://bp-compliance-consent-tester:8094/dsi-discovery",
|
||||
json={"url": req.url, "max_documents": 20},
|
||||
>>>>>>> feat/zeroclaw-compliance-agent
|
||||
)
|
||||
if dsi_resp.status_code == 200:
|
||||
dsi_data = dsi_resp.json()
|
||||
@@ -241,12 +300,17 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
|
||||
)
|
||||
for doc in dsi_data.get("documents", []):
|
||||
doc_type = classify_document_type(doc["title"], doc["url"])
|
||||
<<<<<<< HEAD
|
||||
doc_text = doc.get("full_text", "") or doc.get("text_preview", "")
|
||||
logger.info("DSI check: '%s' type=%s text_len=%d full_text_len=%d preview_len=%d",
|
||||
doc["title"][:50], doc_type, len(doc_text),
|
||||
len(doc.get("full_text", "")), len(doc.get("text_preview", "")))
|
||||
doc_findings = check_document_completeness(
|
||||
doc_text, doc_type, doc["title"], doc["url"],
|
||||
=======
|
||||
doc_findings = check_document_completeness(
|
||||
doc.get("text_preview", ""), doc_type, doc["title"], doc["url"],
|
||||
>>>>>>> feat/zeroclaw-compliance-agent
|
||||
)
|
||||
# Count completeness
|
||||
score_finding = next((f for f in doc_findings if "SCORE" in f.get("code", "")), None)
|
||||
@@ -268,6 +332,7 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
|
||||
if "SCORE" not in df.get("code", ""):
|
||||
dsi_findings.append(ScanFinding(
|
||||
code=df["code"], severity=df["severity"], text=df["text"],
|
||||
<<<<<<< HEAD
|
||||
doc_title=doc["title"],
|
||||
))
|
||||
except Exception as e:
|
||||
@@ -296,6 +361,24 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
|
||||
pass
|
||||
if not dse_text:
|
||||
dse_text = await fetch_dse_text(req.url, scan.pages_scanned)
|
||||
=======
|
||||
))
|
||||
except Exception as e:
|
||||
logger.warning("DSI discovery failed: %s", e)
|
||||
|
||||
# Step 2: Fetch privacy policy text (from Playwright HTMLs or httpx)
|
||||
dse_text = ""
|
||||
for page_url, html in playwright_htmls.items():
|
||||
if re.search(r"datenschutz|privacy|dsgvo", page_url, re.IGNORECASE):
|
||||
import re as _re
|
||||
clean = _re.sub(r"<(script|style)[^>]*>.*?</\1>", "", html, flags=_re.DOTALL | _re.IGNORECASE)
|
||||
clean = _re.sub(r"<[^>]+>", " ", clean)
|
||||
clean = _re.sub(r"\s+", " ", clean).strip()
|
||||
dse_text = clean[:4000]
|
||||
break
|
||||
if not dse_text:
|
||||
dse_text = await _fetch_dse_text(req.url, scan.pages_scanned)
|
||||
>>>>>>> feat/zeroclaw-compliance-agent
|
||||
|
||||
# Step 3: Extract services mentioned in DSE via LLM + text fallback
|
||||
dse_services = await extract_dse_services(dse_text) if dse_text else []
|
||||
@@ -320,11 +403,18 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
|
||||
dse_html = html
|
||||
break
|
||||
if not dse_html:
|
||||
<<<<<<< HEAD
|
||||
dse_html = await fetch_dse_html(req.url, scan.pages_scanned)
|
||||
dse_sections = parse_dse(dse_html, req.url) if dse_html else []
|
||||
logger.info("Parsed %d DSE sections", len(dse_sections))
|
||||
|
||||
_progress("Schritt 4/7: SOLL/IST Vergleich...")
|
||||
=======
|
||||
dse_html = await _fetch_dse_html(req.url, scan.pages_scanned)
|
||||
dse_sections = parse_dse(dse_html, req.url) if dse_html else []
|
||||
logger.info("Parsed %d DSE sections", len(dse_sections))
|
||||
|
||||
>>>>>>> feat/zeroclaw-compliance-agent
|
||||
# Step 5: SOLL/IST comparison
|
||||
detected_dicts = [_service_to_dict(s) for s in scan.detected_services]
|
||||
comparison = compare_services(detected_dicts, dse_services)
|
||||
@@ -363,7 +453,10 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
|
||||
# Step 8c: Add DSI document findings
|
||||
findings.extend(dsi_findings)
|
||||
|
||||
<<<<<<< HEAD
|
||||
_progress(f"Schritt 5/7: Korrekturen generieren... ({len(findings)} Findings)")
|
||||
=======
|
||||
>>>>>>> feat/zeroclaw-compliance-agent
|
||||
# Step 9: Generate corrections for pre-launch mode
|
||||
if not is_live and findings:
|
||||
await add_corrections(findings, dse_text)
|
||||
@@ -400,6 +493,24 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
|
||||
|
||||
|
||||
|
||||
async def _fetch_dse_html(url: str, scanned_pages: list[str]) -> str:
    """Fetch the raw HTML of the privacy policy page (for structured parsing).

    Args:
        url: Base URL of the scanned site; used as the fallback target when
            no scanned page looks like a privacy policy.
        scanned_pages: URLs of pages that were already scanned.

    Returns:
        The response body as text, or an empty string when the request fails.
        The HTTP status code is not inspected, so error pages are returned
        as-is.
    """
    import re

    policy_pattern = re.compile(r"datenschutz|privacy|dsgvo", re.IGNORECASE)
    # First scanned page whose URL looks like a privacy policy, else the site URL.
    dse_url = next((page for page in scanned_pages if policy_pattern.search(page)), url)
    try:
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
            resp = await client.get(dse_url, headers={"User-Agent": "BreakPilot-Compliance-Agent/1.0"})
            return resp.text
    except Exception as e:
        # Best-effort fetch: callers treat "" as "no policy HTML available",
        # but the failure is logged instead of being swallowed silently.
        logger.warning("Could not fetch privacy policy HTML from %s: %s", dse_url, e)
        return ""
|
||||
|
||||
|
||||
def _service_to_dict(svc: DetectedService) -> dict:
|
||||
return {
|
||||
"id": svc.id, "name": svc.name, "category": svc.category,
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
"""
|
||||
FastAPI routes for Banner A/B Testing.
|
||||
|
||||
Endpoints:
|
||||
GET /banner/ab/{site_config_id}/variants — list variants
|
||||
POST /banner/ab/{site_config_id}/variants — create variant
|
||||
PUT /banner/ab/variants/{variant_id} — update variant
|
||||
DELETE /banner/ab/variants/{variant_id} — delete variant
|
||||
GET /banner/ab/{site_config_id}/stats — per-variant stats
|
||||
GET /banner/ab/assign — assign variant for device
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from .tenant_utils import get_tenant_id as _get_tenant_id
|
||||
from compliance.services.banner_ab_service import BannerABService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/banner/ab", tags=["banner-ab-testing"])
|
||||
|
||||
|
||||
class VariantCreate(BaseModel):
    """Request body for creating a banner A/B test variant."""

    # Identification and traffic allocation
    variant_name: str
    variant_key: str = "A"
    traffic_percent: int = 50
    is_control: bool = False

    # Optional presentation settings; None means "not provided"
    banner_title: Optional[str] = None
    banner_description: Optional[str] = None
    position: Optional[str] = None
    style: Optional[str] = None
    primary_color: Optional[str] = None
    show_decline_all: Optional[bool] = None
    theme_overrides: Optional[dict] = None
|
||||
|
||||
|
||||
class VariantUpdate(BaseModel):
    """Request body for partially updating a banner A/B test variant.

    Every field is optional. The update route dumps this model with
    ``exclude_none=True``, so a field left at ``None`` is not sent to the
    service.
    """

    # NOTE(review): unlike VariantCreate there is no ``theme_overrides`` field
    # here — confirm whether that omission is intentional.
    variant_name: Optional[str] = None
    traffic_percent: Optional[int] = None
    is_control: Optional[bool] = None
    banner_title: Optional[str] = None
    banner_description: Optional[str] = None
    position: Optional[str] = None
    style: Optional[str] = None
    primary_color: Optional[str] = None
    show_decline_all: Optional[bool] = None
    is_active: Optional[bool] = None
|
||||
|
||||
|
||||
@router.get("/{site_config_id}/variants")
def list_variants(
    site_config_id: str,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """List the A/B variants of one site configuration for the calling tenant."""
    return BannerABService(db).list_variants(tenant_id, site_config_id)
|
||||
|
||||
|
||||
@router.post("/{site_config_id}/variants")
def create_variant(
    site_config_id: str,
    body: VariantCreate,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Create a new A/B variant for a site configuration.

    The full request model (including fields left at their defaults) is
    handed to the service as a plain dict.
    """
    payload = body.model_dump()
    ab_service = BannerABService(db)
    return ab_service.create_variant(tenant_id, site_config_id, payload)
|
||||
|
||||
|
||||
@router.put("/variants/{variant_id}")
def update_variant(
    variant_id: str,
    body: VariantUpdate,
    db: Session = Depends(get_db),
):
    """Apply a partial update to an existing variant.

    Only fields that are not ``None`` in the request are forwarded.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    # NOTE(review): unlike the list/create/stats routes, no tenant id is
    # resolved or passed to the service here — confirm that variants are
    # tenant-scoped elsewhere.
    changes = body.model_dump(exclude_none=True)
    updated = BannerABService(db).update_variant(variant_id, changes)
    if updated:
        return updated
    raise HTTPException(404, "Variant not found")
|
||||
|
||||
|
||||
@router.delete("/variants/{variant_id}")
def delete_variant(
    variant_id: str,
    db: Session = Depends(get_db),
):
    """Delete a variant by id.

    Raises:
        HTTPException: 404 when the service reports that nothing was deleted.
    """
    # NOTE(review): no tenant id is resolved or passed to the service here —
    # confirm that variants are tenant-scoped elsewhere.
    was_deleted = BannerABService(db).delete_variant(variant_id)
    if was_deleted:
        return {"deleted": True}
    raise HTTPException(404, "Variant not found")
|
||||
|
||||
|
||||
@router.get("/{site_config_id}/stats")
def variant_stats(
    site_config_id: str,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return per-variant statistics for one site configuration."""
    return BannerABService(db).get_variant_stats(tenant_id, site_config_id)
|
||||
|
||||
|
||||
@router.get("/assign")
def assign_variant(
    site_config_id: str = Query(...),
    device_fingerprint: str = Query(...),
    db: Session = Depends(get_db),
):
    """Assign an A/B variant to a device.

    Returns:
        ``{"variant": <variant>}`` when the service assigns one; otherwise
        ``{"variant": None, "message": "No active A/B test"}``.
    """
    assigned = BannerABService(db).assign_variant(site_config_id, device_fingerprint)
    if assigned:
        return {"variant": assigned}
    return {"variant": None, "message": "No active A/B test"}
|
||||
@@ -0,0 +1,67 @@
|
||||
"""
|
||||
FastAPI routes for Banner Consent Analytics.
|
||||
|
||||
Endpoints:
|
||||
GET /banner/analytics/{site_id}/overview — high-level stats
|
||||
GET /banner/analytics/{site_id}/time-series — opt-in rate over time
|
||||
GET /banner/analytics/{site_id}/categories — acceptance per category
|
||||
GET /banner/analytics/{site_id}/devices — mobile/desktop/tablet breakdown
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from .tenant_utils import get_tenant_id as _get_tenant_id
|
||||
from compliance.services.banner_analytics_service import BannerAnalyticsService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/banner/analytics", tags=["banner-analytics"])
|
||||
|
||||
|
||||
@router.get("/{site_id}/overview")
def analytics_overview(
    site_id: str,
    days: int = Query(30, le=365),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return high-level consent statistics for a site.

    ``days`` is the look-back window (default 30, at most 365).
    """
    analytics = BannerAnalyticsService(db)
    return analytics.get_overview_stats(tenant_id, site_id, days)
|
||||
|
||||
|
||||
@router.get("/{site_id}/time-series")
def analytics_time_series(
    site_id: str,
    period: str = Query("daily"),
    days: int = Query(30, le=365),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return the consent time series for a site.

    ``period`` (default ``"daily"``) and ``days`` (default 30, at most 365)
    are passed through to the analytics service unchanged.
    """
    analytics = BannerAnalyticsService(db)
    return analytics.get_time_series(tenant_id, site_id, period, days)
|
||||
|
||||
|
||||
@router.get("/{site_id}/categories")
def analytics_categories(
    site_id: str,
    days: int = Query(30, le=365),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return the per-category consent breakdown for a site.

    ``days`` is the look-back window (default 30, at most 365).
    """
    analytics = BannerAnalyticsService(db)
    return analytics.get_category_breakdown(tenant_id, site_id, days)
|
||||
|
||||
|
||||
@router.get("/{site_id}/devices")
def analytics_devices(
    site_id: str,
    days: int = Query(30, le=365),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return the per-device consent breakdown for a site.

    ``days`` is the look-back window (default 30, at most 365).
    """
    analytics = BannerAnalyticsService(db)
    return analytics.get_device_breakdown(tenant_id, site_id, days)
|
||||
@@ -74,6 +74,7 @@ async def record_consent(
|
||||
device_fingerprint=body.device_fingerprint,
|
||||
categories=body.categories,
|
||||
vendors=body.vendors,
|
||||
vendor_consents=body.vendor_consents,
|
||||
ip_address=body.ip_address,
|
||||
user_agent=body.user_agent,
|
||||
consent_string=body.consent_string,
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
"""
|
||||
FastAPI route for Compliance Report PDF generation.
|
||||
|
||||
Endpoint:
|
||||
GET /compliance/report/pdf — generate comprehensive compliance report as PDF
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy.orm import Session
|
||||
import io
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from .tenant_utils import get_tenant_id as _get_tenant_id
|
||||
from compliance.services.compliance_pdf_generator import CompliancePDFGenerator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/compliance/report", tags=["compliance-report"])
|
||||
|
||||
|
||||
@router.get("/pdf")
def generate_compliance_report_pdf(
    project_id: Optional[str] = Query(None),
    language: str = Query("de"),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Generate a comprehensive compliance PDF report for a project.

    The generator returns the PDF bytes and a filename; the bytes are
    streamed back as an ``application/pdf`` attachment under that filename.
    """
    pdf_bytes, filename = CompliancePDFGenerator(db).generate(tenant_id, project_id, language)
    disposition = f'attachment; filename="{filename}"'
    return StreamingResponse(
        io.BytesIO(pdf_bytes),
        media_type="application/pdf",
        headers={"Content-Disposition": disposition},
    )
|
||||
@@ -0,0 +1,380 @@
|
||||
"""
|
||||
FastAPI routes for Document Review Workflow.
|
||||
|
||||
Tracks which compliance documents have been sent for review, their status,
|
||||
and handles email notifications to reviewers.
|
||||
|
||||
Endpoints:
|
||||
GET /document-reviews — list reviews with filters
|
||||
GET /document-reviews/stats — counts by status
|
||||
POST /document-reviews — create review (auto-assign from mapping)
|
||||
GET /document-reviews/{id} — single review
|
||||
POST /document-reviews/{id}/send — send notification email
|
||||
POST /document-reviews/{id}/approve — mark as approved
|
||||
POST /document-reviews/{id}/reject — mark as rejected
|
||||
GET /document-reviews/for-document — reviews for a specific doc type
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from .tenant_utils import get_tenant_id as _get_tenant_id
|
||||
from .db_utils import row_to_dict as _row_to_dict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/document-reviews", tags=["document-reviews"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Schemas
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class ReviewCreate(BaseModel):
    """Request body for submitting a document for review."""

    # Required: what is being reviewed
    document_type: str
    document_title: str

    # Optional context; document_content is only hashed, not stored, by the
    # create route
    document_content: Optional[str] = None
    project_id: Optional[str] = None
    submitted_by: Optional[str] = None
    review_link: Optional[str] = None
|
||||
|
||||
|
||||
class ReviewReject(BaseModel):
    """Request body for rejecting a review; ``comment`` is mandatory."""

    comment: str
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Routes
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("")
def list_reviews(
    project_id: Optional[str] = Query(None),
    status: Optional[str] = Query(None),
    document_type: Optional[str] = Query(None),
    reviewer_role_key: Optional[str] = Query(None),
    limit: int = Query(50, ge=0, le=200),
    offset: int = Query(0, ge=0),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """List document reviews of the calling tenant, newest first.

    Each optional filter that is provided (and non-empty) adds an equality
    condition. ``limit`` and ``offset`` are validated to be non-negative so
    that invalid paging values are rejected by request validation instead of
    failing inside the database.

    Returns:
        A list of review rows converted to dicts.
    """
    # (column, bind parameter name, value) for every optional equality filter.
    # Column names are fixed literals; only values are bound as parameters.
    optional_filters = (
        ("project_id", "pid", project_id),
        ("status", "status", status),
        ("document_type", "dt", document_type),
        ("reviewer_role_key", "rrk", reviewer_role_key),
    )
    where = ["tenant_id = :tid"]
    params = {"tid": tenant_id, "lim": limit, "off": offset}
    for column, bind_name, value in optional_filters:
        if value:
            where.append(f"{column} = :{bind_name}")
            params[bind_name] = value

    q = text(f"""
        SELECT * FROM compliance_document_reviews
        WHERE {' AND '.join(where)}
        ORDER BY created_at DESC LIMIT :lim OFFSET :off
    """)
    rows = db.execute(q, params).fetchall()
    return [_row_to_dict(r) for r in rows]
|
||||
|
||||
|
||||
@router.get("/stats")
def review_stats(
    project_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return the number of reviews per status for the calling tenant.

    Args:
        project_id: When given, only reviews of that project are counted.

    Returns:
        A dict mapping each status to its review count.
    """
    where = "tenant_id = :tid"
    params = {"tid": tenant_id}
    if project_id:
        where += " AND project_id = :pid"
        params["pid"] = project_id
    q = text(f"SELECT status, COUNT(*) as count FROM compliance_document_reviews WHERE {where} GROUP BY status")
    rows = db.execute(q, params).fetchall()
    # Unpack positionally: attribute access ``row.count`` would resolve to the
    # sequence method ``count`` on SQLAlchemy Row objects rather than to the
    # ``count`` column.
    return {status: count for status, count in rows}
|
||||
|
||||
|
||||
@router.get("/for-document")
def reviews_for_document(
    document_type: str = Query(...),
    project_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return the ten most recent reviews for one document type.

    Results are limited to the calling tenant and, when ``project_id`` is
    given, to that project.
    """
    conditions = ["tenant_id = :tid AND document_type = :dt"]
    params = {"tid": tenant_id, "dt": document_type}
    if project_id:
        conditions.append("project_id = :pid")
        params["pid"] = project_id
    where = " AND ".join(conditions)
    q = text(f"SELECT * FROM compliance_document_reviews WHERE {where} ORDER BY created_at DESC LIMIT 10")
    return [_row_to_dict(row) for row in db.execute(q, params).fetchall()]
|
||||
|
||||
|
||||
@router.post("")
def create_review(
    body: ReviewCreate,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Create one review per reviewer role mapped to the document type.

    Reviewer roles come from the document/role mapping, joined with the org
    roles to pick up the assigned person's name and email. The document
    content itself is not stored; only its SHA-256 hash is.

    Returns:
        The list of inserted review rows as dicts.

    Raises:
        HTTPException: 404 when no role is mapped to the document type.
    """
    # NOTE(review): the LEFT JOIN matches org roles with the given project id
    # OR no project id, so a role that has both could presumably yield two
    # rows (and two reviews) — verify against the data model.
    mapping_query = text("""
        SELECT m.role_key, m.is_primary, r.person_name, r.person_email, r.role_label
        FROM compliance_document_role_mapping m
        LEFT JOIN compliance_org_roles r
        ON r.tenant_id = m.tenant_id AND r.role_key = m.role_key
        AND (r.project_id = :pid OR r.project_id IS NULL)
        WHERE m.tenant_id = :tid AND m.document_type = :dt
        ORDER BY m.is_primary DESC
    """)
    mappings = db.execute(
        mapping_query,
        {"tid": tenant_id, "dt": body.document_type, "pid": body.project_id},
    ).fetchall()
    if not mappings:
        raise HTTPException(404, f"No reviewer mapping found for document type '{body.document_type}'")

    content_hash = None
    if body.document_content:
        content_hash = hashlib.sha256(body.document_content.encode()).hexdigest()

    insert_stmt = text("""
        INSERT INTO compliance_document_reviews
        (tenant_id, project_id, document_type, document_title, document_content_hash,
        reviewer_role_key, reviewer_name, reviewer_email, submitted_by, review_link, submitted_at)
        VALUES (:tid, :pid, :dt, :title, :hash, :rrk, :rn, :re, :sb, :rl, NOW())
        RETURNING *
    """)
    created = []
    for mapping in mappings:
        reviewer = _row_to_dict(mapping)
        inserted = db.execute(insert_stmt, {
            "tid": tenant_id,
            "pid": body.project_id,
            "dt": body.document_type,
            "title": body.document_title,
            "hash": content_hash,
            "rrk": reviewer["role_key"],
            "rn": reviewer.get("person_name"),
            "re": reviewer.get("person_email"),
            "sb": body.submitted_by,
            "rl": body.review_link,
        }).fetchone()
        created.append(_row_to_dict(inserted))
    # Single commit after all reviewer rows were inserted.
    db.commit()
    return created
|
||||
|
||||
|
||||
@router.get("/{review_id}")
def get_review(
    review_id: str,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return a single review of the calling tenant.

    Raises:
        HTTPException: 404 when no review with this id exists for the tenant.
    """
    # NOTE(review): this catch-all GET route is registered before the
    # "/training-requirements" and "/training-gaps" GET routes further down in
    # this module, so requests to those paths are matched here first. Those
    # routes need to be declared above this one.
    lookup = text("SELECT * FROM compliance_document_reviews WHERE id = :rid AND tenant_id = :tid")
    found = db.execute(lookup, {"rid": review_id, "tid": tenant_id}).fetchone()
    if found:
        return _row_to_dict(found)
    raise HTTPException(404, "Review not found")
|
||||
|
||||
|
||||
@router.post("/{review_id}/send")
def send_notification(
    review_id: str,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Email the assigned reviewer and mark the review as ``in_review``.

    All values interpolated into the HTML body are HTML-escaped, because the
    document title, submitter and review link originate from request data.

    Returns:
        ``{"sent": True, "email": <recipient>, "result": <send_email result>}``.

    Raises:
        HTTPException: 404 when the review does not exist for the tenant,
            400 when the review has no reviewer email, 500 when sending the
            email or updating the review fails.
    """
    from html import escape

    q = text("SELECT * FROM compliance_document_reviews WHERE id = :rid AND tenant_id = :tid")
    row = db.execute(q, {"rid": review_id, "tid": tenant_id}).fetchone()
    if not row:
        raise HTTPException(404, "Review not found")
    review = _row_to_dict(row)
    if not review.get("reviewer_email"):
        raise HTTPException(400, "No email for reviewer — assign a person to this role first")

    # Escape everything that ends up inside the HTML body.
    reviewer_name = escape(str(review.get("reviewer_name") or "Pruefer/in"))
    document_title = escape(str(review["document_title"]))
    document_type = escape(str(review["document_type"]))
    submitted_by = escape(str(review.get("submitted_by") or "System"))
    link_html = ""
    if review.get("review_link"):
        # escape() also quotes '"', so the value cannot break out of href="...".
        link_html = (
            f'<p><a href="{escape(str(review["review_link"]))}" '
            'style="background:#7c3aed;color:white;padding:10px 20px;border-radius:6px;'
            'text-decoration:none;">Dokument oeffnen</a></p>'
        )

    try:
        from compliance.services.smtp_sender import send_email
        result = send_email(
            recipient=review["reviewer_email"],
            subject=f"[BreakPilot] Dokument zur Pruefung: {review['document_title']}",
            body_html=f"""
            <h2>Dokument zur Pruefung</h2>
            <p>Sehr geehrte/r <strong>{reviewer_name}</strong>,</p>
            <p>das folgende Dokument wurde Ihnen zur inhaltlichen Pruefung zugewiesen:</p>
            <table style="border-collapse:collapse;margin:16px 0;">
            <tr><td style="padding:4px 12px 4px 0;font-weight:bold;">Dokument:</td>
            <td>{document_title}</td></tr>
            <tr><td style="padding:4px 12px 4px 0;font-weight:bold;">Typ:</td>
            <td>{document_type}</td></tr>
            <tr><td style="padding:4px 12px 4px 0;font-weight:bold;">Eingereicht von:</td>
            <td>{submitted_by}</td></tr>
            </table>
            <p>Bitte pruefen Sie das Dokument auf <strong>inhaltliche Richtigkeit</strong>,
            <strong>Vollstaendigkeit</strong> und <strong>Umsetzbarkeit</strong>.</p>
            {link_html}
            <p style="color:#888;font-size:12px;">BreakPilot Compliance SDK</p>
            """,
        )
        # Update review status; scoped to the tenant like the lookup above.
        db.execute(text("""
            UPDATE compliance_document_reviews
            SET status = 'in_review', email_sent = TRUE, email_sent_at = NOW(), updated_at = NOW()
            WHERE id = :rid AND tenant_id = :tid
        """), {"rid": review_id, "tid": tenant_id})
        db.commit()
        return {"sent": True, "email": review["reviewer_email"], "result": result}
    except Exception as e:
        logger.error("Failed to send review email: %s", e)
        raise HTTPException(500, f"Email sending failed: {e}") from e
|
||||
|
||||
|
||||
@router.post("/{review_id}/approve")
def approve_review(
    review_id: str,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Mark a review as approved and trigger follow-up notifications.

    After the approval is committed, the other roles mapped to the document
    type are notified and training gaps are checked. A failure of the
    training check is logged and does not affect the approval.

    Returns:
        The updated review as a dict with an added ``"training"`` entry
        (``training_gaps`` count and ``academy_available`` flag).

    Raises:
        HTTPException: 404 when the review does not exist for the tenant.
    """
    approve_stmt = text("""
        UPDATE compliance_document_reviews
        SET status = 'approved', reviewed_at = NOW(), updated_at = NOW()
        WHERE id = :rid AND tenant_id = :tid
        RETURNING *
    """)
    approved = db.execute(approve_stmt, {"rid": review_id, "tid": tenant_id}).fetchone()
    if not approved:
        raise HTTPException(404, "Review not found")
    db.commit()
    review = _row_to_dict(approved)

    # Inform every other role mapped to this document type.
    _notify_approval(db, tenant_id, review)

    # Defaults are kept when the training check below fails early.
    training_info = {"training_gaps": 0, "academy_available": False}
    try:
        from compliance.services.training_link_service import TrainingLinkService
        link_service = TrainingLinkService(db)
        gaps = link_service.check_training_gaps(
            tenant_id, review["document_type"], review.get("project_id")
        )
        training_info = {
            "training_gaps": gaps.get("total_gaps", 0),
            "academy_available": gaps.get("academy_available", False),
        }
        # One notification email per reported gap.
        if gaps.get("gaps"):
            _notify_training_gaps(gaps["gaps"], review)
    except Exception as e:
        logger.warning("Training gap check failed (non-blocking): %s", e)

    review["training"] = training_info
    return review
|
||||
|
||||
|
||||
def _notify_approval(db: Session, tenant_id: str, review: dict):
    """Send approval notification to all other roles mapped to this document type.

    Recipients are the org roles mapped to the review's document type, except
    the reviewer's own role, that have an email address. Values interpolated
    into the HTML body are HTML-escaped. Any failure is logged as a warning
    and never propagated to the caller.

    Args:
        db: Database session used to look up the recipients.
        tenant_id: Tenant whose role mapping is consulted.
        review: Review row as a dict; reads ``document_type``,
            ``document_title``, ``reviewer_role_key`` and, optionally,
            ``project_id`` and ``reviewer_name``.
    """
    try:
        from html import escape

        from compliance.services.smtp_sender import send_email
        q = text("""
            SELECT DISTINCT r.person_name, r.person_email, r.role_label
            FROM compliance_document_role_mapping m
            JOIN compliance_org_roles r
            ON r.tenant_id = m.tenant_id AND r.role_key = m.role_key
            AND (r.project_id = :pid OR r.project_id IS NULL)
            WHERE m.tenant_id = :tid AND m.document_type = :dt
            AND m.role_key != :reviewer_key AND r.person_email IS NOT NULL
        """)
        others = db.execute(q, {
            "tid": tenant_id, "dt": review["document_type"],
            "pid": review.get("project_id"), "reviewer_key": review["reviewer_role_key"],
        }).fetchall()

        document_title = escape(str(review["document_title"]))
        approver = escape(str(review.get("reviewer_name") or review["reviewer_role_key"]))
        for other in others:
            o = _row_to_dict(other)
            salutation = escape(str(o.get("person_name") or o["role_label"]))
            send_email(
                recipient=o["person_email"],
                subject=f"[BreakPilot] Freigabe: {review['document_title']}",
                body_html=f"""
                <h2>Dokument freigegeben</h2>
                <p>Sehr geehrte/r <strong>{salutation}</strong>,</p>
                <p>das Dokument <strong>{document_title}</strong> wurde von
                {approver} freigegeben.</p>
                <p>Bitte pruefen Sie, ob fuer Ihren Verantwortungsbereich Handlungsbedarf besteht
                (z.B. Schulungsbedarf, Prozessanpassungen).</p>
                <p style="color:#888;font-size:12px;">BreakPilot Compliance SDK</p>
                """,
            )
        logger.info("Notified %d other roles about approval of %s", len(others), review["document_title"])
    except Exception as e:
        logger.warning("Approval notification failed (non-blocking): %s", e)
|
||||
|
||||
|
||||
def _notify_training_gaps(gaps: list[dict], review: dict):
    """Send training requirement emails to persons with outstanding modules.

    Gaps without a ``person_email`` are skipped. Values interpolated into the
    HTML body are HTML-escaped. Any failure is logged as a warning and never
    propagated to the caller.

    Args:
        gaps: Gap dicts; reads ``person_email``, ``person_name``, ``role``,
            ``module_title``, ``module_code`` and ``status``.
        review: Review row as a dict; reads ``document_title``.
    """
    try:
        from html import escape

        from compliance.services.smtp_sender import send_email

        document_title = escape(str(review["document_title"]))
        sent = 0
        for gap in gaps:
            if not gap.get("person_email"):
                continue
            module_title = escape(str(gap["module_title"]))
            send_email(
                recipient=gap["person_email"],
                subject=f"[BreakPilot] Schulungsbedarf: {gap['module_title']}",
                body_html=f"""
                <h2>Schulungsbedarf nach Dokument-Freigabe</h2>
                <p>Sehr geehrte/r <strong>{escape(str(gap['person_name']))}</strong>,</p>
                <p>nach Freigabe des Dokuments <strong>{document_title}</strong>
                ist fuer Ihre Rolle (<strong>{escape(str(gap['role']))}</strong>) eine Schulung erforderlich:</p>
                <p><strong>{module_title}</strong> ({escape(str(gap['module_code']))})</p>
                <p>Status: {escape(str(gap['status']))}</p>
                <p><a href="/sdk/training/learner" style="background:#7c3aed;color:white;padding:10px 20px;border-radius:6px;text-decoration:none;">Zur Academy</a></p>
                <p style="color:#888;font-size:12px;">BreakPilot Compliance SDK</p>
                """,
            )
            sent += 1
        # Report the emails actually sent, not the number of gaps received
        # (gaps without an email address are skipped above).
        logger.info("Sent %d training gap notifications for %s", sent, review["document_title"])
    except Exception as e:
        logger.warning("Training notification failed (non-blocking): %s", e)
|
||||
|
||||
|
||||
@router.post("/{review_id}/reject")
def reject_review(
    review_id: str,
    body: ReviewReject,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Mark a document review as rejected and store the reviewer's comment.

    The update is limited to the caller's tenant. Raises 404 when no matching
    review exists; otherwise commits and returns the updated row as a dict.
    """
    bind_params = {"rid": review_id, "tid": tenant_id, "comment": body.comment}
    rejected = db.execute(
        text("""
            UPDATE compliance_document_reviews
            SET status = 'rejected', reviewed_at = NOW(), review_comment = :comment, updated_at = NOW()
            WHERE id = :rid AND tenant_id = :tid
            RETURNING *
        """),
        bind_params,
    ).fetchone()
    if not rejected:
        raise HTTPException(404, "Review not found")
    db.commit()
    return _row_to_dict(rejected)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Training Integration
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/training-requirements")
def get_training_requirements(
    document_type: str = Query(...),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return the training requirements for ``document_type`` in the caller's tenant."""
    # Imported at call time, exactly where the service is needed.
    from compliance.services.training_link_service import TrainingLinkService

    return TrainingLinkService(db).get_training_requirements(tenant_id, document_type)
|
||||
|
||||
|
||||
@router.get("/training-gaps")
def get_training_gaps(
    document_type: str = Query(...),
    project_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return outstanding training gaps for ``document_type``.

    ``project_id`` is optional and passed through to the service unchanged.
    """
    # Imported at call time, exactly where the service is needed.
    from compliance.services.training_link_service import TrainingLinkService

    return TrainingLinkService(db).check_training_gaps(tenant_id, document_type, project_id)
|
||||
@@ -243,6 +243,19 @@ async def change_status(
|
||||
return svc.change_status(dsr_id, body, tenant_id)
|
||||
|
||||
|
||||
@router.post("/{dsr_id}/reject-art11")
async def reject_art11(
    dsr_id: str,
    notes: str = Query(""),
    tenant_id: str = Depends(_get_tenant),
    db: Session = Depends(get_db),
):
    """Reject DSR under Art. 11 DSGVO — data subject not identifiable.

    ``notes`` is an optional free-text query parameter handed to the service.
    """
    from compliance.services.dsr_art11_service import DSRArt11Service

    with translate_domain_errors():
        art11_service = DSRArt11Service(db)
        return art11_service.reject_not_identifiable(dsr_id, tenant_id, notes)
|
||||
|
||||
|
||||
@router.post("/{dsr_id}/verify-identity")
|
||||
async def verify_identity(
|
||||
dsr_id: str,
|
||||
@@ -367,3 +380,42 @@ async def update_exception_check(
|
||||
):
|
||||
with translate_domain_errors():
|
||||
return svc.update_exception_check(dsr_id, check_id, body, tenant_id)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# User Data Export (Art. 15 / Art. 20)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/{dsr_id}/export-user-data")
async def export_user_data(
    dsr_id: str,
    format: str = Query("json"),
    tenant_id: str = Depends(_get_tenant),
    svc: DSRService = Depends(_dsr_svc),
    db: Session = Depends(get_db),
):
    """Export all CMP data about the data subject as JSON, CSV, or PDF.

    The data subject is identified by the DSR's ``requester_email``.

    Args:
        dsr_id: Identifier of the data subject request.
        format: ``"pdf"``, ``"csv"`` or ``"json"``; any other value falls back
            to JSON, as before.

    Raises:
        HTTPException: 400 when the DSR has no requester email.
    """
    import io

    from fastapi import HTTPException

    from compliance.services.dsr_export_service import DSRExportService

    with translate_domain_errors():
        dsr = svc.get(dsr_id, tenant_id)
        email = dsr.get("requester_email")
        if not email:
            raise HTTPException(400, "DSR has no requester email")

        export_svc = DSRExportService(db)
        # One table instead of three copy-pasted response branches.
        exporters = {
            "pdf": (export_svc.export_pdf, "application/pdf"),
            "csv": (export_svc.export_csv, "text/csv"),
        }
        export, media_type = exporters.get(
            format, (export_svc.export_json, "application/json")
        )
        content, filename = export(tenant_id, email)
        return StreamingResponse(
            io.BytesIO(content),
            media_type=media_type,
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )
|
||||
|
||||
@@ -0,0 +1,255 @@
|
||||
"""
|
||||
FastAPI routes for Organizational Compliance Roles.
|
||||
|
||||
Manages the 7 standard compliance roles (DSB, GF, IT-Leiter, etc.)
|
||||
and the document-to-role mapping that determines who reviews which documents.
|
||||
|
||||
Endpoints:
|
||||
GET /org-roles — list roles for tenant/project
|
||||
POST /org-roles — create/upsert a role
|
||||
PUT /org-roles/{id} — update role details
|
||||
DELETE /org-roles/{id} — remove a role
|
||||
GET /org-roles/defaults — 7 standard role definitions
|
||||
POST /org-roles/seed — seed default roles for a project
|
||||
POST /org-roles/{id}/send-test — send test email to role
|
||||
GET /org-roles/mapping — document-to-role mapping
|
||||
PUT /org-roles/mapping — update mapping
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from .tenant_utils import get_tenant_id as _get_tenant_id
|
||||
from .db_utils import row_to_dict as _row_to_dict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/org-roles", tags=["org-roles"])
|
||||
|
||||
# =============================================================================
|
||||
# Standard role definitions
|
||||
# =============================================================================
|
||||
|
||||
# The seven standard compliance roles, as (role_key, role_label) pairs expanded
# into the dict shape returned by the API and used for seeding.
DEFAULT_ROLES = [
    {"role_key": key, "role_label": label}
    for key, label in (
        ("dsb", "Datenschutzbeauftragter (DSB)"),
        ("gf", "Geschaeftsfuehrung"),
        ("it_leiter", "IT-Leiter / CISO"),
        ("hr_leitung", "HR-Leitung"),
        ("marketing_leitung", "Marketing-Leitung"),
        ("compliance_beauftragter", "Compliance-Beauftragter"),
        ("einkauf", "Einkauf / Vendor Management"),
    )
]
|
||||
|
||||
# =============================================================================
|
||||
# Schemas
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class OrgRoleCreate(BaseModel):
    """Request body for creating or upserting an organizational role."""

    # Required identification of the role.
    role_key: str
    role_label: str
    # Optional details of the person holding the role.
    person_name: Optional[str] = None
    person_email: Optional[str] = None
    department: Optional[str] = None
    # Optional project the role belongs to.
    project_id: Optional[str] = None
|
||||
|
||||
|
||||
class OrgRoleUpdate(BaseModel):
    """Request body for a partial role update; every field is optional."""

    role_label: Optional[str] = None
    person_name: Optional[str] = None
    person_email: Optional[str] = None
    department: Optional[str] = None
    is_active: Optional[bool] = None
|
||||
|
||||
|
||||
class MappingEntry(BaseModel):
    """One document-type-to-role assignment."""

    document_type: str
    role_key: str
    # Defaults to a primary assignment when the client omits the flag.
    is_primary: bool = True
|
||||
|
||||
|
||||
class MappingUpdate(BaseModel):
    """Request body carrying the mapping entries to upsert."""

    entries: List[MappingEntry]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Routes
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("")
def list_roles(
    project_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """List the tenant's roles for one project, ordered by ``role_key``.

    Without ``project_id`` only roles whose ``project_id`` is NULL are returned.
    """
    stmt = text("""
        SELECT * FROM compliance_org_roles
        WHERE tenant_id = :tid AND (project_id = :pid OR (:pid IS NULL AND project_id IS NULL))
        ORDER BY role_key
    """)
    found = db.execute(stmt, {"tid": tenant_id, "pid": project_id}).fetchall()
    return list(map(_row_to_dict, found))
|
||||
|
||||
|
||||
@router.get("/defaults")
def get_defaults():
    """Return the built-in standard role definitions (``DEFAULT_ROLES``)."""
    return DEFAULT_ROLES
|
||||
|
||||
|
||||
@router.post("")
def create_role(
    body: OrgRoleCreate,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Create a role, or update it when (tenant, project, role_key) already exists.

    On conflict the label is always overwritten, while person name, email and
    department are only replaced by non-NULL incoming values. Returns the
    stored row as a dict.
    """
    upsert = text("""
        INSERT INTO compliance_org_roles (tenant_id, project_id, role_key, role_label, person_name, person_email, department)
        VALUES (:tid, :pid, :rk, :rl, :pn, :pe, :dept)
        ON CONFLICT (tenant_id, project_id, role_key) DO UPDATE
        SET role_label = EXCLUDED.role_label,
            person_name = COALESCE(EXCLUDED.person_name, compliance_org_roles.person_name),
            person_email = COALESCE(EXCLUDED.person_email, compliance_org_roles.person_email),
            department = COALESCE(EXCLUDED.department, compliance_org_roles.department),
            updated_at = NOW()
        RETURNING *
    """)
    values = {
        "tid": tenant_id,
        "pid": body.project_id,
        "rk": body.role_key,
        "rl": body.role_label,
        "pn": body.person_name,
        "pe": body.person_email,
        "dept": body.department,
    }
    stored = db.execute(upsert, values).fetchone()
    db.commit()
    return _row_to_dict(stored)
|
||||
|
||||
|
||||
# NOTE: routes are matched in registration order. The fixed-path routes
# ("/seed", "/mapping") are therefore declared BEFORE the parameterised
# "/{role_id}" routes; otherwise PUT /org-roles/mapping would be captured by
# PUT /{role_id} with role_id="mapping" and never reach update_mapping.


@router.post("/seed")
def seed_roles(
    project_id: Optional[str] = Query(None),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Insert the default roles for a tenant/project, skipping existing ones.

    Returns the number of rows actually inserted (``seeded``) and the number
    of default roles (``total``).
    """
    created = 0
    q = text("""
        INSERT INTO compliance_org_roles (tenant_id, project_id, role_key, role_label)
        VALUES (:tid, :pid, :rk, :rl)
        ON CONFLICT (tenant_id, project_id, role_key) DO NOTHING
    """)
    for role in DEFAULT_ROLES:
        result = db.execute(q, {"tid": tenant_id, "pid": project_id, "rk": role["role_key"], "rl": role["role_label"]})
        created += result.rowcount
    db.commit()
    return {"seeded": created, "total": len(DEFAULT_ROLES)}


# =============================================================================
# Document-to-Role Mapping
# =============================================================================


@router.get("/mapping")
def get_mapping(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return the tenant's document-to-role mapping, ordered by type and role."""
    q = text("""
        SELECT * FROM compliance_document_role_mapping
        WHERE tenant_id = :tid
        ORDER BY document_type, role_key
    """)
    rows = db.execute(q, {"tid": tenant_id}).fetchall()
    return [_row_to_dict(r) for r in rows]


@router.put("/mapping")
def update_mapping(
    body: MappingUpdate,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Upsert the given mapping entries and return how many were processed."""
    q = text("""
        INSERT INTO compliance_document_role_mapping (tenant_id, document_type, role_key, is_primary)
        VALUES (:tid, :dt, :rk, :ip)
        ON CONFLICT (tenant_id, document_type, role_key) DO UPDATE
        SET is_primary = EXCLUDED.is_primary
    """)
    for entry in body.entries:
        db.execute(q, {"tid": tenant_id, "dt": entry.document_type, "rk": entry.role_key, "ip": entry.is_primary})
    db.commit()
    return {"updated": len(body.entries)}


# =============================================================================
# Role-specific routes (parameterised paths — keep after the fixed paths)
# =============================================================================


@router.put("/{role_id}")
def update_role(
    role_id: str,
    body: OrgRoleUpdate,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Partially update a role; only fields that are not None are written.

    Raises:
        HTTPException: 400 when the body contains no updatable field,
            404 when the role does not exist for this tenant.
    """
    sets, params = [], {"rid": role_id, "tid": tenant_id}
    # Column names come from this fixed list, never from client input, so
    # building the SET clause with an f-string is safe.
    for field in ["role_label", "person_name", "person_email", "department", "is_active"]:
        val = getattr(body, field, None)
        if val is not None:
            sets.append(f"{field} = :{field}")
            params[field] = val
    if not sets:
        raise HTTPException(400, "No fields to update")
    sets.append("updated_at = NOW()")
    q = text(f"UPDATE compliance_org_roles SET {', '.join(sets)} WHERE id = :rid AND tenant_id = :tid RETURNING *")
    row = db.execute(q, params).fetchone()
    if not row:
        raise HTTPException(404, "Role not found")
    db.commit()
    return _row_to_dict(row)


@router.delete("/{role_id}")
def delete_role(
    role_id: str,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Delete a role of the caller's tenant; 404 when nothing was deleted."""
    q = text("DELETE FROM compliance_org_roles WHERE id = :rid AND tenant_id = :tid")
    result = db.execute(q, {"rid": role_id, "tid": tenant_id})
    db.commit()
    if result.rowcount == 0:
        raise HTTPException(404, "Role not found")
    return {"deleted": True}


@router.post("/{role_id}/send-test")
def send_test_email(
    role_id: str,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Send a test email to the person configured for a role.

    Raises:
        HTTPException: 404 when the role is unknown, 400 when it has no
            email address, 500 when sending fails.
    """
    from html import escape

    q = text("SELECT * FROM compliance_org_roles WHERE id = :rid AND tenant_id = :tid")
    role = db.execute(q, {"rid": role_id, "tid": tenant_id}).fetchone()
    if not role:
        raise HTTPException(404, "Role not found")
    role_dict = _row_to_dict(role)
    if not role_dict.get("person_email"):
        raise HTTPException(400, "No email configured for this role")

    # Escape stored values before embedding them in the HTML body.
    safe_label = escape(str(role_dict["role_label"]))
    safe_name = escape(str(role_dict["person_name"] or "N/A"))
    safe_email = escape(str(role_dict["person_email"]))

    try:
        from compliance.services.smtp_sender import send_email

        result = send_email(
            recipient=role_dict["person_email"],
            subject=f"[BreakPilot] Test-E-Mail fuer {role_dict['role_label']}",
            body_html=f"""
            <h2>Test-E-Mail</h2>
            <p>Diese E-Mail bestaetigt, dass die Zustellung an die Rolle
            <strong>{safe_label}</strong> funktioniert.</p>
            <p>Empfaenger: {safe_name} ({safe_email})</p>
            <p style="color:#888;font-size:12px;">Gesendet von BreakPilot Compliance SDK</p>
            """,
        )
        return {"sent": True, "email": role_dict["person_email"], "result": result}
    except Exception as e:
        logger.error("Failed to send test email: %s", e)
        raise HTTPException(500, f"Email sending failed: {e}") from e
|
||||
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
FastAPI routes for IAB TCF 2.2 (Transparency & Consent Framework).
|
||||
|
||||
Endpoints:
|
||||
GET /tcf/purposes — list 12 IAB purposes with translations
|
||||
GET /tcf/special-features — list 2 IAB special features
|
||||
GET /tcf/category-mapping — banner category → IAB purpose mapping
|
||||
POST /tcf/encode — generate TC String from consent decisions
|
||||
POST /tcf/encode-categories — generate TC String from banner categories
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional, List, Dict
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from .tenant_utils import get_tenant_id as _get_tenant_id
|
||||
from compliance.services.tcf_encoder_service import TCFEncoderService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/tcf", tags=["tcf"])
|
||||
|
||||
|
||||
class TCFEncodeRequest(BaseModel):
    """Request body for encoding a TC String from explicit consent decisions."""

    # Consent decisions keyed by integer id.
    purpose_consents: Dict[int, bool] = {}
    vendor_consents: Dict[int, bool] = {}
    # Optional maps, passed to the encoder as ``purpose_li`` / ``special_features``.
    purpose_li: Optional[Dict[int, bool]] = None
    special_features: Optional[Dict[int, bool]] = None
    # Encoder settings.
    cmp_id: int = 1
    cmp_version: int = 1
    consent_language: str = "DE"
|
||||
|
||||
|
||||
class TCFCategoryEncodeRequest(BaseModel):
    """Request body for encoding a TC String from banner category names."""

    categories: List[str] = []
    vendor_consents: Optional[Dict[int, bool]] = None
    # Encoder settings.
    cmp_id: int = 1
    consent_language: str = "DE"
|
||||
|
||||
|
||||
@router.get("/purposes")
def list_purposes():
    """Return the purposes provided by ``TCFEncoderService.get_purposes``."""
    purposes = TCFEncoderService.get_purposes()
    return purposes
|
||||
|
||||
|
||||
@router.get("/special-features")
def list_special_features():
    """Return the special features provided by ``TCFEncoderService``."""
    features = TCFEncoderService.get_special_features()
    return features
|
||||
|
||||
|
||||
@router.get("/category-mapping")
def get_category_mapping():
    """Return the category-to-purpose map provided by ``TCFEncoderService``."""
    mapping = TCFEncoderService.get_category_purpose_map()
    return mapping
|
||||
|
||||
|
||||
@router.post("/encode")
def encode_tc_string(body: TCFEncodeRequest):
    """Encode the submitted consent decisions into a TC String.

    Returns a dict with the encoded ``tc_string`` and the fixed ``version`` 2.
    """
    encoder_settings = {
        "cmp_id": body.cmp_id,
        "cmp_version": body.cmp_version,
        "consent_language": body.consent_language,
    }
    decisions = {
        "purpose_consents": body.purpose_consents,
        "vendor_consents": body.vendor_consents,
        "purpose_li": body.purpose_li,
        "special_features": body.special_features,
    }
    encoded = TCFEncoderService(**encoder_settings).encode(**decisions)
    return {"tc_string": encoded, "version": 2}
|
||||
|
||||
|
||||
@router.post("/encode-categories")
def encode_from_categories(body: TCFCategoryEncodeRequest):
    """Encode a TC String from banner categories.

    Besides the encoded string, the response lists the sorted purpose ids
    that ``CATEGORY_PURPOSE_MAP`` assigns to the submitted categories and
    echoes the categories themselves.
    """
    from compliance.services.tcf_encoder_service import CATEGORY_PURPOSE_MAP

    service = TCFEncoderService(
        cmp_id=body.cmp_id,
        consent_language=body.consent_language,
    )
    encoded = service.encode_from_categories(
        categories=body.categories,
        vendor_consents=body.vendor_consents,
    )

    # Also return which purposes were set; unknown categories contribute none.
    consented = set().union(
        *(CATEGORY_PURPOSE_MAP.get(name, []) for name in body.categories)
    )
    response = {"tc_string": encoded, "version": 2}
    response["purposes_consented"] = sorted(consented)
    response["categories"] = body.categories
    return response
|
||||
@@ -0,0 +1,310 @@
|
||||
"""
|
||||
FastAPI routes for Whistleblower (HinSchG) — Hinweisgeberschutz.
|
||||
|
||||
Admin endpoints for managing reports + public endpoint for anonymous submissions.
|
||||
Deadlines: 7 days acknowledgment (§ 17 Abs. 1), 3 months feedback (§ 17 Abs. 2).
|
||||
|
||||
Endpoints:
|
||||
GET /whistleblower/reports — list with filters
|
||||
GET /whistleblower/reports/stats — counts by status/category
|
||||
POST /whistleblower/reports — create report (admin)
|
||||
GET /whistleblower/reports/{id} — single report with messages
|
||||
PUT /whistleblower/reports/{id} — update status/priority/assignment
|
||||
POST /whistleblower/reports/{id}/acknowledge — send acknowledgment
|
||||
POST /whistleblower/reports/{id}/close — close report
|
||||
POST /whistleblower/reports/{id}/messages — add message
|
||||
GET /whistleblower/reports/{id}/measures — list measures
|
||||
POST /whistleblower/reports/{id}/measures — add measure
|
||||
POST /whistleblower/submit — public anonymous submission
|
||||
GET /whistleblower/check/{access_key} — reporter checks status
|
||||
"""
|
||||
|
||||
import logging
|
||||
import secrets
|
||||
import string
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional, List
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from classroom_engine.database import get_db
|
||||
from .tenant_utils import get_tenant_id as _get_tenant_id
|
||||
from .db_utils import row_to_dict as _row_to_dict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/whistleblower", tags=["whistleblower"])
|
||||
|
||||
# Report categories known to this module.
VALID_CATEGORIES = {
    "corruption",
    "fraud",
    "data_protection",
    "discrimination",
    "environment",
    "competition",
    "product_safety",
    "tax_evasion",
    "other",
}

# Workflow states known to this module.
VALID_STATUSES = {
    "new",
    "acknowledged",
    "under_review",
    "investigation",
    "measures_taken",
    "closed",
    "rejected",
}
|
||||
|
||||
|
||||
def _gen_ref(tenant_id: str, db: Session) -> str:
    """Build the next reference number (``WB-<year>-<6-digit counter>``).

    The counter is the tenant's current number of reports plus one.

    Args:
        tenant_id: Tenant whose reports are counted.
        db: Open database session.
    """
    # Use UTC like every other timestamp in this module, so the year in the
    # reference matches the stored ``received_at`` around New Year.
    year = datetime.now(timezone.utc).year
    q = text("SELECT COUNT(*) FROM compliance_whistleblower_reports WHERE tenant_id = :tid")
    count = db.execute(q, {"tid": tenant_id}).scalar() or 0
    # NOTE(review): COUNT(*)+1 is not safe under concurrent inserts or after
    # deletions (duplicate numbers are possible) — consider a DB sequence.
    return f"WB-{year}-{count + 1:06d}"
|
||||
|
||||
|
||||
def _gen_access_key(groups: int = 3, group_len: int = 4) -> str:
    """Generate a random access key such as ``AB12-CD34-EF56``.

    Characters are drawn from uppercase ASCII letters and digits using the
    ``secrets`` module.

    Args:
        groups: Number of dash-separated groups (default 3).
        group_len: Characters per group (default 4).

    Raises:
        ValueError: If ``groups`` or ``group_len`` is smaller than 1.
    """
    if groups < 1 or group_len < 1:
        raise ValueError("groups and group_len must be at least 1")
    alphabet = string.ascii_uppercase + string.digits
    return "-".join(
        "".join(secrets.choice(alphabet) for _ in range(group_len))
        for _ in range(groups)
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Schemas
|
||||
# =============================================================================
|
||||
|
||||
class ReportCreate(BaseModel):
    """Request body for filing a whistleblower report."""

    category: str = "other"
    title: str
    description: str
    # Reports are anonymous unless the client says otherwise.
    is_anonymous: bool = True
    # Optional reporter contact details.
    reporter_name: Optional[str] = None
    reporter_email: Optional[str] = None
    reporter_phone: Optional[str] = None
    priority: str = "normal"
|
||||
|
||||
class ReportUpdate(BaseModel):
    """Request body for a partial report update; every field is optional."""

    status: Optional[str] = None
    priority: Optional[str] = None
    assigned_to: Optional[str] = None
    category: Optional[str] = None
|
||||
|
||||
class MessageCreate(BaseModel):
    """Request body for adding a message to a report."""

    message: str
    sender_type: str = "admin"
    # Internal messages are filtered out of the reporter's status view.
    is_internal: bool = False
|
||||
|
||||
class MeasureCreate(BaseModel):
    """Request body for recording a measure taken for a report."""

    title: str
    description: Optional[str] = None
    responsible: Optional[str] = None
    # Passed to the database as a plain string.
    due_date: Optional[str] = None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Admin Routes
|
||||
# =============================================================================
|
||||
|
||||
@router.get("/reports")
def list_reports(
    status: Optional[str] = Query(None),
    category: Optional[str] = Query(None),
    # ge=0: a negative value would reach SQL LIMIT and surface as a server
    # error instead of a validation error.
    limit: int = Query(50, ge=0, le=200),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """List the tenant's reports, newest first.

    Args:
        status: Optional exact-match filter on the report status.
        category: Optional exact-match filter on the report category.
        limit: Maximum number of rows (0-200, default 50).
    """
    where = ["tenant_id = :tid"]
    params = {"tid": tenant_id, "lim": limit}
    if status:
        where.append("status = :st")
        params["st"] = status
    if category:
        where.append("category = :cat")
        params["cat"] = category
    # Only fixed fragments are joined into the SQL text; values are bound.
    q = text(f"SELECT * FROM compliance_whistleblower_reports WHERE {' AND '.join(where)} ORDER BY received_at DESC LIMIT :lim")
    return [_row_to_dict(r) for r in db.execute(q, params).fetchall()]
|
||||
|
||||
|
||||
@router.get("/reports/stats")
def report_stats(
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return report counts by status and category plus overdue-deadline counts.

    A report counts as overdue for acknowledgment while it is still ``new``,
    unacknowledged and past ``deadline_acknowledgment``; as overdue for
    feedback while past ``deadline_feedback`` and neither closed nor rejected.
    """
    current_time = datetime.now(timezone.utc)
    tenant_only = {"tid": tenant_id}
    tenant_and_now = {"tid": tenant_id, "now": current_time}

    status_rows = db.execute(
        text("SELECT status, COUNT(*) as cnt FROM compliance_whistleblower_reports WHERE tenant_id = :tid GROUP BY status"),
        tenant_only,
    ).fetchall()
    by_status = {}
    for status_row in status_rows:
        by_status[status_row.status] = status_row.cnt

    category_rows = db.execute(
        text("SELECT category, COUNT(*) as cnt FROM compliance_whistleblower_reports WHERE tenant_id = :tid GROUP BY category"),
        tenant_only,
    ).fetchall()
    by_category = {}
    for category_row in category_rows:
        by_category[category_row.category] = category_row.cnt

    overdue_ack = db.execute(
        text("SELECT COUNT(*) FROM compliance_whistleblower_reports WHERE tenant_id = :tid AND deadline_acknowledgment < :now AND acknowledged_at IS NULL AND status = 'new'"),
        tenant_and_now,
    ).scalar() or 0
    overdue_fb = db.execute(
        text("SELECT COUNT(*) FROM compliance_whistleblower_reports WHERE tenant_id = :tid AND deadline_feedback < :now AND status NOT IN ('closed', 'rejected')"),
        tenant_and_now,
    ).scalar() or 0

    return {
        "total": sum(by_status.values()),
        "by_status": by_status,
        "by_category": by_category,
        "overdue_acknowledgment": overdue_ack,
        "overdue_feedback": overdue_fb,
    }
|
||||
|
||||
|
||||
@router.post("/reports")
def create_report(
    body: ReportCreate,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Create a report with a fresh reference number and access key.

    The acknowledgment deadline is set to 7 days and the feedback deadline to
    90 days after receipt (UTC). Returns the inserted row as a dict.

    Raises:
        HTTPException: 400 when ``body.category`` is not in ``VALID_CATEGORIES``.
    """
    # VALID_CATEGORIES was defined but never enforced; reject unknown values
    # before they are stored.
    if body.category not in VALID_CATEGORIES:
        raise HTTPException(400, f"Invalid category: {body.category}")

    now = datetime.now(timezone.utc)
    ref = _gen_ref(tenant_id, db)
    ak = _gen_access_key()
    q = text("""
        INSERT INTO compliance_whistleblower_reports
        (tenant_id, reference_number, access_key, category, title, description,
         is_anonymous, reporter_name, reporter_email, reporter_phone, priority,
         received_at, deadline_acknowledgment, deadline_feedback)
        VALUES (:tid, :ref, :ak, :cat, :title, :desc,
                :anon, :rn, :re, :rp, :pri,
                :now, :dl_ack, :dl_fb)
        RETURNING *
    """)
    row = db.execute(q, {
        "tid": tenant_id, "ref": ref, "ak": ak,
        "cat": body.category, "title": body.title, "desc": body.description,
        "anon": body.is_anonymous, "rn": body.reporter_name,
        "re": body.reporter_email, "rp": body.reporter_phone,
        "pri": body.priority, "now": now,
        "dl_ack": now + timedelta(days=7),
        "dl_fb": now + timedelta(days=90),
    }).fetchone()
    db.commit()
    return _row_to_dict(row)
|
||||
|
||||
|
||||
@router.get("/reports/{report_id}")
def get_report(
    report_id: str,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Return one report of the caller's tenant with its messages and measures.

    Raises 404 when the report does not exist for this tenant. Messages and
    measures are each ordered by ``created_at``.
    """
    report_row = db.execute(
        text("SELECT * FROM compliance_whistleblower_reports WHERE id = :rid AND tenant_id = :tid"),
        {"rid": report_id, "tid": tenant_id},
    ).fetchone()
    if not report_row:
        raise HTTPException(404, "Report not found")

    by_report = {"rid": report_id}
    message_rows = db.execute(
        text("SELECT * FROM compliance_whistleblower_messages WHERE report_id = :rid ORDER BY created_at"),
        by_report,
    ).fetchall()
    measure_rows = db.execute(
        text("SELECT * FROM compliance_whistleblower_measures WHERE report_id = :rid ORDER BY created_at"),
        by_report,
    ).fetchall()

    report = _row_to_dict(report_row)
    report["messages"] = list(map(_row_to_dict, message_rows))
    report["measures"] = list(map(_row_to_dict, measure_rows))
    return report
|
||||
|
||||
|
||||
@router.put("/reports/{report_id}")
def update_report(
    report_id: str,
    body: ReportUpdate,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Partially update status, priority, assignment or category of a report.

    Only fields that are not None are written.

    Raises:
        HTTPException: 400 for an unknown status/category or an empty update,
            404 when the report does not exist for this tenant.
    """
    # VALID_STATUSES / VALID_CATEGORIES were defined but never enforced;
    # reject unknown values before they are stored.
    if body.status is not None and body.status not in VALID_STATUSES:
        raise HTTPException(400, f"Invalid status: {body.status}")
    if body.category is not None and body.category not in VALID_CATEGORIES:
        raise HTTPException(400, f"Invalid category: {body.category}")

    sets, params = [], {"rid": report_id, "tid": tenant_id}
    # Column names come from this fixed list, never from client input.
    for field in ["status", "priority", "assigned_to", "category"]:
        val = getattr(body, field, None)
        if val is not None:
            sets.append(f"{field} = :{field}")
            params[field] = val
    if not sets:
        raise HTTPException(400, "No fields to update")
    sets.append("updated_at = NOW()")
    q = text(f"UPDATE compliance_whistleblower_reports SET {', '.join(sets)} WHERE id = :rid AND tenant_id = :tid RETURNING *")
    row = db.execute(q, params).fetchone()
    if not row:
        raise HTTPException(404, "Report not found")
    db.commit()
    return _row_to_dict(row)
|
||||
|
||||
|
||||
@router.post("/reports/{report_id}/acknowledge")
def acknowledge_report(
    report_id: str,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Set a report to ``acknowledged`` and stamp ``acknowledged_at``.

    Raises 404 when the report does not exist for this tenant; otherwise
    commits and returns the updated row as a dict.
    """
    acknowledged = db.execute(
        text("""
            UPDATE compliance_whistleblower_reports
            SET status = 'acknowledged', acknowledged_at = NOW(), updated_at = NOW()
            WHERE id = :rid AND tenant_id = :tid RETURNING *
        """),
        {"rid": report_id, "tid": tenant_id},
    ).fetchone()
    if not acknowledged:
        raise HTTPException(404, "Report not found")
    db.commit()
    return _row_to_dict(acknowledged)
|
||||
|
||||
|
||||
@router.post("/reports/{report_id}/close")
def close_report(
    report_id: str,
    reason: str = Query(""),
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Close a report, recording ``closed_at`` and the optional closure reason.

    Raises 404 when the report does not exist for this tenant; otherwise
    commits and returns the updated row as a dict.
    """
    closed = db.execute(
        text("""
            UPDATE compliance_whistleblower_reports
            SET status = 'closed', closed_at = NOW(), closure_reason = :reason, updated_at = NOW()
            WHERE id = :rid AND tenant_id = :tid RETURNING *
        """),
        {"rid": report_id, "tid": tenant_id, "reason": reason},
    ).fetchone()
    if not closed:
        raise HTTPException(404, "Report not found")
    db.commit()
    return _row_to_dict(closed)
|
||||
|
||||
|
||||
@router.post("/reports/{report_id}/messages")
def add_message(
    report_id: str,
    body: MessageCreate,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Add a message to a report of the caller's tenant.

    Raises:
        HTTPException: 404 when the report does not exist for this tenant.
    """
    # tenant_id was injected but unused: without this check any tenant could
    # attach messages to another tenant's report by guessing its id.
    owned = db.execute(
        text("SELECT 1 FROM compliance_whistleblower_reports WHERE id = :rid AND tenant_id = :tid"),
        {"rid": report_id, "tid": tenant_id},
    ).fetchone()
    if not owned:
        raise HTTPException(404, "Report not found")

    q = text("""
        INSERT INTO compliance_whistleblower_messages (report_id, sender_type, message, is_internal)
        VALUES (:rid, :st, :msg, :internal) RETURNING *
    """)
    row = db.execute(q, {"rid": report_id, "st": body.sender_type, "msg": body.message, "internal": body.is_internal}).fetchone()
    db.commit()
    return _row_to_dict(row)
|
||||
|
||||
|
||||
@router.get("/reports/{report_id}/measures")
def list_measures(
    report_id: str,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """List the measures of a report, oldest first.

    The query is joined to the report table and filtered by the caller's
    tenant, so measures of other tenants' reports are never returned. As
    before, an unknown report yields an empty list.
    """
    q = text(
        "SELECT m.* FROM compliance_whistleblower_measures m "
        "JOIN compliance_whistleblower_reports r ON r.id = m.report_id "
        "WHERE m.report_id = :rid AND r.tenant_id = :tid ORDER BY m.created_at"
    )
    rows = db.execute(q, {"rid": report_id, "tid": tenant_id}).fetchall()
    return [_row_to_dict(r) for r in rows]
|
||||
|
||||
|
||||
@router.post("/reports/{report_id}/measures")
def add_measure(
    report_id: str, body: MeasureCreate,
    db: Session = Depends(get_db),
    tenant_id: str = Depends(_get_tenant_id),
):
    """Record a measure for a report of the caller's tenant.

    Raises:
        HTTPException: 404 when the report does not exist for this tenant.
    """
    # Previously unscoped: any caller could attach measures to any report id.
    owned = db.execute(
        text("SELECT 1 FROM compliance_whistleblower_reports WHERE id = :rid AND tenant_id = :tid"),
        {"rid": report_id, "tid": tenant_id},
    ).fetchone()
    if not owned:
        raise HTTPException(404, "Report not found")

    q = text("""
        INSERT INTO compliance_whistleblower_measures (report_id, title, description, responsible, due_date)
        VALUES (:rid, :title, :desc, :resp, :due) RETURNING *
    """)
    row = db.execute(q, {"rid": report_id, "title": body.title, "desc": body.description,
                         "resp": body.responsible, "due": body.due_date}).fetchone()
    db.commit()
    return _row_to_dict(row)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Public Routes (Anonymous)
|
||||
# =============================================================================
|
||||
|
||||
@router.post("/submit")
def submit_report(body: ReportCreate, db: Session = Depends(get_db), tenant_id: str = Depends(_get_tenant_id)):
    """Public anonymous submission — same as create but returns only access_key.

    The report is always stored as anonymous. The response contains the access
    key, the reference number and a confirmation message, not the full row.
    """
    # Force anonymity regardless of what the client sent.
    body.is_anonymous = True
    created = create_report(body, db, tenant_id)
    receipt = {
        "access_key": created["access_key"],
        "reference_number": created["reference_number"],
    }
    receipt["message"] = "Ihre Meldung wurde erfolgreich eingereicht. Nutzen Sie den Zugangscode um den Status zu pruefen."
    return receipt
|
||||
|
||||
|
||||
@router.get("/check/{access_key}")
def check_status(access_key: str, db: Session = Depends(get_db), tenant_id: str = Depends(_get_tenant_id)):
    """Reporter checks status anonymously via access key.

    Returns a reduced view of the report plus its non-internal messages in
    chronological order. Raises 404 when no report matches the key for this
    tenant.
    """
    report_query = text(
        "SELECT id, reference_number, status, category, received_at, acknowledged_at FROM compliance_whistleblower_reports WHERE access_key = :ak AND tenant_id = :tid"
    )
    found = db.execute(report_query, {"ak": access_key, "tid": tenant_id}).fetchone()
    if not found:
        raise HTTPException(404, "Meldung nicht gefunden")

    status_view = _row_to_dict(found)
    # Internal messages stay hidden from the reporter.
    message_query = text(
        "SELECT message, sender_type, created_at FROM compliance_whistleblower_messages WHERE report_id = :rid AND is_internal = FALSE ORDER BY created_at"
    )
    visible = db.execute(message_query, {"rid": status_view["id"]}).fetchall()
    status_view["messages"] = list(map(_row_to_dict, visible))
    return status_view
|
||||
@@ -31,24 +31,11 @@ class BannerConsentDB(Base):
|
||||
device_fingerprint = Column(Text, nullable=False)
|
||||
categories = Column(JSON, default=list)
|
||||
vendors = Column(JSON, default=list)
|
||||
vendor_consents = Column(JSON, default=dict) # {"vendor_id": true/false}
|
||||
ip_hash = Column(Text)
|
||||
user_agent = Column(Text)
|
||||
consent_string = Column(Text)
|
||||
linked_email = Column(Text)
|
||||
# Vendor-agnostische Felder (Migration 107)
|
||||
consent_method = Column(Text) # accept_all / reject_all / custom_selection
|
||||
banner_version = Column(Integer)
|
||||
banner_config_hash = Column(Text)
|
||||
geo_country = Column(Text)
|
||||
geo_region = Column(Text)
|
||||
consent_scope = Column(Text, default='domain')
|
||||
page_url = Column(Text)
|
||||
referrer = Column(Text)
|
||||
device_type = Column(Text) # mobile / desktop / tablet
|
||||
browser = Column(Text)
|
||||
os = Column(Text)
|
||||
screen_resolution = Column(Text)
|
||||
session_id = Column(Text)
|
||||
expires_at = Column(DateTime)
|
||||
created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
@@ -74,11 +61,11 @@ class BannerConsentAuditLogDB(Base):
|
||||
site_id = Column(Text, nullable=False)
|
||||
device_fingerprint = Column(Text)
|
||||
categories = Column(JSON, default=list)
|
||||
vendor_consents = Column(JSON, default=dict)
|
||||
ip_hash = Column(Text)
|
||||
user_agent = Column(Text)
|
||||
banner_config_hash = Column(Text)
|
||||
consent_version = Column(Integer)
|
||||
consent_method = Column(Text)
|
||||
page_url = Column(Text)
|
||||
created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
|
||||
|
||||
__table_args__ = (
|
||||
|
||||
@@ -16,6 +16,7 @@ class ConsentCreate(BaseModel):
|
||||
device_fingerprint: str
|
||||
categories: List[str] = []
|
||||
vendors: List[str] = []
|
||||
vendor_consents: dict[str, bool] = {}
|
||||
ip_address: Optional[str] = None
|
||||
user_agent: Optional[str] = None
|
||||
consent_string: Optional[str] = None
|
||||
|
||||
@@ -23,23 +23,10 @@ def consent_to_dict(c: BannerConsentDB) -> dict[str, Any]:
|
||||
"device_fingerprint": c.device_fingerprint,
|
||||
"categories": c.categories or [],
|
||||
"vendors": c.vendors or [],
|
||||
"vendor_consents": c.vendor_consents or {},
|
||||
"ip_hash": c.ip_hash,
|
||||
"user_agent": c.user_agent,
|
||||
"consent_string": c.consent_string,
|
||||
"linked_email": c.linked_email,
|
||||
"consent_method": c.consent_method,
|
||||
"banner_version": c.banner_version,
|
||||
"banner_config_hash": c.banner_config_hash,
|
||||
"geo_country": c.geo_country,
|
||||
"geo_region": c.geo_region,
|
||||
"consent_scope": c.consent_scope,
|
||||
"page_url": c.page_url,
|
||||
"referrer": c.referrer,
|
||||
"device_type": c.device_type,
|
||||
"browser": c.browser,
|
||||
"os": c.os,
|
||||
"screen_resolution": c.screen_resolution,
|
||||
"session_id": c.session_id,
|
||||
"expires_at": c.expires_at.isoformat() if c.expires_at else None,
|
||||
"created_at": c.created_at.isoformat() if c.created_at else None,
|
||||
"updated_at": c.updated_at.isoformat() if c.updated_at else None,
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
Agent PDF Export — generates printable compliance scan reports.
|
||||
|
||||
Uses WeasyPrint to convert HTML report to PDF.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from io import BytesIO
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def generate_scan_pdf(scan_data: dict) -> bytes:
    """Render the scan results as a PDF and return the raw PDF bytes."""
    # Local import: WeasyPrint is only loaded when a PDF is actually rendered.
    from weasyprint import HTML

    document = HTML(string=_build_report_html(scan_data))
    with BytesIO() as sink:
        document.write_pdf(sink)
        return sink.getvalue()
|
||||
|
||||
|
||||
def _severity_color(sev: str) -> str:
|
||||
return {"HIGH": "#dc2626", "CRITICAL": "#991b1b", "MEDIUM": "#ea580c", "LOW": "#2563eb"}.get(sev, "#6b7280")
|
||||
|
||||
|
||||
def _build_report_html(data: dict) -> str:
    """Build the HTML document that is rendered into the PDF report.

    Args:
        data: Scan result mapping. The keys read are ``url``, ``scan_type``,
            ``analysis_mode``, ``findings``, ``services``, ``risk_level``,
            ``risk_score`` and ``pages_scanned``; each one is optional.

    Returns:
        The complete HTML document as a string.

    Every value taken from ``data`` is HTML-escaped before interpolation so
    that text originating from a scanned page cannot inject markup into the
    report.
    """
    from html import escape

    def esc(value: object) -> str:
        """Stringify ``value`` and escape HTML special characters."""
        return escape(str(value))

    url = data.get("url", "")
    scan_type = data.get("scan_type", "scan")
    mode = data.get("analysis_mode", "post_launch")
    # ``or []`` also covers keys that are present but set to None.
    findings = data.get("findings") or []
    services = data.get("services") or []
    risk = data.get("risk_level", "")
    score = data.get("risk_score", 0)
    pages = data.get("pages_scanned", 0)
    now = datetime.now(timezone.utc).strftime("%d.%m.%Y %H:%M UTC")

    mode_label = "Live-Website Pruefung" if mode == "post_launch" else "Interne Pruefung"
    type_label = {
        "quick": "Schnellanalyse",
        "scan": "Website-Scan",
        "consent_test": "Cookie-Test",
    }.get(scan_type, scan_type)

    # Findings may be dicts ({"severity", "text"}) or arbitrary values that are
    # rendered via str() with a default severity.
    finding_cell = "padding:6px 8px;border-bottom:1px solid #e5e7eb;"
    finding_parts = []
    for finding in findings:
        if isinstance(finding, dict):
            sev = finding.get("severity", "MEDIUM")
            description = finding.get("text", str(finding))
        else:
            sev = "MEDIUM"
            description = str(finding)
        color = _severity_color(sev)
        finding_parts.append(
            f'<tr><td style="color:{color};font-weight:bold;{finding_cell}">{esc(sev)}</td>'
            f'<td style="{finding_cell}">{esc(description)}</td></tr>'
        )
    findings_rows = "".join(finding_parts)

    # Only dict entries are rendered in the service comparison table.
    service_cell = "padding:4px 8px;border-bottom:1px solid #f3f4f6;"
    service_parts = []
    for service in services:
        if not isinstance(service, dict):
            continue
        is_ok = bool(service.get("in_dse") or service.get("status") == "ok")
        status_icon = "✓" if is_ok else "✗"
        status_color = "#16a34a" if is_ok else "#dc2626"
        service_parts.append(
            f'<tr><td style="color:{status_color};font-weight:bold;{service_cell}">{status_icon}</td>'
            f'<td style="{service_cell}">{esc(service.get("name", ""))}</td>'
            f'<td style="{service_cell}">{esc(service.get("country", ""))}</td>'
            f'<td style="{service_cell}">{esc(service.get("category", ""))}</td></tr>'
        )
    services_rows = "".join(service_parts)

    badge_color = _severity_color(risk) if risk else "#6b7280"

    warning_block = ""
    if mode == "post_launch" and findings:
        warning_block = (
            '<div class="warning"><strong>ACHTUNG:</strong> Maengel auf einer bereits '
            'veroeffentlichten Website. Sofortige Korrektur empfohlen.</div>'
        )

    findings_body = findings_rows or (
        '<tr><td colspan="2" style="padding:8px;color:#16a34a;">Keine Findings — alles OK</td></tr>'
    )

    services_block = ""
    if services_rows:
        services_block = (
            '<h2>Dienstleister-Abgleich</h2><table><tr><th>Status</th><th>Dienst</th>'
            '<th>Land</th><th>Kategorie</th></tr>' + services_rows + '</table>'
        )

    return f"""<!DOCTYPE html>
<html><head><meta charset="utf-8">
<style>
body {{ font-family: -apple-system, Arial, sans-serif; font-size: 11px; color: #1e293b; margin: 40px; }}
h1 {{ font-size: 20px; color: #1e1b4b; margin-bottom: 4px; }}
h2 {{ font-size: 14px; color: #334155; border-bottom: 2px solid #e2e8f0; padding-bottom: 4px; margin-top: 24px; }}
.meta {{ color: #64748b; font-size: 10px; margin-bottom: 20px; }}
.badge {{ display: inline-block; padding: 2px 8px; border-radius: 4px; color: white; font-size: 10px; font-weight: bold; }}
table {{ width: 100%; border-collapse: collapse; }}
th {{ text-align: left; padding: 6px 8px; background: #f8fafc; border-bottom: 2px solid #e2e8f0; font-size: 10px; color: #64748b; }}
.warning {{ background: #fef2f2; border-left: 4px solid #dc2626; padding: 10px 14px; margin: 16px 0; }}
.footer {{ margin-top: 30px; padding-top: 10px; border-top: 1px solid #e2e8f0; color: #94a3b8; font-size: 9px; }}
</style></head><body>

<h1>Compliance Agent Report</h1>
<p class="meta">{esc(type_label)} | {mode_label} | {now}</p>

<table style="margin-bottom:20px;">
<tr><td style="padding:4px 0;color:#64748b;width:150px;">URL</td><td style="padding:4px 0;"><strong>{esc(url)}</strong></td></tr>
<tr><td style="padding:4px 0;color:#64748b;">Risikobewertung</td><td style="padding:4px 0;"><span class="badge" style="background:{badge_color}">{esc(risk)} ({esc(score)}/100)</span></td></tr>
<tr><td style="padding:4px 0;color:#64748b;">Seiten gescannt</td><td style="padding:4px 0;">{esc(pages)}</td></tr>
<tr><td style="padding:4px 0;color:#64748b;">Findings</td><td style="padding:4px 0;"><strong>{len(findings)}</strong></td></tr>
</table>

{warning_block}

<h2>Findings ({len(findings)})</h2>
<table>
<tr><th>Schwere</th><th>Beschreibung</th></tr>
{findings_body}
</table>

{services_block}

<div class="footer">
Automatisch erstellt vom BreakPilot Compliance Agent | {now}<br>
Dieses Dokument ersetzt keine Rechtsberatung.
</div>
</body></html>"""
|
||||
@@ -0,0 +1,193 @@
|
||||
"""
|
||||
Banner A/B Testing Service — variant assignment, stats, significance.
|
||||
|
||||
Deterministic variant assignment via device fingerprint hash ensures
|
||||
the same device always sees the same variant (sticky bucketing).
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import math
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
class BannerABService:
    """A/B testing for consent banner variants: CRUD, assignment and stats."""

    # Columns update_variant() may write. Because the SET clause is assembled
    # from these fixed names only, no caller-supplied text reaches the SQL.
    _UPDATABLE_FIELDS = (
        "variant_name", "traffic_percent", "is_control", "banner_title",
        "banner_description", "position", "style", "primary_color",
        "show_decline_all", "is_active",
    )

    def __init__(self, db: Session) -> None:
        self.db = db

    # ------------------------------------------------------------------
    # Variant CRUD
    # ------------------------------------------------------------------

    def list_variants(self, tenant_id: str, site_config_id: str) -> list[dict]:
        """Return all variants of a site config for a tenant, ordered by variant key."""
        q = text("""
            SELECT * FROM compliance_banner_variants
            WHERE tenant_id = :tid AND site_config_id = :scid
            ORDER BY variant_key
        """)
        rows = self.db.execute(q, {"tid": tenant_id, "scid": site_config_id}).fetchall()
        return [dict(r._mapping) for r in rows]

    def create_variant(self, tenant_id: str, site_config_id: str, data: dict) -> dict:
        """Insert a variant and return the stored row as a dict.

        Missing keys in ``data`` fall back to: empty name, key ``"A"``,
        50 percent traffic, not a control, and ``"{}"`` theme overrides.
        """
        import json

        theme = data.get("theme_overrides", "{}")
        # The statement binds plain parameters, so a dict/list is serialised to
        # JSON text (the same shape as the "{}" default) before binding.
        if isinstance(theme, (dict, list)):
            theme = json.dumps(theme)

        q = text("""
            INSERT INTO compliance_banner_variants
            (tenant_id, site_config_id, variant_name, variant_key, traffic_percent, is_control,
            banner_title, banner_description, position, style, primary_color, show_decline_all, theme_overrides)
            VALUES (:tid, :scid, :name, :key, :pct, :ctrl,
            :title, :desc, :pos, :style, :color, :decline, :theme)
            RETURNING *
        """)
        row = self.db.execute(q, {
            "tid": tenant_id, "scid": site_config_id,
            "name": data.get("variant_name", ""),
            "key": data.get("variant_key", "A"),
            "pct": data.get("traffic_percent", 50),
            "ctrl": data.get("is_control", False),
            "title": data.get("banner_title"),
            "desc": data.get("banner_description"),
            "pos": data.get("position"),
            "style": data.get("style"),
            "color": data.get("primary_color"),
            "decline": data.get("show_decline_all"),
            "theme": theme,
        }).fetchone()
        self.db.commit()
        return dict(row._mapping)

    def update_variant(
        self, variant_id: str, data: dict, *, tenant_id: Optional[str] = None,
    ) -> Optional[dict]:
        """Update the whitelisted fields present (and not None) in ``data``.

        Args:
            variant_id: Id of the variant row.
            data: Field/value mapping; keys outside the whitelist are ignored.
            tenant_id: Optional. When given, the update only matches a row that
                also belongs to this tenant.

        Returns:
            The updated row as a dict, or None when there was nothing to
            update or no row matched.
        """
        params: dict[str, Any] = {"vid": variant_id}
        sets = []
        for field in self._UPDATABLE_FIELDS:
            if data.get(field) is not None:
                sets.append(f"{field} = :{field}")
                params[field] = data[field]
        if not sets:
            return None
        sets.append("updated_at = NOW()")

        where = "id = :vid"
        if tenant_id is not None:
            where += " AND tenant_id = :tid"
            params["tid"] = tenant_id

        q = text(f"UPDATE compliance_banner_variants SET {', '.join(sets)} WHERE {where} RETURNING *")
        row = self.db.execute(q, params).fetchone()
        self.db.commit()
        return dict(row._mapping) if row else None

    def delete_variant(self, variant_id: str, *, tenant_id: Optional[str] = None) -> bool:
        """Delete a variant; return True when a row was removed.

        When ``tenant_id`` is given, the delete only matches a row that also
        belongs to this tenant.
        """
        params: dict[str, Any] = {"vid": variant_id}
        where = "id = :vid"
        if tenant_id is not None:
            where += " AND tenant_id = :tid"
            params["tid"] = tenant_id
        result = self.db.execute(text(f"DELETE FROM compliance_banner_variants WHERE {where}"), params)
        self.db.commit()
        return result.rowcount > 0

    # ------------------------------------------------------------------
    # Variant Assignment (deterministic sticky bucketing)
    # ------------------------------------------------------------------

    def assign_variant(self, site_config_id: str, device_fingerprint: str) -> Optional[dict]:
        """Pick the active variant for a device, or None when none is active.

        The device is hashed into a bucket 0-99 and matched against the
        cumulative ``traffic_percent`` of the active variants in variant-key
        order, so the same (site config, fingerprint) pair always lands in the
        same bucket. If the percentages do not cover the bucket, the last
        variant is returned.
        """
        variants = self.db.execute(text("""
            SELECT * FROM compliance_banner_variants
            WHERE site_config_id = :scid AND is_active = TRUE
            ORDER BY variant_key
        """), {"scid": site_config_id}).fetchall()
        if not variants:
            return None

        # MD5 is used for bucketing only, not for security; the flag keeps the
        # call usable on builds that restrict MD5 and does not change the digest.
        digest = hashlib.md5(
            f"{site_config_id}:{device_fingerprint}".encode(),
            usedforsecurity=False,
        ).hexdigest()
        bucket = int(digest, 16) % 100

        cumulative = 0
        for v in variants:
            # A NULL traffic_percent contributes no traffic instead of raising.
            cumulative += v.traffic_percent or 0
            if bucket < cumulative:
                return dict(v._mapping)
        # Fallback to last variant
        return dict(variants[-1]._mapping)

    # ------------------------------------------------------------------
    # Stats with statistical significance
    # ------------------------------------------------------------------

    def get_variant_stats(self, tenant_id: str, site_config_id: str) -> list[dict]:
        """Return per-variant totals and opt-in rate, plus a significance check.

        When a control variant exists, the non-control variant with the
        highest opt-in rate is compared against it with a chi-squared test.
        That variant is flagged ``is_winner`` only if the confidence exceeds
        95 percent AND its opt-in rate is higher than the control's.
        """
        variants = self.list_variants(tenant_id, site_config_id)
        if not variants:
            return []

        # NOTE(review): the audit-log query filters by tenant and variant_key
        # only, so equal variant keys on other sites of the same tenant are
        # counted together — confirm whether a site filter is needed.
        q = text("""
            SELECT
            COUNT(*) AS total,
            COUNT(*) FILTER (WHERE action = 'consent_given') AS accepted,
            COUNT(*) FILTER (WHERE action IN ('consent_withdrawn', 'consent_revoked')) AS rejected
            FROM compliance_banner_consent_audit_log
            WHERE tenant_id = :tid AND variant_key = :vkey
        """)

        results = []
        for v in variants:
            vkey = v["variant_key"]
            row = self.db.execute(q, {"tid": tenant_id, "vkey": vkey}).fetchone()
            total = row.total if row else 0
            accepted = row.accepted if row else 0
            results.append({
                "variant_id": str(v["id"]),
                "variant_key": vkey,
                "variant_name": v["variant_name"],
                "traffic_percent": v["traffic_percent"],
                "is_control": v["is_control"],
                "total": total,
                "accepted": accepted,
                "opt_in_rate": round(accepted / total * 100, 1) if total > 0 else 0,
            })

        # Chi-squared test between control and best variant
        control = next((r for r in results if r["is_control"]), None)
        if control and len(results) > 1:
            best = max(
                (r for r in results if not r["is_control"]),
                key=lambda x: x["opt_in_rate"],
                default=None,
            )
            if best and control["total"] > 0 and best["total"] > 0:
                sig = self._chi_squared_significance(
                    control["accepted"], control["total"],
                    best["accepted"], best["total"],
                )
                # A significant difference only makes the variant a winner when
                # it actually outperforms the control.
                best["is_winner"] = sig > 0.95 and best["opt_in_rate"] > control["opt_in_rate"]
                best["significance"] = round(sig * 100, 1)
                control["is_winner"] = False
                control["significance"] = round((1 - sig) * 100, 1)

        return results

    @staticmethod
    def _chi_squared_significance(a_success: int, a_total: int, b_success: int, b_total: int) -> float:
        """Chi-squared test for a 2x2 table; returns the confidence in [0, 0.999].

        The confidence is the chi-squared CDF with one degree of freedom,
        ``erf(sqrt(chi2 / 2))``. Returns 0.0 for an empty table or a
        negligible statistic (chi2 < 0.001).
        """
        n = a_total + b_total
        if n == 0:
            return 0.0

        successes = a_success + b_success
        failures = n - successes

        # (observed, expected) for the four cells of the contingency table.
        cells = [
            (a_success, a_total * successes / n),
            (a_total - a_success, a_total * failures / n),
            (b_success, b_total * successes / n),
            (b_total - b_success, b_total * failures / n),
        ]
        chi2 = sum((obs - exp) ** 2 / exp for obs, exp in cells if exp > 0)

        if chi2 < 0.001:
            return 0.0
        # Capped so the rounded percentage never reads as 100%.
        return min(math.erf(math.sqrt(chi2 / 2)), 0.999)
|
||||
@@ -0,0 +1,135 @@
|
||||
"""
|
||||
Banner consent analytics — time-series, device breakdown, bounce rate.
|
||||
|
||||
Reads from BannerConsentAuditLogDB for aggregated analytics.
|
||||
"""
|
||||
|
||||
import re
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
class BannerAnalyticsService:
    """Aggregated consent analytics for one site, read from the consent audit log."""

    def __init__(self, db: Session) -> None:
        self.db = db

    @staticmethod
    def _since(days: int) -> datetime:
        """Return the UTC timestamp ``days`` days before now."""
        return datetime.now(timezone.utc) - timedelta(days=days)

    @staticmethod
    def _percent(part: int, whole: int):
        """Return ``part`` as a percentage of ``whole`` (one decimal), 0 if empty."""
        if whole > 0:
            return round(part / whole * 100, 1)
        return 0

    def get_time_series(
        self,
        tenant_id: str,
        site_id: str,
        period: str = "daily",
        days: int = 30,
    ) -> list[dict[str, Any]]:
        """Return per-day (``period == "daily"``) or per-week action counts and opt-in rate."""
        bind = {
            "tid": tenant_id,
            "sid": site_id,
            "cutoff": self._since(days),
            # Any period other than "daily" is bucketed by week.
            "trunc": "day" if period == "daily" else "week",
        }
        statement = text("""
            SELECT DATE_TRUNC(:trunc, created_at) AS period,
            COUNT(*) FILTER (WHERE action = 'consent_given') AS given,
            COUNT(*) FILTER (WHERE action = 'consent_updated') AS updated,
            COUNT(*) FILTER (WHERE action IN ('consent_withdrawn', 'consent_revoked')) AS withdrawn,
            COUNT(*) AS total
            FROM compliance_banner_consent_audit_log
            WHERE tenant_id = :tid AND site_id = :sid AND created_at >= :cutoff
            GROUP BY 1 ORDER BY 1
        """)
        series = []
        for rec in self.db.execute(statement, bind).fetchall():
            series.append({
                "period": rec.period.isoformat() if rec.period else None,
                "given": rec.given,
                "updated": rec.updated,
                "withdrawn": rec.withdrawn,
                "total": rec.total,
                "opt_in_rate": self._percent(rec.given + rec.updated, rec.total),
            })
        return series

    def get_category_breakdown(
        self,
        tenant_id: str,
        site_id: str,
        days: int = 30,
    ) -> dict[str, dict[str, int]]:
        """Return, per category, how many given/updated consents included it."""
        statement = text("""
            SELECT categories FROM compliance_banner_consent_audit_log
            WHERE tenant_id = :tid AND site_id = :sid AND created_at >= :cutoff
            AND action IN ('consent_given', 'consent_updated')
        """)
        bind = {"tid": tenant_id, "sid": site_id, "cutoff": self._since(days)}
        records = self.db.execute(statement, bind).fetchall()

        consent_count = len(records)
        tally: dict[str, int] = {}
        for rec in records:
            # Rows whose categories value is not a list contribute nothing.
            if not isinstance(rec.categories, list):
                continue
            for name in rec.categories:
                tally[name] = tally.get(name, 0) + 1

        breakdown = {}
        for name, hits in sorted(tally.items()):
            breakdown[name] = {
                "count": hits,
                "total": consent_count,
                "rate": self._percent(hits, consent_count),
            }
        return breakdown

    def get_device_breakdown(
        self,
        tenant_id: str,
        site_id: str,
        days: int = 30,
    ) -> dict[str, int]:
        """Count audit-log rows per device class derived from the user agent."""
        statement = text("""
            SELECT user_agent FROM compliance_banner_consent_audit_log
            WHERE tenant_id = :tid AND site_id = :sid AND created_at >= :cutoff
            AND user_agent IS NOT NULL
        """)
        bind = {"tid": tenant_id, "sid": site_id, "cutoff": self._since(days)}
        records = self.db.execute(statement, bind).fetchall()

        is_mobile = re.compile(r"Mobile|Android|iPhone|iPod", re.IGNORECASE).search
        is_tablet = re.compile(r"iPad|Tablet|PlayBook|Silk", re.IGNORECASE).search

        buckets = {"desktop": 0, "mobile": 0, "tablet": 0, "unknown": 0}
        for rec in records:
            agent = rec.user_agent or ""
            if not agent:
                kind = "unknown"
            elif is_tablet(agent):
                # Tablet is tested before mobile.
                kind = "tablet"
            elif is_mobile(agent):
                kind = "mobile"
            else:
                kind = "desktop"
            buckets[kind] += 1
        return buckets

    def get_overview_stats(
        self,
        tenant_id: str,
        site_id: str,
        days: int = 30,
    ) -> dict[str, Any]:
        """Return totals per action type and the overall opt-in rate for the period."""
        statement = text("""
            SELECT
            COUNT(*) FILTER (WHERE action = 'consent_given') AS given,
            COUNT(*) FILTER (WHERE action = 'consent_updated') AS updated,
            COUNT(*) FILTER (WHERE action IN ('consent_withdrawn', 'consent_revoked')) AS withdrawn,
            COUNT(*) AS total
            FROM compliance_banner_consent_audit_log
            WHERE tenant_id = :tid AND site_id = :sid AND created_at >= :cutoff
        """)
        bind = {"tid": tenant_id, "sid": site_id, "cutoff": self._since(days)}
        rec = self.db.execute(statement, bind).fetchone()

        if rec:
            interactions = rec.total
            opted_in = (rec.given or 0) + (rec.updated or 0)
            n_given, n_updated, n_withdrawn = rec.given, rec.updated, rec.withdrawn
        else:
            interactions = 0
            opted_in = 0
            n_given = n_updated = n_withdrawn = 0

        return {
            "period_days": days,
            "total_interactions": interactions,
            "consents_given": n_given,
            "consents_updated": n_updated,
            "consents_withdrawn": n_withdrawn,
            "opt_in_rate": self._percent(opted_in, interactions),
        }
|
||||
@@ -73,9 +73,8 @@ class BannerConsentService:
|
||||
ip_hash: Optional[str] = None,
|
||||
banner_config_hash: Optional[str] = None,
|
||||
consent_version: Optional[int] = None,
|
||||
*,
|
||||
consent_method: Optional[str] = None,
|
||||
page_url: Optional[str] = None,
|
||||
vendor_consents: Optional[dict[str, bool]] = None,
|
||||
user_agent: Optional[str] = None,
|
||||
) -> None:
|
||||
entry = BannerConsentAuditLogDB(
|
||||
tenant_id=tenant_id,
|
||||
@@ -84,11 +83,11 @@ class BannerConsentService:
|
||||
site_id=site_id,
|
||||
device_fingerprint=device_fingerprint,
|
||||
categories=categories or [],
|
||||
vendor_consents=vendor_consents or {},
|
||||
ip_hash=ip_hash,
|
||||
user_agent=user_agent,
|
||||
banner_config_hash=banner_config_hash,
|
||||
consent_version=consent_version,
|
||||
consent_method=consent_method,
|
||||
page_url=page_url,
|
||||
)
|
||||
self.db.add(entry)
|
||||
|
||||
@@ -134,6 +133,24 @@ class BannerConsentService:
|
||||
return max(v.retention_days for v in vendors if v.retention_days)
|
||||
return max((CATEGORY_RETENTION_DAYS.get(c, 365) for c in categories), default=365)
|
||||
|
||||
def _maybe_generate_tc_string(
    self, tenant_id: uuid.UUID, site_id: str, categories: list[str],
) -> Optional[str]:
    """Generate a TC String when TCF is enabled for this site.

    Returns None when no site config exists for (tenant, site), when the
    config does not have ``tcf_enabled`` set, or when encoding fails.
    Encoding is best-effort: a failure is logged and never propagates.
    """
    config = (
        self.db.query(BannerSiteConfigDB)
        .filter(BannerSiteConfigDB.tenant_id == tenant_id, BannerSiteConfigDB.site_id == site_id)
        .first()
    )
    if not config or not config.tcf_enabled:
        return None
    try:
        from compliance.services.tcf_encoder_service import TCFEncoderService
        encoder = TCFEncoderService()
        return encoder.encode_from_categories(categories)
    except Exception:
        # Keep the best-effort contract, but leave a trace instead of
        # swallowing the failure silently.
        import logging
        logging.getLogger(__name__).exception(
            "TC string generation failed for site %s", site_id,
        )
        return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Consent CRUD (public SDK)
|
||||
# ------------------------------------------------------------------
|
||||
@@ -148,16 +165,7 @@ class BannerConsentService:
|
||||
ip_address: Optional[str],
|
||||
user_agent: Optional[str],
|
||||
consent_string: Optional[str],
|
||||
*,
|
||||
consent_method: Optional[str] = None,
|
||||
page_url: Optional[str] = None,
|
||||
referrer: Optional[str] = None,
|
||||
device_type: Optional[str] = None,
|
||||
browser: Optional[str] = None,
|
||||
os: Optional[str] = None,
|
||||
screen_resolution: Optional[str] = None,
|
||||
session_id: Optional[str] = None,
|
||||
consent_scope: Optional[str] = None,
|
||||
vendor_consents: Optional[dict[str, bool]] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Upsert a device consent row for (tenant, site, device_fingerprint).
|
||||
|
||||
@@ -173,20 +181,9 @@ class BannerConsentService:
|
||||
expires_at = now + timedelta(days=retention)
|
||||
config_hash, config_ver = self._compute_config_hash(tid, site_id)
|
||||
|
||||
# Vendor-agnostische Zusatzfelder
|
||||
extra = {
|
||||
"consent_method": consent_method,
|
||||
"banner_version": config_ver,
|
||||
"banner_config_hash": config_hash,
|
||||
"page_url": page_url,
|
||||
"referrer": referrer,
|
||||
"device_type": device_type,
|
||||
"browser": browser,
|
||||
"os": os,
|
||||
"screen_resolution": screen_resolution,
|
||||
"session_id": session_id,
|
||||
"consent_scope": consent_scope or "domain",
|
||||
}
|
||||
# Auto-generate TC String if TCF is enabled for this site
|
||||
if not consent_string:
|
||||
consent_string = self._maybe_generate_tc_string(tid, site_id, categories)
|
||||
|
||||
existing = (
|
||||
self.db.query(BannerConsentDB)
|
||||
@@ -201,18 +198,17 @@ class BannerConsentService:
|
||||
if existing:
|
||||
existing.categories = categories
|
||||
existing.vendors = vendors
|
||||
existing.vendor_consents = vendor_consents or {}
|
||||
existing.ip_hash = ip_hash
|
||||
existing.user_agent = user_agent
|
||||
existing.consent_string = consent_string
|
||||
existing.expires_at = expires_at
|
||||
existing.updated_at = now
|
||||
for key, val in extra.items():
|
||||
setattr(existing, key, val)
|
||||
self.db.flush()
|
||||
self._log(
|
||||
tid, existing.id, "consent_updated", site_id, device_fingerprint,
|
||||
categories, ip_hash, config_hash, config_ver,
|
||||
consent_method=consent_method, page_url=page_url,
|
||||
vendor_consents=vendor_consents, user_agent=user_agent,
|
||||
)
|
||||
self.db.commit()
|
||||
self.db.refresh(existing)
|
||||
@@ -224,18 +220,18 @@ class BannerConsentService:
|
||||
device_fingerprint=device_fingerprint,
|
||||
categories=categories,
|
||||
vendors=vendors,
|
||||
vendor_consents=vendor_consents or {},
|
||||
ip_hash=ip_hash,
|
||||
user_agent=user_agent,
|
||||
consent_string=consent_string,
|
||||
expires_at=expires_at,
|
||||
**extra,
|
||||
)
|
||||
self.db.add(consent)
|
||||
self.db.flush()
|
||||
self._log(
|
||||
tid, consent.id, "consent_given", site_id, device_fingerprint,
|
||||
categories, ip_hash, config_hash, config_ver,
|
||||
consent_method=consent_method, page_url=page_url,
|
||||
vendor_consents=vendor_consents, user_agent=user_agent,
|
||||
)
|
||||
self.db.commit()
|
||||
self.db.refresh(consent)
|
||||
@@ -383,14 +379,7 @@ class BannerConsentService:
|
||||
total = base.count()
|
||||
category_stats: dict[str, int] = {}
|
||||
for c in base.all():
|
||||
raw = c.categories or []
|
||||
if isinstance(raw, str):
|
||||
try:
|
||||
import json
|
||||
raw = json.loads(raw)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
raw = []
|
||||
cats: list[str] = list(raw) if isinstance(raw, list) else []
|
||||
cats: list[str] = list(c.categories or [])
|
||||
for cat in cats:
|
||||
category_stats[cat] = category_stats.get(cat, 0) + 1
|
||||
return {
|
||||
@@ -404,58 +393,3 @@ class BannerConsentService:
|
||||
for cat, count in category_stats.items()
|
||||
},
|
||||
}
|
||||
|
||||
def list_consents(
    self, tenant_id: str, site_id: Optional[str] = None,
    limit: int = 50, offset: int = 0,
) -> dict[str, Any]:
    """Return one page of banner consents (newest first) plus the total count.

    ``categories`` and ``vendors`` are always returned as lists: JSON text is
    decoded, and anything that is not a list after decoding becomes ``[]``.
    """
    import json as _json

    def _as_list(value: Any) -> list:
        """Normalise a stored JSON value (list, JSON text or empty) to a list."""
        value = value or []
        if isinstance(value, str):
            try:
                value = _json.loads(value)
            except (ValueError, TypeError):
                value = []
        return list(value) if isinstance(value, list) else []

    def _iso(moment: Any) -> Optional[str]:
        """Format a timestamp as ISO 8601, passing empty values through as None."""
        return moment.isoformat() if moment else None

    tenant_uuid = uuid.UUID(tenant_id)
    query = self.db.query(BannerConsentDB).filter(BannerConsentDB.tenant_id == tenant_uuid)
    if site_id:
        query = query.filter(BannerConsentDB.site_id == site_id)

    total = query.count()
    page = query.order_by(BannerConsentDB.created_at.desc()).offset(offset).limit(limit).all()

    consents = [
        {
            "id": str(entry.id),
            "site_id": entry.site_id,
            "device_fingerprint": entry.device_fingerprint,
            "categories": _as_list(entry.categories),
            "vendors": _as_list(entry.vendors),
            "ip_hash": entry.ip_hash,
            "user_agent": entry.user_agent,
            "linked_email": entry.linked_email,
            "consent_string": entry.consent_string,
            "consent_method": entry.consent_method,
            "banner_version": entry.banner_version,
            "banner_config_hash": entry.banner_config_hash,
            "geo_country": entry.geo_country,
            "geo_region": entry.geo_region,
            "consent_scope": entry.consent_scope,
            "page_url": entry.page_url,
            "referrer": entry.referrer,
            "device_type": entry.device_type,
            "browser": entry.browser,
            "os": entry.os,
            "screen_resolution": entry.screen_resolution,
            "session_id": entry.session_id,
            "expires_at": _iso(entry.expires_at),
            "created_at": _iso(entry.created_at),
            "updated_at": _iso(entry.updated_at),
        }
        for entry in page
    ]
    return {"consents": consents, "total": total, "limit": limit, "offset": offset}
|
||||
|
||||
@@ -40,6 +40,22 @@ _CONTROL_COLUMNS = """
|
||||
"""
|
||||
|
||||
|
||||
def _ensure_list(val: Any) -> list:
|
||||
"""Ensure a JSONB value is always a Python list."""
|
||||
if isinstance(val, list):
|
||||
return val
|
||||
if val is None:
|
||||
return []
|
||||
if isinstance(val, str):
|
||||
try:
|
||||
import json
|
||||
parsed = json.loads(val)
|
||||
return parsed if isinstance(parsed, list) else []
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return []
|
||||
return []
|
||||
|
||||
|
||||
def _control_row(r: Any) -> dict[str, Any]:
|
||||
"""Serialize a canonical_controls SELECT row to a response dict."""
|
||||
return {
|
||||
@@ -49,19 +65,19 @@ def _control_row(r: Any) -> dict[str, Any]:
|
||||
"title": r.title,
|
||||
"objective": r.objective,
|
||||
"rationale": r.rationale,
|
||||
"scope": r.scope,
|
||||
"requirements": r.requirements,
|
||||
"test_procedure": r.test_procedure,
|
||||
"evidence": r.evidence,
|
||||
"scope": r.scope if isinstance(r.scope, dict) else {},
|
||||
"requirements": _ensure_list(r.requirements),
|
||||
"test_procedure": _ensure_list(r.test_procedure),
|
||||
"evidence": _ensure_list(r.evidence),
|
||||
"severity": r.severity,
|
||||
"risk_score": float(r.risk_score) if r.risk_score is not None else None,
|
||||
"implementation_effort": r.implementation_effort,
|
||||
"evidence_confidence": (
|
||||
float(r.evidence_confidence) if r.evidence_confidence is not None else None
|
||||
),
|
||||
"open_anchors": r.open_anchors,
|
||||
"open_anchors": _ensure_list(r.open_anchors),
|
||||
"release_state": r.release_state,
|
||||
"tags": r.tags or [],
|
||||
"tags": _ensure_list(r.tags),
|
||||
"created_at": r.created_at.isoformat() if r.created_at else None,
|
||||
"updated_at": r.updated_at.isoformat() if r.updated_at else None,
|
||||
}
|
||||
|
||||
@@ -0,0 +1,216 @@
|
||||
"""
|
||||
Compliance Report PDF Generator — generates a comprehensive A4 PDF
|
||||
covering all compliance modules for a project.
|
||||
|
||||
Uses reportlab (same as audit_pdf_generator.py).
|
||||
"""
|
||||
|
||||
import io
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from reportlab.lib import colors
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
from reportlab.lib.units import mm
|
||||
from reportlab.platypus import (
|
||||
SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak,
|
||||
)
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Colors
|
||||
PURPLE = colors.HexColor("#7c3aed")
|
||||
LIGHT_PURPLE = colors.HexColor("#f5f3ff")
|
||||
GRAY = colors.HexColor("#6b7280")
|
||||
GREEN = colors.HexColor("#16a34a")
|
||||
RED = colors.HexColor("#dc2626")
|
||||
YELLOW = colors.HexColor("#ca8a04")
|
||||
|
||||
|
||||
def _styles():
    """Return reportlab's sample stylesheet extended with the report's own styles.

    Adds ``Title2``, ``Section``, ``Body2`` and ``Small``, each derived from
    one of the sample styles.
    """
    sheet = getSampleStyleSheet()
    # (new style name, parent style name, overrides) — added in this order.
    custom_styles = (
        ("Title2", "Title", {"fontSize": 24, "textColor": PURPLE, "spaceAfter": 6}),
        ("Section", "Heading2", {"fontSize": 14, "textColor": PURPLE, "spaceBefore": 12, "spaceAfter": 6}),
        ("Body2", "Normal", {"fontSize": 10, "leading": 14, "spaceAfter": 4}),
        ("Small", "Normal", {"fontSize": 8, "textColor": GRAY}),
    )
    for style_name, parent_name, overrides in custom_styles:
        sheet.add(ParagraphStyle(style_name, parent=sheet[parent_name], **overrides))
    return sheet
|
||||
|
||||
|
||||
class CompliancePDFGenerator:
    """Generates a full compliance status report as PDF.

    Every section runs its own query and is best-effort: when a query fails
    the failure is logged, the session is rolled back and the section shows a
    short notice instead. Without the rollback a single failed statement
    (e.g. a missing table) can leave the transaction aborted, which would make
    every following section fail as well.
    """

    def __init__(self, db: Session) -> None:
        self.db = db

    def generate(self, tenant_id: str, project_id: str | None = None, language: str = "de") -> tuple[bytes, str]:
        """Build the report and return ``(pdf_bytes, filename)``.

        Args:
            tenant_id: Tenant whose records are counted/listed.
            project_id: Optional project filter. Only the company profile and
                the org-role section apply it; the count sections are
                tenant-wide.
            language: Accepted for interface compatibility; currently unused
                (all labels are German).
        """
        buf = io.BytesIO()
        doc = SimpleDocTemplate(buf, pagesize=A4, leftMargin=20 * mm, rightMargin=20 * mm, topMargin=25 * mm, bottomMargin=20 * mm)
        ss = _styles()
        story: list = []

        now = datetime.now(timezone.utc)
        story.append(Paragraph("Compliance-Report", ss["Title2"]))
        story.append(Paragraph(f"Stand: {now.strftime('%d.%m.%Y %H:%M')} UTC", ss["Small"]))
        story.append(Spacer(1, 10 * mm))

        # Company Profile
        self._add_company_section(story, ss, tenant_id, project_id)
        # TOM
        self._add_count_section(story, ss, "TOM (Technisch-Organisatorische Massnahmen)",
                                "compliance_toms", tenant_id)
        # VVT
        self._add_count_section(story, ss, "VVT (Verarbeitungstaetigkeiten)",
                                "compliance_vvt_activities", tenant_id)
        # DSFA
        self._add_count_section(story, ss, "Datenschutz-Folgenabschaetzungen",
                                "compliance_dsfa_assessments", tenant_id)
        # Risks
        self._add_risk_section(story, ss, tenant_id)
        # Vendors
        self._add_count_section(story, ss, "Auftragsverarbeiter",
                                "compliance_vendor_assessments", tenant_id)
        # Incidents
        self._add_count_section(story, ss, "Datenschutz-Vorfaelle",
                                "compliance_notfallplan_incidents", tenant_id)
        # Document Reviews
        self._add_review_section(story, ss, tenant_id)
        # Banner Consents
        self._add_consent_section(story, ss, tenant_id)
        # Org Roles
        self._add_role_section(story, ss, tenant_id, project_id)
        # Footer
        story.append(Spacer(1, 15 * mm))
        story.append(Paragraph("Erstellt mit BreakPilot Compliance SDK", ss["Small"]))

        doc.build(story)
        filename = f"compliance-report-{now.strftime('%Y%m%d')}.pdf"
        return buf.getvalue(), filename

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _cell(value) -> str:
        """Render a DB value as table text; NULL/empty becomes "-" (not "None")."""
        return "-" if value is None or value == "" else str(value)

    @staticmethod
    def _table(data, col_widths, extra_commands=()):
        """Build a table with the header/grid styling shared by all sections."""
        table = Table(data, colWidths=col_widths)
        table.setStyle(TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), LIGHT_PURPLE),
            ("TEXTCOLOR", (0, 0), (-1, 0), PURPLE),
            ("FONTSIZE", (0, 0), (-1, -1), 9),
            ("GRID", (0, 0), (-1, -1), 0.5, colors.lightgrey),
            *extra_commands,
        ]))
        return table

    def _section_failed(self, story, ss, section: str, notice: str) -> None:
        """Handle a failed section; must be called from inside an ``except`` block.

        Logs the active exception, rolls the session back so later sections
        can still query, and appends ``notice`` (Paragraph markup) to the story.
        """
        logger.warning("Compliance report section %r could not be built", section, exc_info=True)
        try:
            self.db.rollback()
        except Exception:
            # Nothing more we can do here; later sections will show their own notice.
            logger.debug("Rollback after failed section %r also failed", section, exc_info=True)
        story.append(Paragraph(notice, ss["Small"]))

    # ------------------------------------------------------------------
    # Sections
    # ------------------------------------------------------------------

    def _add_company_section(self, story, ss, tid, pid):
        """Append the company profile table (first profile row matching tenant/project)."""
        from xml.sax.saxutils import escape

        story.append(Paragraph("Unternehmensprofil", ss["Section"]))
        try:
            where = "tenant_id = :tid"
            params: dict[str, Any] = {"tid": tid}
            if pid:
                where += " AND project_id = :pid"
                params["pid"] = pid
            row = self.db.execute(text(f"SELECT * FROM compliance_company_profiles WHERE {where} LIMIT 1"), params).fetchone()
            if row:
                d = dict(row._mapping)
                data = [
                    ["Feld", "Wert"],
                    ["Firma", self._cell(d.get("company_name"))],
                    ["Branche", self._cell(d.get("industry"))],
                    ["Rechtsform", self._cell(d.get("legal_form"))],
                    ["Mitarbeiter", self._cell(d.get("employee_count"))],
                ]
                story.append(self._table(data, [60 * mm, 100 * mm], [("VALIGN", (0, 0), (-1, -1), "TOP")]))
            else:
                story.append(Paragraph("Kein Unternehmensprofil hinterlegt.", ss["Body2"]))
        except Exception as e:
            # Paragraph text is parsed as markup, so the raw exception text
            # must be escaped or a stray "<" / "&" would raise again here.
            self._section_failed(story, ss, "Unternehmensprofil", f"Fehler beim Laden: {escape(str(e))}")
        story.append(Spacer(1, 5 * mm))

    def _add_count_section(self, story, ss, title, table_name, tid):
        """Append a section showing the tenant's row count in ``table_name``.

        ``table_name`` is interpolated into the SQL text, so it must only ever
        be a hard-coded constant, never user input.
        """
        story.append(Paragraph(title, ss["Section"]))
        try:
            count = self.db.execute(text(f"SELECT COUNT(*) FROM {table_name} WHERE tenant_id = :tid"), {"tid": tid}).scalar()
            story.append(Paragraph(f"Eintraege: <b>{count or 0}</b>", ss["Body2"]))
        except Exception:
            self._section_failed(story, ss, title, "Tabelle nicht vorhanden oder leer.")
        story.append(Spacer(1, 3 * mm))

    def _add_risk_section(self, story, ss, tid):
        """Append a table with the number of risks per severity."""
        story.append(Paragraph("Risikobewertung", ss["Section"]))
        try:
            q = text("""
                SELECT severity, COUNT(*) as cnt FROM compliance_risks
                WHERE tenant_id = :tid GROUP BY severity ORDER BY severity
            """)
            rows = self.db.execute(q, {"tid": tid}).fetchall()
            if rows:
                data = [["Schweregrad", "Anzahl"]]
                data.extend([r.severity or "UNKNOWN", str(r.cnt)] for r in rows)
                story.append(self._table(data, [80 * mm, 40 * mm]))
            else:
                story.append(Paragraph("Keine Risiken erfasst.", ss["Body2"]))
        except Exception:
            self._section_failed(story, ss, "Risikobewertung", "Risiko-Tabelle nicht vorhanden.")
        story.append(Spacer(1, 3 * mm))

    def _add_review_section(self, story, ss, tid):
        """Append a table with the number of document reviews per status."""
        story.append(Paragraph("Dokumenten-Reviews", ss["Section"]))
        try:
            q = text("SELECT status, COUNT(*) as cnt FROM compliance_document_reviews WHERE tenant_id = :tid GROUP BY status")
            rows = self.db.execute(q, {"tid": tid}).fetchall()
            if rows:
                data = [["Status", "Anzahl"]]
                # NULL status is labelled the same way the risk table labels NULL severity.
                data.extend([r.status or "UNKNOWN", str(r.cnt)] for r in rows)
                story.append(self._table(data, [80 * mm, 40 * mm]))
            else:
                story.append(Paragraph("Keine Reviews vorhanden.", ss["Body2"]))
        except Exception:
            self._section_failed(story, ss, "Dokumenten-Reviews", "Review-Tabelle nicht vorhanden.")
        story.append(Spacer(1, 3 * mm))

    def _add_consent_section(self, story, ss, tid):
        """Append the total number of stored banner consents."""
        story.append(Paragraph("Banner-Consents", ss["Section"]))
        try:
            count = self.db.execute(text("SELECT COUNT(*) FROM compliance_banner_consents WHERE tenant_id = :tid"), {"tid": tid}).scalar()
            story.append(Paragraph(f"Gesamte Consents: <b>{count or 0}</b>", ss["Body2"]))
        except Exception:
            self._section_failed(story, ss, "Banner-Consents", "Banner-Tabelle nicht vorhanden.")
        story.append(Spacer(1, 3 * mm))

    def _add_role_section(self, story, ss, tid, pid):
        """Append the org-role table.

        With a project id, roles of that project and roles without any project
        (``project_id IS NULL``) are both listed.
        """
        story.append(Paragraph("Rollenkonzept", ss["Section"]))
        try:
            where = "tenant_id = :tid"
            params: dict[str, Any] = {"tid": tid}
            if pid:
                where += " AND (project_id = :pid OR project_id IS NULL)"
                params["pid"] = pid
            rows = self.db.execute(text(f"SELECT role_key, role_label, person_name, person_email FROM compliance_org_roles WHERE {where} ORDER BY role_key"), params).fetchall()
            if rows:
                data = [["Rolle", "Name", "E-Mail"]]
                data.extend([r.role_label or r.role_key, r.person_name or "-", r.person_email or "-"] for r in rows)
                story.append(self._table(data, [60 * mm, 50 * mm, 50 * mm]))
            else:
                story.append(Paragraph("Keine Rollen zugewiesen.", ss["Body2"]))
        except Exception:
            self._section_failed(story, ss, "Rollenkonzept", "Rollen-Tabelle nicht vorhanden.")
|
||||
@@ -87,9 +87,10 @@ def compare_services(
|
||||
|
||||
for key, svc in detected_names.items():
|
||||
# Skip CMP — consent managers don't need DSE mention
|
||||
if svc.get("category") == "other" and svc.get("id") == "cmp":
|
||||
if svc.get("category") == "cmp" or (svc.get("category") == "other" and svc.get("id") == "cmp"):
|
||||
continue
|
||||
matched = False
|
||||
# Method 1: Match against LLM-extracted service list
|
||||
for dse_key, dse_svc in dse_names.items():
|
||||
if key == dse_key or _fuzzy_match(svc["name"], dse_svc["name"]):
|
||||
documented.append({"detected": svc, "dse": dse_svc, "status": "ok"})
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
DSR Art. 11 Service — handles "data subject not identifiable" rejections.
|
||||
|
||||
Art. 11 Abs. 1 DSGVO: If the controller is unable to identify the data
|
||||
subject, it is not obligated to obtain additional information solely to
|
||||
comply with Art. 15-20 requests.
|
||||
|
||||
Common scenario: Website visitor requests access, but only anonymous
|
||||
cookies/IP-hashes are stored — no way to link to a person.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from compliance.domain import ValidationError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DSRArt11Service:
    """Handles Art. 11 DSGVO rejections for non-identifiable data subjects."""

    def __init__(self, db: Session) -> None:
        self._db = db

    def reject_not_identifiable(
        self, dsr_id: str, tenant_id: str, notes: str = "",
    ) -> Dict[str, Any]:
        """Reject DSR because data subject cannot be identified.

        Args:
            dsr_id: Id of the request to reject.
            tenant_id: Tenant the request must belong to.
            notes: Optional free text appended to the stored rejection reason.

        Returns:
            The updated request as produced by ``_dsr_to_dict``.

        Raises:
            ValidationError: If the request does not exist for this tenant or
                is already completed, rejected or cancelled.
        """
        from compliance.db.dsr_models import DSRRequestDB
        from compliance.services.dsr_workflow_service import _dsr_to_dict, _record_history

        dsr = (
            self._db.query(DSRRequestDB)
            .filter(DSRRequestDB.id == dsr_id, DSRRequestDB.tenant_id == tenant_id)
            .first()
        )
        if not dsr:
            raise ValidationError("DSR not found")
        if dsr.status in ("completed", "rejected", "cancelled"):
            raise ValidationError("DSR already closed")

        now = datetime.now(timezone.utc)
        reason = (
            "Die bei uns gespeicherten Daten (anonymisierte Cookies, IP-Hashes, "
            "Device-Fingerprints) erlauben keine Identifikation der betroffenen Person. "
            "Gemaess Art. 11 Abs. 1 DSGVO sind wir nicht verpflichtet, zusaetzliche "
            "Informationen zu erheben, um die betroffene Person zu identifizieren."
        )
        if notes:
            reason += f" Ergaenzung: {notes}"

        # History is recorded before the status field is overwritten.
        _record_history(self._db, dsr, "rejected",
                        comment="Art. 11 DSGVO — Identifikation nicht moeglich")
        dsr.status = "rejected"
        dsr.rejection_reason = reason
        dsr.rejection_legal_basis = "Art. 11 Abs. 1 DSGVO"
        dsr.identity_verified = False
        dsr.verification_method = "art11_not_identifiable"
        dsr.verification_notes = "Daten erlauben keine Identifikation der betroffenen Person"
        dsr.completed_at = now
        dsr.updated_at = now
        self._db.commit()
        self._db.refresh(dsr)

        # Send rejection notification (best-effort; the rejection is already committed)
        self._send_art11_notification(dsr)

        return _dsr_to_dict(dsr)

    def _send_art11_notification(self, dsr: Any) -> None:
        """Mail the Art. 11 rejection to the requester; failures are only logged.

        Does nothing when the request has no requester e-mail address.
        """
        if not dsr.requester_email:
            return
        try:
            from html import escape

            from compliance.services.email_delivery_service import EmailDeliveryService

            delivery = EmailDeliveryService(self._db)
            requester_name = dsr.requester_name or "Antragsteller/in"
            reference = dsr.request_number or ""
            variables = {
                # NOTE(review): passed unescaped because the same variables may
                # also be rendered into the plain-text subject of a published
                # template — confirm whether templates expect escaped values.
                "requester_name": requester_name,
                "reference_number": reference,
                "rejection_reason": "Identifikation nicht moeglich — Art. 11 Abs. 1 DSGVO",
                "legal_basis": "Art. 11 Abs. 1 DSGVO",
                "sender_name": "Datenschutzbeauftragter",
            }
            # A missing request number must not show up as the text "None".
            if reference:
                fallback_subject = f"Zu Ihrer Anfrage {reference} — Art. 11 DSGVO"
            else:
                fallback_subject = "Zu Ihrer Anfrage — Art. 11 DSGVO"
            # The name is supplied by the requester, so it is HTML-escaped
            # before being embedded in the inline fallback body.
            fallback_html = (
                f"<p>Sehr geehrte/r {escape(requester_name)},</p>\n"
                "<p>wir koennen die bei uns gespeicherten Daten keiner identifizierbaren Person zuordnen.\n"
                "Gemaess Art. 11 Abs. 1 DSGVO ist eine Auskunftserteilung nicht moeglich.</p>\n"
                "<p>Mit freundlichen Gruessen<br/>Datenschutzbeauftragter</p>"
            )
            # Use published dsr_rejection template, fallback to inline
            delivery.send(
                tenant_id=str(dsr.tenant_id),
                template_type="dsr_rejection",
                recipient=dsr.requester_email,
                variables=variables,
                fallback_subject=fallback_subject,
                fallback_html=fallback_html,
            )
        except Exception as e:
            logger.warning("Art. 11 notification failed: %s", e)
|
||||
@@ -0,0 +1,273 @@
|
||||
"""
|
||||
DSR User Data Export Service — aggregates all CMP data about a user.
|
||||
|
||||
Supports Art. 15 (access right, PDF) and Art. 20 (data portability, JSON/CSV).
|
||||
Collects from: Banner Consents, Einwilligungen, Consent Audit Trail, DSR History.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
from reportlab.lib import colors
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
from reportlab.lib.units import mm
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from compliance.services.banner_dsr_service import BannerDSRService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Palette for the DSR export PDF: purple headings/table headers, gray small print.
PURPLE, LIGHT_PURPLE, GRAY = (
    colors.HexColor(hex_code) for hex_code in ("#7c3aed", "#f5f3ff", "#6b7280")
)
|
||||
|
||||
|
||||
class DSRExportService:
    """Aggregates and exports all user data stored in the CMP.

    ``aggregate_user_data`` collects the data; ``export_json``, ``export_csv``
    and ``export_pdf`` render it and return ``(content_bytes, filename)``.
    """

    def __init__(self, db: Session) -> None:
        self.db = db

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _jsonable(row_mapping) -> dict[str, Any]:
        """Copy a row mapping, converting datetimes to ISO strings and UUIDs to str."""
        converted: dict[str, Any] = {}
        for key, value in dict(row_mapping).items():
            if isinstance(value, datetime):
                value = value.isoformat()
            elif isinstance(value, uuid.UUID):
                value = str(value)
            converted[key] = value
        return converted

    def _rollback_quietly(self) -> None:
        """Reset the session after a failed query so the next source can still be read."""
        try:
            self.db.rollback()
        except Exception:
            logger.debug("Rollback after failed DSR export query failed", exc_info=True)

    @staticmethod
    def _filename(email: str, extension: str) -> str:
        """Build ``dsr-export-<local part>.<extension>`` with a filesystem/header-safe stem.

        The local part comes from user input, so anything outside
        ``[A-Za-z0-9._+-]`` (path separators, quotes, line breaks, ...) is
        replaced by ``_``.
        """
        import re

        stem = re.sub(r"[^A-Za-z0-9._+-]+", "_", email.split("@")[0]).strip(".") or "user"
        return f"dsr-export-{stem}.{extension}"

    @staticmethod
    def _join_categories(values) -> str:
        """Join a category list for display; tolerates None, a plain string and non-str items."""
        if not values:
            return ""
        if isinstance(values, str):
            return values
        return ", ".join(str(v) for v in values)

    @staticmethod
    def _clip(value, length: int) -> str:
        """Return the first ``length`` characters of ``value``, or "-" when it is NULL/empty."""
        return str(value)[:length] if value else "-"

    # ------------------------------------------------------------------
    # Aggregation
    # ------------------------------------------------------------------

    def aggregate_user_data(self, tenant_id: str, email: str) -> dict[str, Any]:
        """Collect ALL data about a user from all CMP sources.

        Each source is read best-effort: a failure is logged, the session is
        rolled back and that source stays empty (or partially filled).
        """
        now = datetime.now(timezone.utc)

        # 1. Banner consents + audit trail
        banner_data: dict[str, Any] = {"banner_consents": [], "audit_trail": []}
        try:
            banner_data = BannerDSRService(self.db).export_for_dsr(tenant_id, email)
        except Exception as e:
            logger.warning("Banner DSR export failed: %s", e)
            self._rollback_quietly()

        # 2. Einwilligungen (user-based consents), each with its history
        einwilligungen: list[dict] = []
        try:
            consent_q = text("""
                SELECT c.id, c.data_point_id, c.granted, c.granted_at, c.revoked_at,
                       c.consent_version, c.source, c.ip_address, c.user_agent, c.created_at
                FROM compliance_einwilligungen_consents c
                WHERE c.tenant_id = CAST(:tid AS VARCHAR) AND c.user_id = :email
                ORDER BY c.created_at DESC
            """)
            # Built once; it is the same statement for every consent row.
            hist_q = text("""
                SELECT action, consent_version, ip_address, user_agent, source, created_at
                FROM compliance_einwilligungen_consent_history
                WHERE consent_id = :cid ORDER BY created_at
            """)
            # tenant_id is passed as string; the statement casts it itself.
            for r in self.db.execute(consent_q, {"tid": tenant_id, "email": email}).fetchall():
                entry = self._jsonable(r._mapping)
                hist = self.db.execute(hist_q, {"cid": entry["id"]}).fetchall()
                entry["history"] = [self._jsonable(h._mapping) for h in hist]
                einwilligungen.append(entry)
        except Exception as e:
            logger.warning("Einwilligungen export failed: %s", e)
            self._rollback_quietly()

        # 3. DSR requests by this user
        dsr_requests: list[dict] = []
        try:
            q = text("""
                SELECT id, request_number, request_type, status, received_at, deadline_at, completed_at
                FROM compliance_dsr_requests
                WHERE tenant_id = :tid AND requester_email = :email
                ORDER BY received_at DESC
            """)
            rows = self.db.execute(q, {"tid": tenant_id, "email": email}).fetchall()
            dsr_requests.extend(self._jsonable(r._mapping) for r in rows)
        except Exception as e:
            logger.warning("DSR requests export failed: %s", e)
            self._rollback_quietly()

        return {
            "export_date": now.isoformat(),
            "data_subject": {"email": email},
            "banner_consents": banner_data.get("banner_consents", []),
            "consent_audit_trail": banner_data.get("audit_trail", []),
            "einwilligungen": einwilligungen,
            "dsr_requests": dsr_requests,
            "metadata": {
                "tenant_id": tenant_id,
                "data_categories": ["Banner-Consents", "Einwilligungen", "Audit-Trail", "DSR-Anfragen"],
                "legal_basis": "Art. 15 / Art. 20 DSGVO",
            },
        }

    # ------------------------------------------------------------------
    # Export formats
    # ------------------------------------------------------------------

    def export_json(self, tenant_id: str, email: str) -> tuple[bytes, str]:
        """Return the aggregated data as pretty-printed UTF-8 JSON."""
        data = self.aggregate_user_data(tenant_id, email)
        data["metadata"]["export_format"] = "json"
        # default=str stringifies any value json cannot encode natively.
        content = json.dumps(data, indent=2, ensure_ascii=False, default=str).encode("utf-8")
        return content, self._filename(email, "json")

    def export_csv(self, tenant_id: str, email: str) -> tuple[bytes, str]:
        """Return the aggregated data as a flat CSV (UTF-8 with BOM)."""
        data = self.aggregate_user_data(tenant_id, email)
        buf = io.StringIO()
        writer = csv.writer(buf)
        writer.writerow(["Kategorie", "Schluessel", "Wert", "Zeitpunkt", "Quelle"])

        # Banner consents
        for c in data.get("banner_consents", []):
            writer.writerow(["Banner-Consent", "site_id", c.get("site_id", ""), c.get("created_at", ""), "CMP"])
            writer.writerow(["Banner-Consent", "categories", self._join_categories(c.get("categories")), c.get("updated_at", ""), "CMP"])
            writer.writerow(["Banner-Consent", "ip_hash", c.get("ip_hash", ""), c.get("created_at", ""), "CMP"])

        # Audit trail
        for a in data.get("consent_audit_trail", []):
            writer.writerow(["Audit-Trail", a.get("action", ""), self._join_categories(a.get("categories")), a.get("created_at", ""), "CMP"])

        # Einwilligungen
        for e in data.get("einwilligungen", []):
            status = "Erteilt" if e.get("granted") else "Widerrufen"
            writer.writerow(["Einwilligung", e.get("data_point_id", ""), status, e.get("granted_at", ""), e.get("source", "")])

        # DSR requests
        for d in data.get("dsr_requests", []):
            writer.writerow(["DSR-Anfrage", d.get("request_type", ""), d.get("status", ""), d.get("received_at", ""), ""])

        content = buf.getvalue().encode("utf-8-sig")  # BOM for Excel
        return content, self._filename(email, "csv")

    def export_pdf(self, tenant_id: str, email: str) -> tuple[bytes, str]:
        """Return the aggregated data as an A4 PDF (Art. 15 access report).

        The audit trail is limited to the first 50 entries in the PDF.
        """
        from xml.sax.saxutils import escape

        data = self.aggregate_user_data(tenant_id, email)
        buf = io.BytesIO()
        doc = SimpleDocTemplate(buf, pagesize=A4, leftMargin=20 * mm, rightMargin=20 * mm, topMargin=25 * mm, bottomMargin=20 * mm)
        ss = getSampleStyleSheet()
        ss.add(ParagraphStyle("Title2", parent=ss["Title"], fontSize=20, textColor=PURPLE, spaceAfter=6))
        ss.add(ParagraphStyle("Section", parent=ss["Heading2"], fontSize=13, textColor=PURPLE, spaceBefore=10))
        ss.add(ParagraphStyle("Body2", parent=ss["Normal"], fontSize=9, leading=13))
        ss.add(ParagraphStyle("Small", parent=ss["Normal"], fontSize=8, textColor=GRAY))
        story: list = []

        # Cover
        story.append(Paragraph("Datenauskunft gemaess Art. 15 DSGVO", ss["Title2"]))
        # Paragraph text is parsed as markup; the address is user input and must be escaped.
        story.append(Paragraph(f"Betroffene Person: {escape(email)}", ss["Body2"]))
        story.append(Paragraph(f"Erstellt am: {data['export_date'][:10]}", ss["Small"]))
        story.append(Spacer(1, 8 * mm))

        tbl_style = TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), LIGHT_PURPLE),
            ("TEXTCOLOR", (0, 0), (-1, 0), PURPLE),
            ("FONTSIZE", (0, 0), (-1, -1), 8),
            ("GRID", (0, 0), (-1, -1), 0.5, colors.lightgrey),
            ("VALIGN", (0, 0), (-1, -1), "TOP"),
            ("TOPPADDING", (0, 0), (-1, -1), 3),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 3),
        ])

        def add_table(rows, col_widths):
            table = Table(rows, colWidths=col_widths)
            table.setStyle(tbl_style)
            story.append(table)

        # Section 1: Banner Consents
        consents = data.get("banner_consents", [])
        story.append(Paragraph(f"1. Banner-Consents ({len(consents)})", ss["Section"]))
        if consents:
            rows = [["Site", "Kategorien", "IP-Hash", "Erstellt", "Aktualisiert"]]
            for c in consents:
                ip_hash = str(c.get("ip_hash") or "")
                # Only mark the hash as shortened when it actually was.
                shown_hash = ip_hash[:12] + "..." if len(ip_hash) > 12 else (ip_hash or "-")
                rows.append([
                    str(c.get("site_id") or ""),
                    self._join_categories(c.get("categories")),
                    shown_hash,
                    self._clip(c.get("created_at"), 10),
                    self._clip(c.get("updated_at"), 10),
                ])
            add_table(rows, [30 * mm, 40 * mm, 30 * mm, 25 * mm, 25 * mm])
        else:
            story.append(Paragraph("Keine Banner-Consents gespeichert.", ss["Body2"]))

        # Section 2: Einwilligungen
        einw = data.get("einwilligungen", [])
        story.append(Paragraph(f"2. Einwilligungen ({len(einw)})", ss["Section"]))
        if einw:
            rows = [["Datenpunkt", "Status", "Erteilt am", "Widerrufen am", "IP-Adresse"]]
            for e in einw:
                rows.append([
                    str(e.get("data_point_id") or ""),
                    "Erteilt" if e.get("granted") else "Widerrufen",
                    # granted_at is NULL for consents that were never granted.
                    self._clip(e.get("granted_at"), 10),
                    self._clip(e.get("revoked_at"), 10),
                    self._clip(e.get("ip_address"), 15),
                ])
            add_table(rows, [35 * mm, 25 * mm, 25 * mm, 25 * mm, 35 * mm])
        else:
            story.append(Paragraph("Keine Einwilligungen gespeichert.", ss["Body2"]))

        # Section 3: Audit Trail
        trail = data.get("consent_audit_trail", [])
        story.append(Paragraph(f"3. Consent-Audit-Trail ({len(trail)})", ss["Section"]))
        if trail:
            rows = [["Aktion", "Kategorien", "Datum"]]
            for a in trail[:50]:  # Limit to 50 for PDF
                rows.append([
                    str(a.get("action") or ""),
                    self._join_categories(a.get("categories")),
                    self._clip(a.get("created_at"), 19),
                ])
            add_table(rows, [40 * mm, 60 * mm, 45 * mm])
            if len(trail) > 50:
                story.append(Paragraph(f"... und {len(trail) - 50} weitere Eintraege (im JSON-Export enthalten)", ss["Small"]))
        else:
            story.append(Paragraph("Kein Audit-Trail vorhanden.", ss["Body2"]))

        # Section 4: DSR Requests
        dsrs = data.get("dsr_requests", [])
        story.append(Paragraph(f"4. Bisherige DSR-Anfragen ({len(dsrs)})", ss["Section"]))
        if dsrs:
            rows = [["Typ", "Status", "Eingegangen", "Abgeschlossen"]]
            for d in dsrs:
                rows.append([
                    str(d.get("request_type") or ""),
                    str(d.get("status") or ""),
                    self._clip(d.get("received_at"), 10),
                    self._clip(d.get("completed_at"), 10),
                ])
            add_table(rows, [35 * mm, 30 * mm, 35 * mm, 35 * mm])
        else:
            # Same empty-state handling as the three sections above.
            story.append(Paragraph("Keine DSR-Anfragen vorhanden.", ss["Body2"]))

        # Footer
        story.append(Spacer(1, 15 * mm))
        story.append(Paragraph("Erstellt mit BreakPilot Compliance SDK | Art. 15 DSGVO Datenauskunft", ss["Small"]))

        doc.build(story)
        return buf.getvalue(), self._filename(email, "pdf")
|
||||
@@ -0,0 +1,122 @@
|
||||
"""
|
||||
Email Template Delivery Service — the missing integration layer.
|
||||
|
||||
Combines: template loading → published version → variable rendering → SMTP → audit log.
|
||||
Used by DSR workflow, document reviews, and other modules that need to send
|
||||
templated emails.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Any, Optional
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from compliance.db.email_template_models import (
|
||||
EmailSendLogDB,
|
||||
EmailTemplateDB,
|
||||
EmailTemplateVersionDB,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _render(html: str, variables: dict[str, str]) -> str:
|
||||
"""Replace {{variable}} placeholders with values."""
|
||||
result = html
|
||||
for key, value in variables.items():
|
||||
result = result.replace(f"{{{{{key}}}}}", str(value))
|
||||
return result
|
||||
|
||||
|
||||
class EmailDeliveryService:
    """Load template → render → send via SMTP → log."""

    def __init__(self, db: Session) -> None:
        self.db = db

    def get_published_version(
        self, tenant_id: str, template_type: str,
    ) -> Optional[EmailTemplateVersionDB]:
        """Get the latest published version of a template by type.

        Returns None when the tenant has no template of this type or the
        template has no version with status "published".

        Raises:
            ValueError: If ``tenant_id`` is not a valid UUID string.
        """
        tid = uuid.UUID(tenant_id)
        template = (
            self.db.query(EmailTemplateDB)
            .filter(EmailTemplateDB.tenant_id == tid, EmailTemplateDB.template_type == template_type)
            .first()
        )
        if not template:
            return None
        return (
            self.db.query(EmailTemplateVersionDB)
            .filter(
                EmailTemplateVersionDB.template_id == template.id,
                EmailTemplateVersionDB.status == "published",
            )
            .order_by(EmailTemplateVersionDB.created_at.desc())
            .first()
        )

    def send(
        self,
        tenant_id: str,
        template_type: str,
        recipient: str,
        variables: dict[str, str],
        fallback_subject: Optional[str] = None,
        fallback_html: Optional[str] = None,
    ) -> dict[str, Any]:
        """Send a templated email. Falls back to inline HTML if no published template.

        Args:
            tenant_id: Tenant UUID string.
            template_type: E.g. 'dsr_receipt', 'dsr_completion'.
            recipient: Email address.
            variables: Dict of {{key}}: value for rendering.
            fallback_subject: Subject if no template found.
            fallback_html: HTML body if no template found.

        Returns:
            A result dict. ``success`` is True only when the SMTP sender
            reports status "sent". When neither a published template nor a
            complete fallback exists, nothing is sent and the dict only holds
            ``success`` and ``error``.

        Note:
            The audit log entry is committed on the shared session, which
            also commits any other pending changes on that session.
        """
        from compliance.services.smtp_sender import send_email

        tid = uuid.UUID(tenant_id)
        version = self.get_published_version(tenant_id, template_type)

        if version:
            subject = _render(version.subject, variables)
            body_html = _render(version.body_html, variables)
            version_id = version.id
        elif fallback_subject and fallback_html:
            subject = _render(fallback_subject, variables)
            body_html = _render(fallback_html, variables)
            version_id = None
        else:
            logger.warning("No published template for '%s' and no fallback provided", template_type)
            return {"success": False, "error": f"No template for {template_type}"}

        result = send_email(recipient=recipient, subject=subject, body_html=body_html)

        # Audit log (best-effort: a logging failure must not fail the send)
        try:
            log = EmailSendLogDB(
                tenant_id=tid,
                template_type=template_type,
                version_id=version_id,
                recipient=recipient,
                subject=subject,
                status=result.get("status", "unknown"),
                variables=variables,
                error_message=result.get("error"),
            )
            self.db.add(log)
            self.db.commit()
        except Exception as e:
            logger.warning("Failed to log email send: %s", e)
            # After a failed flush/commit the session refuses further work
            # until it is rolled back; callers keep using this session.
            try:
                self.db.rollback()
            except Exception:
                logger.debug("Rollback after failed email audit log also failed", exc_info=True)

        return {
            "success": result.get("status") == "sent",
            "template_type": template_type,
            "recipient": recipient,
            "subject": subject,
            "used_template": version is not None,
            "status": result.get("status"),
        }
|
||||
@@ -0,0 +1,179 @@
|
||||
"""
|
||||
Intake Extractor — derives UCCA intake flags from DETECTED SERVICES,
|
||||
not from website text content.
|
||||
|
||||
The actual data processing happens through APIs, scripts, and cookies —
|
||||
NOT through visible text on the page. A news website reporting about
|
||||
healthcare does NOT process health data.
|
||||
|
||||
Flags are derived deterministically from:
|
||||
1. Which third-party services are embedded (Google Analytics → tracking)
|
||||
2. Which payment providers are used (Stripe → payment_data)
|
||||
3. Which CDN/fonts are loaded (Google Fonts → cross_border_transfer)
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Service category → intake flags implied by every service of that category.
# Service-specific additions live in SPECIFIC_SERVICE_FLAGS. Note that "cdn"
# itself only implies personal_data (the visitor's IP); cross_border_transfer
# is derived separately from a service's eu_adequate attribute.
SERVICE_TO_FLAGS: dict[str, dict[str, bool]] = {
    category: dict.fromkeys(implied_flags, True)
    for category, implied_flags in (
        # Tracking & analytics
        ("tracking", ("personal_data", "tracking")),
        # Marketing
        ("marketing", ("personal_data", "tracking", "marketing", "third_party_sharing")),
        # Heatmap / session recording
        ("heatmap", ("personal_data", "tracking", "profiling")),
        # Payment
        ("payment", ("personal_data", "payment_data")),
        # Chatbot (user sends messages)
        ("chatbot", ("personal_data", "customer_data")),
        # CRM
        ("crm", ("personal_data", "customer_data", "profiling")),
        # CDN
        ("cdn", ("personal_data",)),
    )
}
|
||||
|
||||
# Flags implied by individual services (keyed by service id), applied in
# addition to the flags of the service's category.
SPECIFIC_SERVICE_FLAGS: dict[str, dict[str, bool]] = dict(
    klarna=dict(automated_decisions=True, payment_data=True),
    paypal=dict(cross_border_transfer=True, payment_data=True),
    stripe=dict(cross_border_transfer=True, payment_data=True),
    google_analytics=dict(cross_border_transfer=True, tracking=True),
    facebook_pixel=dict(cross_border_transfer=True, marketing=True, profiling=True),
    hotjar=dict(profiling=True, tracking=True),
    ms_clarity=dict(cross_border_transfer=True, profiling=True),
    tiktok_pixel=dict(cross_border_transfer=True, marketing=True),
    intercom=dict(cross_border_transfer=True, ai_usage=True),
)
|
||||
|
||||
|
||||
def extract_intake_flags_from_services(detected_services: list[dict]) -> dict:
    """Derive intake flags from detected third-party services.

    The result is a pure table lookup: each service contributes the flags of
    its ``category`` (SERVICE_TO_FLAGS) and of its ``id``
    (SPECIFIC_SERVICE_FLAGS). A service whose ``eu_adequate`` is falsy
    additionally sets cross_border_transfer and third_party_sharing, and any
    detected service at all sets personal_data.

    Returns:
        Dict with every known flag name mapped to True/False.
    """
    flag_names = (
        "personal_data",
        "customer_data",
        "payment_data",
        "location_data",
        "biometric_data",
        "minor_data",
        "health_data",
        "marketing",
        "profiling",
        "automated_decisions",
        "third_party_sharing",
        "cross_border_transfer",
        "tracking",
        "ai_usage",
    )
    flags = dict.fromkeys(flag_names, False)

    for service in detected_services:
        # Category-level flags first, then the service-specific ones.
        implied_tables = (
            SERVICE_TO_FLAGS.get(service.get("category", "other"), {}),
            SPECIFIC_SERVICE_FLAGS.get(service.get("id", ""), {}),
        )
        for implied in implied_tables:
            for name, enabled in implied.items():
                if enabled:
                    flags[name] = True

        # Services without EU adequacy imply a transfer to a third country.
        if not service.get("eu_adequate", True):
            flags["cross_border_transfer"] = True
            flags["third_party_sharing"] = True

    # Any website with detected services processes personal data (IP at minimum)
    if detected_services:
        flags["personal_data"] = True

    raised = {name: state for name, state in flags.items() if state}
    logger.info("Intake flags from %d services: %s", len(detected_services), raised)
    return flags
|
||||
|
||||
|
||||
# Keep backward compatibility
|
||||
async def extract_intake_flags(text: str) -> dict:
    """Deprecated shim kept for callers of the old text-based extractor.

    The ``text`` argument is ignored. A deprecation warning is logged and a
    fixed minimal flag set is returned; use
    ``extract_intake_flags_from_services()`` for real results.

    Returns:
        ``{"personal_data": True, "tracking": False}``.
    """
    deprecation_notice = (
        "extract_intake_flags(text) called — DEPRECATED. "
        "Use extract_intake_flags_from_services(detected_services) instead."
    )
    logger.warning(deprecation_notice)

    # A reachable website implies personal data (the visitor's IP); nothing
    # else can be concluded from text alone.
    minimal_flags = {"personal_data": True, "tracking": False}
    return minimal_flags
|
||||
|
||||
|
||||
def flags_to_ucca_intake(flags: dict) -> dict:
    """Translate the flat intake flags into the nested UCCA intake payload.

    Args:
        flags: Flag mapping as produced by the intake extractor. Missing keys
            count as ``False``.

    Returns:
        Dict with the sections ``data_types``, ``purpose``, ``automation``,
        ``outputs`` and ``hosting``.
    """

    def flag(name: str):
        # Absent flags are treated as "not set".
        return flags.get(name, False)

    automated = flag("automated_decisions")
    profiling = flag("profiling")
    uses_ai = flag("ai_usage")
    crosses_border = flag("cross_border_transfer")

    # Automated decisions outrank plain AI usage when picking the level.
    if automated:
        automation_level = "fully_automated"
    elif uses_ai:
        automation_level = "partially_automated"
    else:
        automation_level = "manual"

    data_types = {
        "personal_data": flag("personal_data"),
        "customer_data": flag("customer_data"),
        "location_data": flag("location_data"),
        "biometric_data": flag("biometric_data"),
        "minor_data": flag("minor_data"),
        "images": False,
        "audio": False,
        "financial_data": flag("payment_data"),
        "employee_data": False,
        # Health or biometric data both count as special-category data.
        "article_9_data": flag("health_data") or flag("biometric_data"),
    }
    purpose = {
        "marketing": flag("marketing"),
        "analytics": flag("tracking"),
        "profiling": profiling,
        "automation": uses_ai,
        "customer_support": False,
        "evaluation_scoring": automated,
        "decision_making": automated,
    }
    outputs = {
        "recommendations_to_users": profiling,
        "data_export": crosses_border,
        "legal_effects": automated,
    }
    hosting = {"region": "non_eu" if crosses_border else "eu"}

    return {
        "data_types": data_types,
        "purpose": purpose,
        "automation": automation_level,
        "outputs": outputs,
        "hosting": hosting,
    }
|
||||
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Control Relevance Filter — filters out controls that are not relevant
|
||||
for the analyzed document based on keyword matching.
|
||||
|
||||
Prevents false positives like C_TRANSPARENCY being recommended when
|
||||
no AI usage is evident.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Top controls with their relevance conditions.
|
||||
# A control is only relevant if ANY keyword from 'requires_any' matches the text.
|
||||
# If 'requires_any' is empty, the control is always relevant.
|
||||
# Per-control relevance rules, keyed by control id.
#   description  - short label of the control
#   requires_any - lowercase keywords; one substring hit in the analysed text
#                  makes the control relevant (empty list = always relevant)
#   reason       - explanation that is logged when the control is filtered out
CONTROL_RELEVANCE: dict[str, dict] = {
    "C_TRANSPARENCY": {
        # Transparency obligations are Art. 50 in the adopted AI Act
        # (Regulation (EU) 2024/1689); "Art. 52" was the proposal numbering.
        "description": "KI-Transparenz-Hinweis (Art. 50 AI Act)",
        "requires_any": [
            "künstliche intelligenz", "kuenstliche intelligenz",
            "artificial intelligence", "machine learning", "maschinelles lernen",
            "ki-gestützt", "ki-gestuetzt", "ai-powered", "ai system",
            "chatbot", "neural", "deep learning", "algorithmus", "algorithmen",
            "automatisierte entscheidung", "automated decision",
        ],
        "reason": "Nur relevant wenn KI/ML tatsaechlich eingesetzt wird",
    },
    "C_DSFA_REQUIRED": {
        "description": "Datenschutz-Folgenabschaetzung durchfuehren",
        "requires_any": [
            "gesundheit", "biometrisch", "genetisch", "health", "biometric",
            "scoring", "profiling", "systematisch", "umfangreich",
            "videoüberwachung", "videoueberwachung", "kamera",
            "minderjährig", "minderjaehrig", "kinder",
        ],
        "reason": "Nur bei hohem Risiko (Art. 9 Daten, Profiling, Ueberwachung)",
    },
    "C_ART22_INFO": {
        "description": "Info ueber automatisierte Einzelentscheidung (Art. 22 DSGVO)",
        "requires_any": [
            "automatisierte entscheidung", "automated decision", "scoring",
            "bonitaet", "kredit", "rating", "algorithmische entscheidung",
            "profiling", "klarna", "ratenzahlung",
        ],
        "reason": "Nur bei automatisierten Einzelentscheidungen mit Rechtswirkung",
    },
    "C_DPO_REQUIRED": {
        "description": "Datenschutzbeauftragten bestellen",
        "requires_any": [],  # Always relevant — empty means no filter
        "reason": "Generell relevant fuer Unternehmen",
    },
    "C_EXPLICIT_CONSENT": {
        "description": "Explizite Einwilligung einholen",
        "requires_any": [
            "cookie", "tracking", "analytics", "pixel", "marketing",
            "werbung", "newsletter", "remarketing", "retargeting",
            "einwilligung", "consent", "opt-in",
        ],
        "reason": "Nur bei Tracking/Marketing das Einwilligung erfordert",
    },
    "C_CHILD_PROTECTION": {
        # Fixed typo: was "Minderdjaehrige".
        "description": "Besonderer Schutz fuer Minderjaehrige",
        "requires_any": [
            "kinder", "minderjährig", "minderjaehrig", "jugend",
            "under 16", "unter 16", "schüler", "schueler", "child",
        ],
        "reason": "Nur wenn Daten von Minderjaehrigen verarbeitet werden",
    },
    "C_THIRD_COUNTRY_SAFEGUARDS": {
        "description": "Drittlandtransfer absichern (Art. 44-49 DSGVO)",
        # NOTE(review): keywords are matched as plain substrings by the filter,
        # so short entries such as "usa" or "meta" also hit inside longer words
        # ("zusammen", "metadaten") — confirm this is acceptable.
        "requires_any": [
            "usa", "united states", "drittland", "drittst", "third countr",
            "standardvertragsklausel", "sccs", "binding corporate",
            "angemessenheitsbeschluss", "adequacy",
            "google", "meta", "facebook", "amazon", "microsoft", "apple",
            "cloudflare", "stripe", "paypal",
        ],
        "reason": "Nur bei Datentransfer in Drittlaender",
    },
}
|
||||
|
||||
|
||||
def filter_controls(
    controls: list[str],
    source_text: str,
    intake_flags: dict | None = None,
) -> list[str]:
    """Drop controls whose relevance rule is not met by the analysed text.

    A control is kept when one of these holds:
      * its id cannot be extracted or has no entry in ``CONTROL_RELEVANCE``,
      * its rule has an empty ``requires_any`` list,
      * one of its keywords occurs as a substring of the lowercased text,
      * ``intake_flags`` is given and ``_check_flags`` reports it relevant.

    Args:
        controls: Control strings in ``"[C_XXX] description"`` form.
        source_text: Text of the analysed document.
        intake_flags: Optional flag mapping used as a fallback signal.

    Returns:
        The kept controls in their original order. An empty/falsy ``controls``
        argument is returned unchanged.
    """
    if not controls:
        return controls

    haystack = source_text.lower()
    kept: list[str] = []
    dropped: list[tuple] = []

    for entry in controls:
        # Entries look like "[C_TRANSPARENCY] Nutzer informieren...".
        cid = _extract_control_id(entry)
        rule = CONTROL_RELEVANCE.get(cid) if cid else None

        # Controls without a rule are never filtered.
        if rule is None:
            kept.append(entry)
            continue

        needles = rule["requires_any"]
        relevant = (
            not needles  # empty list means "always relevant"
            or any(needle in haystack for needle in needles)
            or bool(intake_flags and _check_flags(cid, intake_flags))
        )
        if relevant:
            kept.append(entry)
        else:
            dropped.append((cid, rule["reason"]))

    if dropped:
        summary = ", ".join(f"{cid} ({reason})" for cid, reason in dropped)
        logger.info("Relevance filter removed %d controls: %s", len(dropped), summary)

    return kept
|
||||
|
||||
|
||||
def _extract_control_id(control: str) -> str | None:
|
||||
"""Extract control ID from '[C_XXX] description' format."""
|
||||
match = re.match(r"\[([A-Z_0-9]+)\]", control)
|
||||
return match.group(1) if match else None
|
||||
|
||||
|
||||
def _check_flags(control_id: str, flags: dict) -> bool:
|
||||
"""Check if intake flags make a control relevant."""
|
||||
flag_map = {
|
||||
"C_TRANSPARENCY": flags.get("ai_usage", False),
|
||||
"C_DSFA_REQUIRED": flags.get("health_data", False) or flags.get("biometric_data", False),
|
||||
"C_ART22_INFO": flags.get("automated_decisions", False),
|
||||
"C_EXPLICIT_CONSENT": flags.get("tracking", False) or flags.get("marketing", False),
|
||||
"C_CHILD_PROTECTION": flags.get("minor_data", False),
|
||||
"C_THIRD_COUNTRY_SAFEGUARDS": flags.get("cross_border_transfer", False),
|
||||
}
|
||||
return flag_map.get(control_id, False)
|
||||
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
TCF 2.2 TC String Encoder — generates IAB Transparency & Consent strings.
|
||||
|
||||
Implements the TC String v2.2 format per IAB specification.
|
||||
The TC String is a base64url-encoded bitfield containing:
|
||||
- CMP metadata (ID, version, screen, consent language)
|
||||
- Purpose consents (12 standard IAB purposes)
|
||||
- Vendor consents (per IAB vendor ID)
|
||||
- Legitimate interest signals
|
||||
|
||||
Reference: https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework
|
||||
|
||||
NOTE: This is a simplified encoder for CMP integration. For full GVL
|
||||
(Global Vendor List) support, integrate with the IAB GVL API.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import math
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
# IAB TCF 2.2 Standard Purposes
|
||||
# TCF 2.2 defines exactly 11 purposes. The former entry 12 ("Use limited data
# to select ads") is not an IAB purpose — selecting ads with limited data is
# Purpose 2 — and has been removed.
# NOTE(review): the labels of purposes 2-10 appear to follow the older TCF 2.0
# wording; verify them against the current Global Vendor List translations.
IAB_PURPOSES = {
    1: {"name": "Store and/or access information on a device", "name_de": "Informationen auf Geraet speichern/abrufen"},
    2: {"name": "Select basic ads", "name_de": "Einfache Anzeigen auswaehlen"},
    3: {"name": "Create a personalised ads profile", "name_de": "Personalisiertes Anzeigenprofil erstellen"},
    4: {"name": "Select personalised ads", "name_de": "Personalisierte Anzeigen auswaehlen"},
    5: {"name": "Create a personalised content profile", "name_de": "Personalisiertes Inhaltsprofil erstellen"},
    6: {"name": "Select personalised content", "name_de": "Personalisierte Inhalte auswaehlen"},
    7: {"name": "Measure ad performance", "name_de": "Anzeigen-Leistung messen"},
    8: {"name": "Measure content performance", "name_de": "Inhalte-Leistung messen"},
    9: {"name": "Apply market research to generate audience insights", "name_de": "Marktforschung fuer Zielgruppen"},
    10: {"name": "Develop and improve products", "name_de": "Produkte entwickeln und verbessern"},
    11: {"name": "Use limited data to select content", "name_de": "Eingeschraenkte Daten fuer Inhalte nutzen"},
}

# IAB Special Features
IAB_SPECIAL_FEATURES = {
    1: {"name": "Use precise geolocation data", "name_de": "Praezise Standortdaten verwenden"},
    2: {"name": "Actively scan device characteristics for identification", "name_de": "Geraetemerkmale aktiv scannen"},
}

# Category-to-Purpose mapping (how our banner categories map to IAB purposes)
CATEGORY_PURPOSE_MAP = {
    "necessary": [],  # No consent needed
    "functional": [1, 11],  # Device access + limited data for content
    "statistics": [1, 7, 8, 9, 10],  # Device access + measurement + research
    "marketing": [1, 2, 3, 4, 5, 6, 7],  # Ad selection, profiling and ad measurement
}
|
||||
|
||||
|
||||
def _int_to_bits(value: int, length: int) -> str:
|
||||
"""Convert integer to fixed-length bit string."""
|
||||
return bin(value)[2:].zfill(length)
|
||||
|
||||
|
||||
def _datetime_to_deciseconds(dt: datetime) -> int:
|
||||
"""Convert datetime to deciseconds since epoch (IAB format)."""
|
||||
epoch = datetime(2000, 1, 1, tzinfo=timezone.utc)
|
||||
return int((dt - epoch).total_seconds() * 10)
|
||||
|
||||
|
||||
def _bits_to_base64url(bits: str) -> str:
|
||||
"""Convert bit string to base64url encoding (TC String format)."""
|
||||
# Pad to multiple of 8
|
||||
padding = (8 - len(bits) % 8) % 8
|
||||
bits += "0" * padding
|
||||
# Convert to bytes
|
||||
byte_array = bytearray()
|
||||
for i in range(0, len(bits), 8):
|
||||
byte_array.append(int(bits[i:i+8], 2))
|
||||
# Base64url encode (no padding)
|
||||
return base64.urlsafe_b64encode(bytes(byte_array)).rstrip(b"=").decode("ascii")
|
||||
|
||||
|
||||
class TCFEncoderService:
    """Generates the core segment of a TC String in the IAB TCF v2 bit layout.

    The CMP metadata given to the constructor is written into every string.
    Only the core segment is produced, with bitfield (not range) vendor
    sections and no publisher restrictions. VendorListVersion is fixed to 0
    and PublisherCC to ``DE``.
    """

    def __init__(
        self,
        cmp_id: int = 1,
        cmp_version: int = 1,
        consent_screen: int = 1,
        consent_language: str = "DE",
    ):
        self.cmp_id = cmp_id
        self.cmp_version = cmp_version
        self.consent_screen = consent_screen
        self.consent_language = consent_language

    @staticmethod
    def _two_letter_bits(code: str, field_name: str) -> str:
        """Encode the first two letters of ``code`` as 2 x 6 bits (A=0 ... Z=25)."""
        letters = code.upper()[:2]
        if len(letters) != 2 or any(not ("A" <= ch <= "Z") for ch in letters):
            raise ValueError(f"{field_name} must start with two letters A-Z, got {code!r}")
        return "".join(_int_to_bits(ord(ch) - ord("A"), 6) for ch in letters)

    @staticmethod
    def _flag_bits(signals: dict[int, bool], last_id: int) -> str:
        """Return one bit per id in ``1..last_id``; missing ids count as 0."""
        return "".join("1" if signals.get(i, False) else "0" for i in range(1, last_id + 1))

    @classmethod
    def _vendor_bitfield(cls, signals: dict[int, bool]) -> str:
        """Build a vendor section: MaxVendorId (16) + IsRangeEncoding=0 + bitfield."""
        max_vendor = max(signals, default=0)
        return _int_to_bits(max_vendor, 16) + "0" + cls._flag_bits(signals, max_vendor)

    def encode(
        self,
        purpose_consents: dict[int, bool],
        vendor_consents: dict[int, bool],
        purpose_li: dict[int, bool] | None = None,
        special_features: dict[int, bool] | None = None,
        vendor_li: dict[int, bool] | None = None,
    ) -> str:
        """Generate a TC String from consent decisions.

        Args:
            purpose_consents: {purpose_id: True/False}; ids 1-24 are encoded.
            vendor_consents: {vendor_id: True/False} for IAB vendor IDs.
            purpose_li: Legitimate interest signals per purpose (ids 1-24).
            special_features: Special feature opt-ins (ids 1-12).
            vendor_li: Legitimate interest signals per vendor. When omitted,
                the vendor consents are mirrored into the LI section, which
                is the behaviour this encoder had before the parameter
                existed. Pass ``{}`` to signal no vendor legitimate interest.

        Returns:
            Base64url-encoded TC String (core segment).

        Raises:
            ValueError: If the consent language does not start with two
                letters A-Z.
        """
        # Created and LastUpdated are identical: every call creates a new string.
        # NOTE(review): TCF 2.2 is understood to expect day-precision
        # timestamps — confirm whether `now` should be truncated to midnight UTC.
        created = _datetime_to_deciseconds(datetime.now(timezone.utc))

        li = purpose_li or {}
        sf = special_features or {}
        vendor_li_signals = vendor_consents if vendor_li is None else vendor_li

        # Collect the fields in wire order and join once at the end instead of
        # growing a string bit by bit.
        fields = [
            _int_to_bits(2, 6),                       # Version (6 bits) = 2
            _int_to_bits(created, 36),                # Created (36 bits)
            _int_to_bits(created, 36),                # LastUpdated (36 bits)
            _int_to_bits(self.cmp_id, 12),            # CmpId (12 bits)
            _int_to_bits(self.cmp_version, 12),       # CmpVersion (12 bits)
            _int_to_bits(self.consent_screen, 6),     # ConsentScreen (6 bits)
            self._two_letter_bits(self.consent_language, "consent_language"),  # ConsentLanguage (12 bits)
            _int_to_bits(0, 12),                      # VendorListVersion — 0, no GVL is fetched
            _int_to_bits(4, 6),                       # TcfPolicyVersion = 4 for TCF 2.2
            "1",                                      # IsServiceSpecific = 1
            "0",                                      # UseNonStandardTexts = 0
            self._flag_bits(sf, 12),                  # SpecialFeatureOptIns (12 bits)
            self._flag_bits(purpose_consents, 24),    # PurposesConsent (24 bits)
            self._flag_bits(li, 24),                  # PurposesLITransparency (24 bits)
            "0",                                      # PurposeOneTreatment = 0
            self._two_letter_bits("DE", "publisher_cc"),  # PublisherCC (12 bits) = DE
            self._vendor_bitfield(vendor_consents),   # Vendor consents (bitfield encoding)
            self._vendor_bitfield(vendor_li_signals), # Vendor legitimate interests
            # NumPubRestrictions (12 bits): the core segment ends with the
            # publisher-restrictions section; without this count a decoder
            # reads past the end of the string. No restrictions are emitted.
            _int_to_bits(0, 12),
        ]
        return _bits_to_base64url("".join(fields))

    def encode_from_categories(
        self,
        categories: list[str],
        vendor_consents: dict[int, bool] | None = None,
    ) -> str:
        """Generate a TC String from banner category selections.

        Each selected category is expanded to IAB purposes through
        ``CATEGORY_PURPOSE_MAP``; unknown categories contribute nothing.

        Args:
            categories: Accepted banner categories.
            vendor_consents: Optional {vendor_id: True/False}; defaults to
                no vendors.

        Returns:
            Base64url-encoded TC String.
        """
        purpose_consents: dict[int, bool] = {
            purpose_id: True
            for cat in categories
            for purpose_id in CATEGORY_PURPOSE_MAP.get(cat, [])
        }
        return self.encode(
            purpose_consents=purpose_consents,
            vendor_consents=vendor_consents or {},
        )

    @staticmethod
    def get_purposes() -> list[dict[str, Any]]:
        """Return every entry of ``IAB_PURPOSES`` as ``{id, name, name_de}``."""
        return [
            {"id": pid, "name": info["name"], "name_de": info["name_de"]}
            for pid, info in IAB_PURPOSES.items()
        ]

    @staticmethod
    def get_special_features() -> list[dict[str, Any]]:
        """Return every entry of ``IAB_SPECIAL_FEATURES`` as ``{id, name, name_de}``."""
        return [
            {"id": fid, "name": info["name"], "name_de": info["name_de"]}
            for fid, info in IAB_SPECIAL_FEATURES.items()
        ]

    @staticmethod
    def get_category_purpose_map() -> dict[str, list[int]]:
        """Return the module-level ``CATEGORY_PURPOSE_MAP`` (the shared object, not a copy)."""
        return CATEGORY_PURPOSE_MAP
|
||||
@@ -0,0 +1,159 @@
|
||||
"""
|
||||
Training Link Service — bridges document review approvals with the Academy.
|
||||
|
||||
After a document is approved, checks which roles need training on that
|
||||
document type and identifies gaps (missing/overdue assignments).
|
||||
|
||||
Gracefully handles missing training tables (Go service not migrated yet).
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TrainingLinkService:
    """Links document approvals to training requirements.

    All queries issued here are reads. Whenever a query fails, the session is
    rolled back so that follow-up statements on the same session do not run
    inside an aborted transaction.
    """

    def __init__(self, db: Session) -> None:
        self.db = db

    def _recover(self, context: str, exc: Exception) -> None:
        """Log a failed query and roll the session back."""
        logger.warning("%s: %s", context, exc)
        self.db.rollback()

    def _training_tables_exist(self) -> bool:
        """Check if the Go-managed training tables exist."""
        try:
            self.db.execute(text("SELECT 1 FROM training_modules LIMIT 0"))
            return True
        except Exception:
            # A missing table is the expected case here, so it is not logged.
            self.db.rollback()
            return False

    def get_role_codes_for_document(self, tenant_id: str, document_type: str) -> list[dict]:
        """Map document type -> org roles -> training role codes.

        Returns:
            One ``{"role_key", "training_role_code"}`` dict per mapping row;
            ``training_role_code`` is ``None`` when no training mapping
            exists. An empty list is returned if the query fails.
        """
        try:
            # The tenant alternatives must be parenthesised: AND binds tighter
            # than OR, so without the brackets the document_type filter only
            # applied to the '__default__' rows and every row of the tenant
            # was returned regardless of document type.
            q = text("""
                SELECT m.role_key, t.training_role_code
                FROM compliance_document_role_mapping m
                LEFT JOIN compliance_role_training_mapping t
                    ON t.org_role_key = m.role_key
                    AND (t.tenant_id = :tid OR t.tenant_id = '__default__')
                WHERE (m.tenant_id = :tid OR m.tenant_id = '__default__')
                    AND m.document_type = :dt
            """)
            rows = self.db.execute(q, {"tid": tenant_id, "dt": document_type}).fetchall()
            return [{"role_key": r.role_key, "training_role_code": r.training_role_code} for r in rows]
        except Exception as e:
            self._recover("Failed to get role codes", e)
            return []

    def get_training_requirements(self, tenant_id: str, document_type: str) -> dict[str, Any]:
        """Get training modules required for roles associated with a document type.

        Returns:
            Dict with ``academy_available`` and ``requirements``; depending on
            the outcome also ``message``, ``total`` or ``error``.
        """
        if not self._training_tables_exist():
            return {
                "academy_available": False,
                "message": "Academy noch nicht eingerichtet. Training-Module werden nach Aktivierung automatisch verknuepft.",
                "requirements": [],
            }

        role_mappings = self.get_role_codes_for_document(tenant_id, document_type)
        if not role_mappings:
            return {"academy_available": True, "message": "Keine Rollen-Zuordnung fuer diesen Dokumenttyp.", "requirements": []}

        role_codes = [r["training_role_code"] for r in role_mappings if r.get("training_role_code")]
        if not role_codes:
            return {"academy_available": True, "message": "Keine Training-Codes konfiguriert.", "requirements": []}

        try:
            # Only generated placeholder names are interpolated into the SQL;
            # the role codes themselves are passed as bound parameters.
            params: dict[str, Any] = {"tid": tenant_id}
            params.update({f"rc{i}": rc for i, rc in enumerate(role_codes)})
            placeholders = ",".join(f":rc{i}" for i in range(len(role_codes)))

            q = text(f"""
                SELECT tm.role_code, m.module_code, m.title, m.description,
                       m.frequency_type, m.duration_minutes, tm.is_mandatory
                FROM training_matrix tm
                JOIN training_modules m ON m.id = tm.module_id
                WHERE tm.tenant_id = :tid AND tm.role_code IN ({placeholders})
                    AND m.is_active = TRUE
                ORDER BY tm.role_code, m.sort_order
            """)
            rows = self.db.execute(q, params).fetchall()
            reqs = [dict(r._mapping) for r in rows]
            return {"academy_available": True, "requirements": reqs, "total": len(reqs)}
        except Exception as e:
            self._recover("Failed to query training requirements", e)
            return {"academy_available": True, "requirements": [], "error": str(e)}

    def check_training_gaps(
        self, tenant_id: str, document_type: str, project_id: str | None = None,
    ) -> dict[str, Any]:
        """Check which persons assigned to roles have outstanding training.

        For every role mapped to the document type, the assigned person is
        looked up and each mandatory, active module of the role is checked
        against that person's latest assignment. A module counts as a gap
        unless the latest assignment has status ``completed`` or ``passed``.

        Args:
            tenant_id: Tenant whose roles and assignments are inspected.
            document_type: Document type used to resolve the relevant roles.
            project_id: When given, project-specific and project-less role
                assignments are both considered.

        Returns:
            Dict with ``academy_available``, ``gaps`` and ``total_gaps``.
        """
        if not self._training_tables_exist():
            return {"academy_available": False, "gaps": [], "total_gaps": 0}

        role_mappings = self.get_role_codes_for_document(tenant_id, document_type)
        if not role_mappings:
            return {"academy_available": True, "gaps": [], "total_gaps": 0}

        gaps = []
        for rm in role_mappings:
            role_key = rm["role_key"]
            role_code = rm.get("training_role_code")
            if not role_code:
                continue

            # Get person assigned to this role
            where = "tenant_id = :tid AND role_key = :rk"
            params: dict[str, Any] = {"tid": tenant_id, "rk": role_key}
            if project_id:
                # NOTE(review): with LIMIT 1 and no ORDER BY it is undefined
                # whether the project-specific or the project-less row wins.
                where += " AND (project_id = :pid OR project_id IS NULL)"
                params["pid"] = project_id

            try:
                person = self.db.execute(text(
                    f"SELECT person_name, person_email, role_label FROM compliance_org_roles WHERE {where} LIMIT 1"
                ), params).fetchone()
            except Exception as e:
                self._recover(f"Failed to look up person for role {role_key}", e)
                continue

            if not person or not person.person_name:
                continue

            # Get required modules for this role code
            try:
                modules = self.db.execute(text("""
                    SELECT m.id, m.module_code, m.title FROM training_matrix tm
                    JOIN training_modules m ON m.id = tm.module_id
                    WHERE tm.tenant_id = :tid AND tm.role_code = :rc AND m.is_active = TRUE AND tm.is_mandatory = TRUE
                """), {"tid": tenant_id, "rc": role_code}).fetchall()
            except Exception as e:
                self._recover(f"Failed to load mandatory modules for role code {role_code}", e)
                continue

            for mod in modules:
                # Check if assignment exists and is completed
                try:
                    assignment = self.db.execute(text("""
                        SELECT status, progress_percent FROM training_assignments
                        WHERE tenant_id = :tid AND module_id = :mid AND user_email = :email
                        ORDER BY created_at DESC LIMIT 1
                    """), {"tid": tenant_id, "mid": mod.id, "email": person.person_email}).fetchone()
                except Exception as e:
                    # Unchanged outcome: an unreadable assignment is reported
                    # as a gap rather than silently treated as completed.
                    self._recover(f"Failed to load assignment for module {mod.module_code}", e)
                    assignment = None

                if not assignment or assignment.status not in ("completed", "passed"):
                    gaps.append({
                        "person_name": person.person_name,
                        "person_email": person.person_email,
                        "role": person.role_label,
                        "role_key": role_key,
                        "module_code": mod.module_code,
                        "module_title": mod.title,
                        "status": assignment.status if assignment else "nicht_begonnen",
                        "progress": assignment.progress_percent if assignment else 0,
                    })

        return {"academy_available": True, "gaps": gaps, "total_gaps": len(gaps)}
|
||||
@@ -0,0 +1,148 @@
|
||||
"""
|
||||
Website Compliance Checks — checks public website for consumer protection
|
||||
compliance (§312k BGB, §5 TMG, Art. 13 DSGVO, Cookie-Banner).
|
||||
|
||||
Extracted from agent_analyze_routes.py to keep route files slim.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
class FollowUpQuestion:
    """Plain value holder for a question that is handed back to the user.

    All five constructor arguments are stored unchanged as attributes of the
    same name.
    """

    def __init__(self, id: str, question: str, legal_basis: str, severity: str, finding_if_no: str):
        # Identifier and the wording of the question itself.
        self.id, self.question = id, question
        # Legal reference and weighting of the question.
        self.legal_basis, self.severity = legal_basis, severity
        # Finding text associated with a negative answer (as the name suggests).
        self.finding_if_no = finding_if_no
|
||||
|
||||
|
||||
async def check_website_compliance(
    client: httpx.AsyncClient, url: str, html: str,
) -> tuple[list[str], list[FollowUpQuestion]]:
    """Scan public website for consumer protection compliance.

    Four checks run against the lowercased HTML of the page:
      * cancellation button (§312k BGB) — only for pages that look like a
        shop or subscription service,
      * imprint link (§5 TMG),
      * privacy policy link (Art. 13 DSGVO),
      * cookie consent banner (§25 TDDDG).

    Args:
        client: HTTP client used to probe well-known cancellation URLs.
        url: URL of the analysed page; its host is used for the probes.
        html: Raw HTML of the page.

    Returns:
        ``(findings, follow_ups)``: definite findings as strings, and
        questions for things that cannot be decided from the public page.
    """
    findings: list[str] = []
    follow_ups: list[FollowUpQuestion] = []
    html_lower = html.lower()
    base_domain = re.sub(r"https?://([^/]+).*", r"\1", url)

    def page_matches(patterns: list[str]) -> bool:
        """Return True if any regex in ``patterns`` matches the lowercased HTML."""
        return any(re.search(p, html_lower) for p in patterns)

    # E-Commerce detection — §312k only applies to sites with online contracts
    # NOTE(review): these are unanchored substring patterns, so "abo" also
    # matches "about" and "shop" matches "workshop" — confirm the resulting
    # false positives are acceptable.
    ecommerce_indicators = [
        r"warenkorb", r"cart", r"shop", r"bestell", r"order",
        r"checkout", r"kasse", r"kaufen", r"add.?to.?cart",
        r"stripe|paypal|klarna|mollie|adyen",
        r"abo", r"mitgliedschaft", r"subscription", r"premium",
    ]
    is_ecommerce = page_matches(ecommerce_indicators)

    # --- §312k BGB: cancellation button (ONLY for e-commerce/subscription sites) ---
    cancel_patterns = [
        r'href="[^"]*(?:kuendig|kündig|cancel|vertrag.?beenden|abo.?beenden|mitgliedschaft.?beenden)[^"]*"',
        r'(?:kündigen|kuendigen|vertrag beenden|abo beenden|mitgliedschaft kündigen)',
    ]
    has_cancel_link = page_matches(cancel_patterns)

    # The probe result only matters for e-commerce pages, so the (up to five)
    # network round trips are skipped for every other page.
    if is_ecommerce and not has_cancel_link:
        cancel_urls_to_probe = [
            f"https://{base_domain}/kuendigen",
            f"https://{base_domain}/cancel",
            f"https://{base_domain}/vertrag-kuendigen",
            f"https://{base_domain}/abo-kuendigen",
            f"https://{base_domain}/account/cancel",
        ]
        for probe_url in cancel_urls_to_probe:
            try:
                probe = await client.head(probe_url, follow_redirects=True, timeout=5.0)
            except Exception:
                # Best effort: an unreachable probe URL is simply not a hit.
                continue
            if probe.status_code < 400:
                has_cancel_link = True
                break

    if not has_cancel_link and is_ecommerce:
        findings.append(
            "[§312k BGB] Kein oeffentlich sichtbarer Kuendigungsbutton gefunden. "
            "Seit 01.07.2022 muessen online geschlossene Vertraege mit max. 2 Klicks kuendbar sein."
        )
        follow_ups.append(FollowUpQuestion(
            id="cancel_button_312k",
            question="Koennen Sie nach Login im Kundenbereich innerhalb von 2 Klicks Ihren Vertrag kuendigen?",
            legal_basis="§ 312k BGB (Kuendigungsbutton), Omnibus-Richtlinie (EU) 2019/2161",
            severity="high",
            finding_if_no=(
                "[§312k BGB] VERSTOSS: Kein funktionaler Kuendigungsbutton vorhanden. "
                "Der Anbieter ist verpflichtet, einen leicht auffindbaren Kuendigungsbutton "
                "bereitzustellen (max. 2 Klicks). Ein Zwang zur telefonischen Kuendigung "
                "oder Kuendigung per Brief ist rechtswidrig."
            ),
        ))

    # --- Imprint obligation (§5 TMG / §18 MStV) ---
    imprint_patterns = [
        r'href="[^"]*(?:impressum|imprint|legal.?notice|about.?us/legal)[^"]*"',
        r'>impressum<',
    ]
    if not page_matches(imprint_patterns):
        findings.append(
            "[§5 TMG] Kein Impressum-Link auf der Seite gefunden. "
            "Geschaeftsmaessige Online-Dienste muessen ein leicht erreichbares Impressum bereitstellen."
        )

    # --- Is a privacy policy linked? ---
    privacy_patterns = [
        r'href="[^"]*(?:datenschutz|privacy|dsgvo)[^"]*"',
        r'>datenschutz<',
    ]
    if not page_matches(privacy_patterns):
        findings.append(
            "[Art. 13 DSGVO] Kein Link zur Datenschutzerklaerung gefunden. "
            "Nutzer muessen ueber die Verarbeitung personenbezogener Daten informiert werden."
        )

    # --- Cookie consent banner ---
    cookie_patterns = [
        r'(?:cookie.?consent|cookie.?banner|consent.?manager|didomi|cookiebot|onetrust|usercentrics)',
        r'(?:gdpr|dsgvo).?(?:consent|einwilligung)',
    ]
    if not page_matches(cookie_patterns):
        # Absence in the static HTML is not proof, hence a question instead
        # of a finding.
        follow_ups.append(FollowUpQuestion(
            id="cookie_consent",
            question="Wird beim ersten Besuch der Website ein Cookie-Consent-Banner angezeigt?",
            legal_basis="§ 25 TDDDG (ehem. TTDSG), Art. 5(3) ePrivacy-Richtlinie",
            severity="medium",
            finding_if_no=(
                "[§25 TDDDG] Kein Cookie-Consent-Banner erkannt. "
                "Vor dem Setzen nicht-essentieller Cookies ist eine Einwilligung erforderlich."
            ),
        ))

    return findings, follow_ups
|
||||
|
||||
|
||||
def to_string_list(items: list) -> list[str]:
    """Flatten a list of dicts and/or arbitrary values into display strings.

    Dict entries become ``"[<code>] <text>"``: the text is the first present
    key of ``description``, ``name``, ``code`` (else the dict's ``str()``),
    the code is ``code`` or, if that key is absent, ``id``. Without a truthy
    code only the text is used. Every other entry is passed through ``str()``.
    A falsy ``items`` argument yields an empty list.
    """

    def render(entry) -> str:
        if not isinstance(entry, dict):
            return str(entry)

        # Pick the label by key presence, not by truthiness of the value.
        for field in ("description", "name", "code"):
            if field in entry:
                label = entry[field]
                break
        else:
            label = str(entry)

        if "code" in entry:
            tag = entry["code"]
        elif "id" in entry:
            tag = entry["id"]
        else:
            tag = ""

        if tag:
            return f"[{tag}] {label}"
        return str(label)

    return [render(entry) for entry in (items or [])]
|
||||
|
||||
|
||||
def risk_to_escalation(risk_level: str) -> str:
    """Translate a UCCA risk level into its escalation level.

    The lookup is case-insensitive. Empty, missing or unknown risk levels
    fall back to ``"E0"``.
    """
    fallback = "E0"
    if not risk_level:
        return fallback

    escalation_by_risk = {
        "MINIMAL": "E0",
        "LIMITED": "E1",
        "HIGH": "E2",
        "UNACCEPTABLE": "E3",
    }
    return escalation_by_risk.get(risk_level.upper(), fallback)
|
||||
@@ -40,107 +40,8 @@ class ScanResult:
|
||||
missing_pages: dict = field(default_factory=dict) # url -> status_code
|
||||
|
||||
|
||||
# ── Service Registry ──────────────────────────────────────────────────────────
|
||||
# Each entry: regex pattern -> service metadata
|
||||
# Registry of recognisable third-party services.
# Key:   regex pattern identifying the service.
# Value: metadata dict with the fields
#        id, name, category, provider, country, eu_adequate,
#        requires_consent, legal_ref.
SERVICE_REGISTRY: dict[str, dict] = {
    # --- Tracking & Analytics ---
    r"google.?analytics|gtag\(|UA-\d+|G-\w{5,}": {
        "id": "google_analytics",
        "name": "Google Analytics",
        "category": "tracking",
        "provider": "Google LLC",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
    },
    r"googletagmanager|gtm\.js": {
        "id": "google_tag_manager",
        "name": "Google Tag Manager",
        "category": "tracking",
        "provider": "Google LLC",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    r"facebook\.net/.*fbevents|fbq\(": {
        "id": "facebook_pixel",
        "name": "Meta/Facebook Pixel",
        "category": "marketing",
        "provider": "Meta Platforms",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
    },
    r"hotjar\.com|_hjSettings": {
        "id": "hotjar",
        "name": "Hotjar",
        "category": "tracking",
        "provider": "Hotjar Ltd",
        "country": "MT",
        "eu_adequate": True,
        "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Recording)",
    },
    r"clarity\.ms": {
        "id": "ms_clarity",
        "name": "Microsoft Clarity",
        "category": "tracking",
        "provider": "Microsoft",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": True,
        "legal_ref": "§25 TDDDG (Session Replay), Art. 44 DSGVO",
    },
    r"matomo|piwik": {
        "id": "matomo",
        "name": "Matomo",
        "category": "tracking",
        "provider": "InnoCraft/Self-hosted",
        "country": "EU/Self",
        "eu_adequate": True,
        "requires_consent": False,
        "legal_ref": "Cookieless moeglich, §25 TDDDG",
    },
    r"plausible\.io": {
        "id": "plausible",
        "name": "Plausible Analytics",
        "category": "tracking",
        "provider": "Plausible Insights",
        "country": "EE",
        "eu_adequate": True,
        "requires_consent": False,
        "legal_ref": "EU-Anbieter, cookieless",
    },
    # --- CDN & Fonts ---
    r"fonts\.googleapis\.com|fonts\.gstatic\.com": {
        "id": "google_fonts",
        "name": "Google Fonts (remote)",
        "category": "cdn",
        "provider": "Google LLC",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": True,
        "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
    },
    r"cdn\.cloudflare\.com|cdnjs\.cloudflare\.com": {
        "id": "cloudflare_cdn",
        "name": "Cloudflare CDN",
        "category": "cdn",
        "provider": "Cloudflare Inc",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": False,
        "legal_ref": "Art. 44-49 DSGVO, berechtigtes Interesse",
    },
    # --- Chatbots ---
    r"widget\.intercom\.io|intercomcdn": {
        "id": "intercom",
        "name": "Intercom",
        "category": "chatbot",
        "provider": "Intercom Inc",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO, KI-gestuetzt",
    },
    r"tidio\.co|tidioChatApi": {
        "id": "tidio",
        "name": "Tidio Chat",
        "category": "chatbot",
        "provider": "Tidio LLC",
        "country": "PL",
        "eu_adequate": True,
        "requires_consent": False,
        "legal_ref": "EU-Anbieter",
    },
    r"zendesk\.com/embeddable|zdassets": {
        "id": "zendesk",
        "name": "Zendesk",
        "category": "chatbot",
        "provider": "Zendesk Inc",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO",
    },
    # --- Payment ---
    r"js\.stripe\.com|stripe\.com/v3": {
        "id": "stripe",
        "name": "Stripe",
        "category": "payment",
        "provider": "Stripe Inc",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": False,
        "legal_ref": "Art. 6(1)(b) Vertragserfuellung, SCCs",
    },
    r"paypal\.com/sdk|paypalobjects": {
        "id": "paypal",
        "name": "PayPal",
        "category": "payment",
        "provider": "PayPal Holdings",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": False,
        "legal_ref": "Art. 6(1)(b) Vertragserfuellung",
    },
    r"klarna\.com|klarna-payments": {
        "id": "klarna",
        "name": "Klarna",
        "category": "payment",
        "provider": "Klarna AB",
        "country": "SE",
        "eu_adequate": True,
        "requires_consent": False,
        "legal_ref": "EU, aber Art. 22 DSGVO bei Bonitaetspruefung!",
    },
    # --- Captcha ---
    r"recaptcha|grecaptcha": {
        "id": "recaptcha",
        "name": "Google reCAPTCHA",
        "category": "other",
        "provider": "Google LLC",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
    },
    # --- Video ---
    r"youtube\.com/embed|youtube-nocookie|ytimg": {
        "id": "youtube",
        "name": "YouTube",
        "category": "other",
        "provider": "Google LLC",
        "country": "US",
        "eu_adequate": False,
        "requires_consent": True,
        "legal_ref": "Art. 44-49 DSGVO, 2-Klick empfohlen",
    },
    # --- Consent Management ---
    r"didomi|cookiebot|onetrust|usercentrics|consentmanager|quantcast": {
        "id": "cmp",
        "name": "Consent Management Platform",
        "category": "other",
        "provider": "Various",
        "country": "EU",
        "eu_adequate": True,
        "requires_consent": False,
        "legal_ref": "CMP vorhanden — gut",
    },
}
|
||||
# ── Service Registry (imported from master) ──────────────────────────────────
|
||||
from compliance.services.service_registry import SERVICE_REGISTRY # noqa: E402
|
||||
|
||||
AI_TEXT_PATTERNS = [
|
||||
r"k(?:ue|ü)nstliche.?intelligenz",
|
||||
@@ -157,9 +58,13 @@ AI_TEXT_PATTERNS = [
|
||||
|
||||
# (regex, label) pairs used to discover legal/policy pages from anchor hrefs.
# Group 1 of every regex captures the raw href value.
#
# Only ONE entry carries the "datenschutz" label: the older, narrower pattern
# (datenschutz|privacy|dsgvo) was a strict subset of the extended one below,
# so keeping both reported every privacy link twice under the same label.
FOOTER_LINK_PATTERNS = [
    (r'href="([^"]*(?:impressum|imprint|legal-notice)[^"]*)"', "impressum"),
    (r'href="([^"]*(?:datenschutz|privacy|dsgvo|hinweise.?zum.?datenschutz)[^"]*)"', "datenschutz"),
    (r'href="([^"]*(?:agb|terms|nutzungsbedingungen)[^"]*)"', "agb"),
    (r'href="([^"]*(?:cookie)[^"]*)"', "cookies"),
    # Deep DSE links (regional pages, sub-pages, service marks)
    (r'href="([^"]*(?:datenschutzinformation|datenschutzerklaerung|datenschutzerkl)[^"]*)"', "datenschutz_deep"),
    # Navigation links often contain DSB/privacy sub-pages
    (r'href="([^"]*(?:ueber.?uns.*datenschutz|servicemarken.*datenschutz|kontakt.*datenschutz)[^"]*)"', "datenschutz_nav"),
]
|
||||
|
||||
|
||||
@@ -183,15 +88,46 @@ async def scan_website(base_url: str) -> ScanResult:
|
||||
href = match.group(1)
|
||||
if href.startswith("/"):
|
||||
href = urljoin(origin, href)
|
||||
if href.startswith(origin):
|
||||
if href.startswith(origin) and not re.search(r"\.(css|js|png|jpg|gif|svg|pdf|zip)(\?|$)", href):
|
||||
page_urls.add(href)
|
||||
|
||||
# 3. Scan all pages (max 10)
|
||||
for url in list(page_urls)[:10]:
|
||||
html = start_html if url == origin else await _fetch_page(client, url, result)
|
||||
if html:
|
||||
# 3. Scan all pages in PARALLEL (max 10)
|
||||
import asyncio
|
||||
other_urls = [u for u in list(page_urls)[:10] if u != origin]
|
||||
fetch_tasks = [_fetch_page(client, u, result) for u in other_urls]
|
||||
other_htmls = await asyncio.gather(*fetch_tasks, return_exceptions=True)
|
||||
|
||||
# Process start page
|
||||
_detect_services(start_html, origin, result)
|
||||
_detect_ai_mentions(start_html, origin, result)
|
||||
|
||||
# Process other pages + discover DSE-internal links
|
||||
dse_internal_urls = set()
|
||||
for url, html in zip(other_urls, other_htmls):
|
||||
if isinstance(html, str) and html:
|
||||
_detect_services(html, url, result)
|
||||
_detect_ai_mentions(html, url, result)
|
||||
# If this is a DSE page, find links within it (SAME DOMAIN only)
|
||||
if re.search(r"datenschutz|privacy|dsgvo", url, re.IGNORECASE):
|
||||
for pattern, _ in FOOTER_LINK_PATTERNS:
|
||||
for match in re.finditer(pattern, html, re.IGNORECASE):
|
||||
href = match.group(1)
|
||||
if href.startswith("/"):
|
||||
href = urljoin(origin, href)
|
||||
# IMPORTANT: Only follow links on the SAME domain
|
||||
# External links (etracker.com, google.de) must NOT be scanned
|
||||
if href.startswith(origin) and href not in page_urls:
|
||||
dse_internal_urls.add(href)
|
||||
|
||||
# 4. Follow DSE-internal links (additional pages linked from privacy policy)
|
||||
if dse_internal_urls:
|
||||
extra_urls = [u for u in list(dse_internal_urls)[:5] if u not in page_urls]
|
||||
if extra_urls:
|
||||
extra_tasks = [_fetch_page(client, u, result) for u in extra_urls]
|
||||
extra_htmls = await asyncio.gather(*extra_tasks, return_exceptions=True)
|
||||
for url, html in zip(extra_urls, extra_htmls):
|
||||
if isinstance(html, str) and html:
|
||||
_detect_services(html, url, result)
|
||||
|
||||
# Deduplicate services
|
||||
seen = set()
|
||||
|
||||
Reference in New Issue
Block a user