Merge feat/zeroclaw-compliance-agent into main

Brings all compliance doc-check features:
- 162 regex checks + 1874 Master Controls
- LLM-agnostic agent with tool calling
- Banner check (46 checks, 30 CMPs, stealth, Shadow DOM)
- Impressum check (24 checks)
- Deep consent verification (DataLayer, GCM, TCF)
- CMP E2E tests (39 tests)
- HTML email reports, FAQ, persistent history

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-11 11:44:20 +02:00
175 changed files with 20063 additions and 1283 deletions
@@ -63,6 +63,13 @@ _ROUTER_MODULES = [
"tom_mapping_routes",
"llm_audit_routes",
"assertion_routes",
"org_role_routes",
"document_review_routes",
"banner_analytics_routes",
"banner_ab_routes",
"compliance_report_routes",
"whistleblower_routes",
"tcf_routes",
]
_loaded_count = 0
@@ -15,6 +15,14 @@ from fastapi import APIRouter
from pydantic import BaseModel
from compliance.services.smtp_sender import send_email
from compliance.services.intake_extractor import extract_intake_flags_from_services, flags_to_ucca_intake
from compliance.services.relevance_filter import filter_controls
from compliance.services.website_compliance_checks import (
check_website_compliance as _check_website_compliance,
FollowUpQuestion,
to_string_list as _to_string_list,
risk_to_escalation as _risk_to_escalation,
)
logger = logging.getLogger(__name__)
@@ -77,21 +85,32 @@ async def analyze_url(req: AnalyzeRequest):
# Step 2: Classify via SDK LLM
classification = await _classify(client, text)
# Step 3: Assess via UCCA
assessment = await _assess(client, text, classification)
# Step 3: Detect services from HTML (deterministic, no LLM needed)
from compliance.services.service_registry import SERVICE_REGISTRY
detected_services = []
html_lower = raw_html.lower()
for pattern, meta in SERVICE_REGISTRY.items():
if re.search(pattern, html_lower):
detected_services.append(meta)
# Step 4: Determine role
# Step 4: Derive intake flags from DETECTED SERVICES (not from text!)
intake_flags = extract_intake_flags_from_services(detected_services)
# Step 5: Assess via UCCA with service-derived flags
assessment = await _assess(client, text, classification, intake_flags)
# Step 5: Determine role
esc_level = assessment.get("escalation_level", "E0")
role = ESCALATION_ROLES.get(esc_level, ESCALATION_ROLES["E0"])
# Step 5: Website compliance checks (§312k BGB etc.)
# Step 6: Website compliance checks (§312k BGB etc.)
site_findings, follow_ups = await _check_website_compliance(client, req.url, raw_html)
# Step 6: Merge findings
# Step 7: Merge and filter findings/controls
findings = assessment.get("triggered_rules", [])
controls = assessment.get("required_controls", [])
findings_str = _to_string_list(findings) + site_findings
controls_str = _to_string_list(controls)
controls_str = filter_controls(_to_string_list(controls), text, intake_flags)
# Escalate if website checks found issues
if site_findings and esc_level == "E0":
@@ -105,7 +124,7 @@ async def analyze_url(req: AnalyzeRequest):
email_result = send_email(
recipient=req.recipient,
subject=f"[{mode_label}] Compliance-Finding: {classification}{req.url[:60]}",
body_html=f"<div>{summary}</div>",
body_html=summary,
)
return AnalyzeResponse(
@@ -179,34 +198,24 @@ async def _classify(client: httpx.AsyncClient, text: str) -> str:
return "other"
async def _assess(client: httpx.AsyncClient, text: str, classification: str) -> dict:
async def _assess(client: httpx.AsyncClient, text: str, classification: str, intake_flags: dict | None = None) -> dict:
"""Run UCCA assessment via SDK. Returns flattened result dict."""
try:
# UCCA expects boolean intake flags, not string categories
# Use LLM-extracted flags if available, otherwise minimal defaults
if intake_flags:
ucca_intake = flags_to_ucca_intake(intake_flags)
else:
ucca_intake = {
"data_types": {"personal_data": True},
"purpose": {},
"automation": "manual",
"outputs": {},
}
resp = await client.post(f"{SDK_URL}/sdk/v1/ucca/assess", headers=SDK_HEADERS, json={
"use_case_text": text[:3000],
"domain": classification,
"data_types": {
"personal_data": True,
"customer_data": True,
"location_data": "tracking" in text.lower() or "standort" in text.lower(),
"images": False,
"biometric_data": "biometrisch" in text.lower(),
"minor_data": "kinder" in text.lower() or "minderjährig" in text.lower(),
},
"purpose": {
"marketing": "werbung" in text.lower() or "marketing" in text.lower(),
"analytics": "analyse" in text.lower() or "analytics" in text.lower(),
"profiling": "profil" in text.lower() or "personalis" in text.lower(),
"automation": False,
"customer_support": False,
},
"automation": "partially_automated",
"outputs": {
"content_generation": False,
"recommendations_to_users": "empfehl" in text.lower(),
"data_export": "export" in text.lower() or "uebertrag" in text.lower(),
},
**ucca_intake,
})
data = resp.json()
# Flatten: UCCA wraps result under "assessment" and "result"
@@ -227,126 +236,27 @@ async def _assess(client: httpx.AsyncClient, text: str, classification: str) ->
return {"risk_level": "unknown", "risk_score": 0, "escalation_level": "E0"}
async def _check_website_compliance(
client: httpx.AsyncClient, url: str, html: str,
) -> tuple[list[str], list[FollowUpQuestion]]:
"""Scan public website for consumer protection compliance (§312k BGB etc.)."""
findings: list[str] = []
follow_ups: list[FollowUpQuestion] = []
html_lower = html.lower()
base_domain = re.sub(r"https?://([^/]+).*", r"\1", url)
# --- §312k BGB: Kündigungsbutton ---
cancel_patterns = [
r'href="[^"]*(?:kuendig|kündig|cancel|vertrag.?beenden|abo.?beenden|mitgliedschaft.?beenden)[^"]*"',
r'(?:kündigen|kuendigen|vertrag beenden|abo beenden|mitgliedschaft kündigen)',
]
has_cancel_link = any(re.search(p, html_lower) for p in cancel_patterns)
# Also check common cancel URLs
cancel_urls_to_probe = [
f"https://{base_domain}/kuendigen",
f"https://{base_domain}/cancel",
f"https://{base_domain}/vertrag-kuendigen",
f"https://{base_domain}/abo-kuendigen",
f"https://{base_domain}/account/cancel",
]
if not has_cancel_link:
for probe_url in cancel_urls_to_probe:
try:
probe = await client.head(probe_url, follow_redirects=True, timeout=5.0)
if probe.status_code < 400:
has_cancel_link = True
break
except Exception:
continue
if not has_cancel_link:
findings.append(
"[§312k BGB] Kein oeffentlich sichtbarer Kuendigungsbutton gefunden. "
"Seit 01.07.2022 muessen online geschlossene Vertraege mit max. 2 Klicks kuendbar sein."
)
follow_ups.append(FollowUpQuestion(
id="cancel_button_312k",
question="Koennen Sie nach Login im Kundenbereich innerhalb von 2 Klicks Ihren Vertrag kuendigen?",
legal_basis="§ 312k BGB (Kuendigungsbutton), Omnibus-Richtlinie (EU) 2019/2161",
severity="high",
finding_if_no=(
"[§312k BGB] VERSTOSS: Kein funktionaler Kuendigungsbutton vorhanden. "
"Der Anbieter ist verpflichtet, einen leicht auffindbaren Kuendigungsbutton "
"bereitzustellen (max. 2 Klicks). Ein Zwang zur telefonischen Kuendigung "
"oder Kuendigung per Brief ist rechtswidrig."
),
))
# --- Impressumspflicht (§5 TMG / §18 MStV) ---
imprint_patterns = [
r'href="[^"]*(?:impressum|imprint|legal.?notice|about.?us/legal)[^"]*"',
r'>impressum<',
]
has_imprint = any(re.search(p, html_lower) for p in imprint_patterns)
if not has_imprint:
findings.append(
"[§5 TMG] Kein Impressum-Link auf der Seite gefunden. "
"Geschaeftsmaessige Online-Dienste muessen ein leicht erreichbares Impressum bereitstellen."
)
# --- Datenschutzerklaerung verlinkt? ---
privacy_patterns = [
r'href="[^"]*(?:datenschutz|privacy|dsgvo)[^"]*"',
r'>datenschutz<',
]
has_privacy = any(re.search(p, html_lower) for p in privacy_patterns)
if not has_privacy:
findings.append(
"[Art. 13 DSGVO] Kein Link zur Datenschutzerklaerung gefunden. "
"Nutzer muessen ueber die Verarbeitung personenbezogener Daten informiert werden."
)
# --- Cookie-Consent-Banner ---
cookie_patterns = [
r'(?:cookie.?consent|cookie.?banner|consent.?manager|didomi|cookiebot|onetrust|usercentrics)',
r'(?:gdpr|dsgvo).?(?:consent|einwilligung)',
]
has_cookie_consent = any(re.search(p, html_lower) for p in cookie_patterns)
if not has_cookie_consent:
follow_ups.append(FollowUpQuestion(
id="cookie_consent",
question="Wird beim ersten Besuch der Website ein Cookie-Consent-Banner angezeigt?",
legal_basis="§ 25 TDDDG (ehem. TTDSG), Art. 5(3) ePrivacy-Richtlinie",
severity="medium",
finding_if_no=(
"[§25 TDDDG] Kein Cookie-Consent-Banner erkannt. "
"Vor dem Setzen nicht-essentieller Cookies ist eine Einwilligung erforderlich."
),
))
return findings, follow_ups
# _check_website_compliance, _to_string_list, _risk_to_escalation
# → extracted to compliance/services/website_compliance_checks.py
def _to_string_list(items: list) -> list[str]:
"""Convert list of dicts or strings to list of strings."""
result = []
for item in (items or []):
if isinstance(item, dict):
# UCCA returns {code, category, description} or {id, name, description}
desc = item.get("description", item.get("name", item.get("code", str(item))))
code = item.get("code", item.get("id", ""))
result.append(f"[{code}] {desc}" if code else str(desc))
else:
result.append(str(item))
return result
DOC_TYPE_LABELS = {
"privacy_policy": "Datenschutzerklaerung",
"cookie_banner": "Cookie-Banner",
"terms_of_service": "AGB",
"imprint": "Impressum",
"dpa": "Auftragsverarbeitung (AVV)",
"other": "Sonstiges",
}
def _risk_to_escalation(risk_level: str) -> str:
"""Map UCCA risk level to escalation level."""
mapping = {
"MINIMAL": "E0",
"LIMITED": "E1",
"HIGH": "E2",
"UNACCEPTABLE": "E3",
}
return mapping.get(risk_level.upper() if risk_level else "", "E0")
RISK_COLORS = {
"MINIMAL": ("#16a34a", "Niedrig"),
"LOW": ("#ca8a04", "Gering"),
"LIMITED": ("#ea580c", "Mittel"),
"HIGH": ("#dc2626", "Hoch"),
"UNACCEPTABLE": ("#991b1b", "Kritisch"),
}
def _build_summary(
@@ -354,48 +264,54 @@ def _build_summary(
findings_str: list[str], controls_str: list[str],
mode: str = "post_launch",
) -> str:
"""Build a German manager summary, adapted to pre/post-launch context."""
"""Build HTML summary for email and frontend."""
risk = assessment.get("risk_level", "unbekannt")
score = assessment.get("risk_score", 0)
recommendation = assessment.get("recommendation", "")
dsfa = assessment.get("dsfa_recommended", False)
is_live = mode == "post_launch"
risk_color, risk_label = RISK_COLORS.get(risk, ("#6b7280", risk))
doc_label = DOC_TYPE_LABELS.get(classification, classification)
findings_text = "\n".join(f"- {f}" for f in findings_str[:5]) if findings_str else "Keine"
controls_text = "\n".join(f"- {c}" for c in controls_str[:5]) if controls_str else "Keine"
mode_header = (
"PRUEFUNG LIVE-WEBSITE — Das Dokument ist bereits oeffentlich zugaenglich."
mode_banner = (
'<div style="background:#fef2f2;border-left:4px solid #dc2626;padding:12px 16px;margin-bottom:16px;">'
'<strong style="color:#991b1b;">LIVE-WEBSITE</strong> — Das Dokument ist bereits oeffentlich zugaenglich.</div>'
if is_live else
"INTERNE PRUEFUNG — Das Dokument ist noch nicht veroeffentlicht."
'<div style="background:#eff6ff;border-left:4px solid #3b82f6;padding:12px 16px;margin-bottom:16px;">'
'<strong style="color:#1e40af;">INTERNE PRUEFUNG</strong> — Dokument noch nicht veroeffentlicht.</div>'
)
parts = [
mode_header,
"",
f"Dokumenttyp: {classification}",
f"Quelle: {url}",
f"Risikobewertung: {risk} ({score}/100)",
f"Zustaendig: {role}",
f"DSFA empfohlen: {'Ja' if dsfa else 'Nein'}",
"",
f"Findings:\n{findings_text}",
"",
f"Erforderliche Massnahmen:\n{controls_text}",
]
findings_html = "".join(f'<li style="margin-bottom:4px;">{f}</li>' for f in findings_str[:8]) if findings_str else '<li style="color:#6b7280;">Keine</li>'
controls_html = "".join(f'<li style="margin-bottom:4px;">{c}</li>' for c in controls_str[:8]) if controls_str else '<li style="color:#6b7280;">Keine</li>'
warning = ""
if is_live and findings_str:
parts.extend([
"",
"ACHTUNG: Diese Maengel sind bereits oeffentlich sichtbar. "
"Sofortige Nachbesserung empfohlen um Abmahnrisiken zu minimieren.",
])
warning = (
'<div style="background:#fef2f2;border:1px solid #fecaca;border-radius:8px;padding:12px 16px;margin-top:16px;">'
'<strong style="color:#dc2626;">⚠ ACHTUNG:</strong> Diese Maengel sind bereits oeffentlich sichtbar. '
'Sofortige Nachbesserung empfohlen um Abmahnrisiken zu minimieren.</div>'
)
elif not is_live and controls_str:
parts.extend([
"",
"Empfehlung: Implementieren Sie die erforderlichen Kontrollen vor der Veroeffentlichung.",
])
warning = (
'<div style="background:#f0fdf4;border:1px solid #bbf7d0;border-radius:8px;padding:12px 16px;margin-top:16px;">'
'Empfehlung: Implementieren Sie die erforderlichen Kontrollen vor der Veroeffentlichung.</div>'
)
if recommendation:
parts.extend(["", f"Weitere Empfehlung: {recommendation}"])
return "\n".join(parts)
rec_html = f'<p style="color:#475569;margin-top:12px;"><em>{recommendation}</em></p>' if recommendation else ""
return f"""
{mode_banner}
<table style="width:100%;border-collapse:collapse;margin-bottom:16px;">
<tr><td style="padding:6px 0;color:#64748b;width:180px;">Dokumenttyp</td><td style="padding:6px 0;font-weight:600;">{doc_label}</td></tr>
<tr><td style="padding:6px 0;color:#64748b;">Quelle</td><td style="padding:6px 0;"><a href="{url}" style="color:#6366f1;">{url}</a></td></tr>
<tr><td style="padding:6px 0;color:#64748b;">Risikobewertung</td><td style="padding:6px 0;"><span style="background:{risk_color};color:white;padding:2px 8px;border-radius:4px;font-size:13px;">{risk_label} ({score}/100)</span></td></tr>
<tr><td style="padding:6px 0;color:#64748b;">Zustaendig</td><td style="padding:6px 0;font-weight:600;">{role}</td></tr>
<tr><td style="padding:6px 0;color:#64748b;">DSFA empfohlen</td><td style="padding:6px 0;">{'Ja' if dsfa else 'Nein'}</td></tr>
</table>
<h3 style="color:#1e293b;font-size:15px;margin:16px 0 8px;">Findings</h3>
<ul style="margin:0;padding-left:20px;color:#334155;">{findings_html}</ul>
<h3 style="color:#1e293b;font-size:15px;margin:16px 0 8px;">Erforderliche Massnahmen</h3>
<ul style="margin:0;padding-left:20px;color:#334155;">{controls_html}</ul>
{warning}
{rec_html}
"""
@@ -0,0 +1,94 @@
"""
Agent Compare Routes — scan multiple websites and compare compliance posture.
POST /api/compliance/agent/compare
"""
import asyncio
import logging
from datetime import datetime, timezone
import httpx
from fastapi import APIRouter
from pydantic import BaseModel
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/compliance/agent", tags=["agent"])
class CompareRequest(BaseModel):
urls: list[str] # 2-5 URLs to compare
mode: str = "post_launch"
class SiteResult(BaseModel):
url: str
domain: str
risk_level: str = ""
risk_score: float = 0
findings_count: int = 0
services_count: int = 0
has_impressum: bool = False
has_datenschutz: bool = False
has_cookie_banner: bool = False
has_google_fonts: bool = False
tracking_before_consent: int = 0
classification: str = ""
scan_status: str = "pending"
class CompareResponse(BaseModel):
sites: list[SiteResult]
compared_at: str
@router.post("/compare", response_model=CompareResponse)
async def compare_websites(req: CompareRequest):
"""Scan multiple websites and compare their compliance posture."""
urls = req.urls[:5] # Max 5
async def scan_one(url: str) -> SiteResult:
domain = url.split("/")[2] if len(url.split("/")) > 2 else url
try:
async with httpx.AsyncClient(timeout=120.0) as client:
resp = await client.post(
"http://localhost:8002/api/compliance/agent/scan",
json={"url": url, "mode": req.mode},
)
if resp.status_code != 200:
return SiteResult(url=url, domain=domain, scan_status="failed")
data = resp.json()
services = data.get("services", [])
findings = data.get("findings", [])
return SiteResult(
url=url,
domain=domain,
risk_level=data.get("risk_level", ""),
risk_score=data.get("risk_score", 0),
findings_count=len(findings),
services_count=len(services),
has_impressum=not any("IMPRESSUM" in f.get("code", "") for f in findings if isinstance(f, dict)),
has_datenschutz=not any("DATENSCHUTZ" in f.get("code", "") for f in findings if isinstance(f, dict)),
has_cookie_banner=data.get("chatbot_detected", False) or any(
s.get("id") == "cmp" for s in services if isinstance(s, dict)
),
has_google_fonts=any(
s.get("id") == "google_fonts" for s in services if isinstance(s, dict)
),
classification=data.get("classification", ""),
scan_status="completed",
)
except Exception as e:
logger.error("Compare scan failed for %s: %s", url, e)
return SiteResult(url=url, domain=domain, scan_status="error")
# Scan all in parallel
results = await asyncio.gather(*[scan_one(u) for u in urls])
return CompareResponse(
sites=list(results),
compared_at=datetime.now(timezone.utc).isoformat(),
)
@@ -0,0 +1,220 @@
"""
Agent History Routes — persist and retrieve scan results.
GET /api/compliance/agent/scans — list recent scans
GET /api/compliance/agent/scans/{id} — get single scan
POST /api/compliance/agent/scans — save a scan result
"""
import json
import logging
import os
import uuid
from datetime import datetime, timezone
from fastapi import APIRouter, Query
from fastapi.responses import Response
from pydantic import BaseModel
from compliance.services.agent_pdf_export import generate_scan_pdf
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/compliance/agent", tags=["agent"])
DATABASE_URL = os.environ.get(
"COMPLIANCE_DATABASE_URL",
os.environ.get("DATABASE_URL", ""),
)
class SaveScanRequest(BaseModel):
url: str
scan_type: str = "scan"
analysis_mode: str = "post_launch"
result: dict # Full scan result JSON
class ScanHistoryItem(BaseModel):
id: str
url: str
scan_type: str
analysis_mode: str
risk_level: str | None = None
risk_score: float = 0
findings_count: int = 0
pages_scanned: int = 0
email_sent: bool = False
created_at: str
class ScanDetail(BaseModel):
id: str
url: str
scan_type: str
analysis_mode: str
result: dict
created_at: str
async def _get_pool():
"""Get or create database connection pool."""
import asyncpg
if not DATABASE_URL:
return None
try:
return await asyncpg.create_pool(DATABASE_URL, min_size=1, max_size=3)
except Exception as e:
logger.warning("DB connection failed: %s", e)
return None
@router.post("/scans")
async def save_scan(req: SaveScanRequest):
"""Save a scan result to the database."""
pool = await _get_pool()
if not pool:
return {"status": "skipped", "reason": "no database"}
scan_id = str(uuid.uuid4())
result = req.result
try:
async with pool.acquire() as conn:
await conn.execute("""
INSERT INTO compliance_agent_scans
(id, url, scan_type, analysis_mode, classification, risk_level,
risk_score, escalation_level, responsible_role, services,
findings, summary_html, pages_scanned, pages_list, email_sent,
created_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)
""",
uuid.UUID(scan_id),
req.url,
req.scan_type,
req.analysis_mode,
result.get("classification", ""),
result.get("risk_level", ""),
result.get("risk_score", 0),
result.get("escalation_level", ""),
result.get("responsible_role", ""),
json.dumps(result.get("services", [])),
json.dumps(result.get("findings", [])),
result.get("summary", result.get("summary_html", "")),
result.get("pages_scanned", 0),
json.dumps(result.get("pages_list", [])),
result.get("email_status") == "sent",
datetime.now(timezone.utc),
)
return {"status": "saved", "id": scan_id}
except Exception as e:
logger.error("Failed to save scan: %s", e)
return {"status": "error", "error": str(e)}
finally:
await pool.close()
@router.get("/scans", response_model=list[ScanHistoryItem])
async def list_scans(
limit: int = Query(20, le=100),
scan_type: str | None = None,
):
"""List recent scans."""
pool = await _get_pool()
if not pool:
return []
try:
async with pool.acquire() as conn:
query = """
SELECT id, url, scan_type, analysis_mode, risk_level, risk_score,
findings, pages_scanned, email_sent, created_at
FROM compliance_agent_scans
"""
params = []
if scan_type:
query += " WHERE scan_type = $1"
params.append(scan_type)
query += " ORDER BY created_at DESC LIMIT " + str(limit)
rows = await conn.fetch(query, *params)
return [
ScanHistoryItem(
id=str(r["id"]),
url=r["url"],
scan_type=r["scan_type"],
analysis_mode=r["analysis_mode"],
risk_level=r["risk_level"],
risk_score=r["risk_score"] or 0,
findings_count=len(json.loads(r["findings"] or "[]")),
pages_scanned=r["pages_scanned"] or 0,
email_sent=r["email_sent"] or False,
created_at=r["created_at"].isoformat() if r["created_at"] else "",
)
for r in rows
]
except Exception as e:
logger.error("Failed to list scans: %s", e)
return []
finally:
await pool.close()
@router.get("/scans/{scan_id}", response_model=ScanDetail)
async def get_scan(scan_id: str):
"""Get a single scan result."""
pool = await _get_pool()
if not pool:
return ScanDetail(id=scan_id, url="", scan_type="", analysis_mode="", result={}, created_at="")
try:
async with pool.acquire() as conn:
row = await conn.fetchrow("""
SELECT * FROM compliance_agent_scans WHERE id = $1
""", uuid.UUID(scan_id))
if not row:
return ScanDetail(id=scan_id, url="", scan_type="", analysis_mode="", result={}, created_at="")
return ScanDetail(
id=str(row["id"]),
url=row["url"],
scan_type=row["scan_type"],
analysis_mode=row["analysis_mode"],
result={
"classification": row["classification"],
"risk_level": row["risk_level"],
"risk_score": row["risk_score"],
"services": json.loads(row["services"] or "[]"),
"findings": json.loads(row["findings"] or "[]"),
"summary": row["summary_html"],
"pages_scanned": row["pages_scanned"],
"pages_list": json.loads(row["pages_list"] or "[]"),
},
created_at=row["created_at"].isoformat() if row["created_at"] else "",
)
except Exception as e:
logger.error("Failed to get scan: %s", e)
return ScanDetail(id=scan_id, url="", scan_type="", analysis_mode="", result={}, created_at="")
finally:
await pool.close()
@router.post("/scans/pdf")
async def export_scan_pdf(req: SaveScanRequest):
"""Generate a PDF report from scan results (no DB required)."""
try:
pdf_bytes = generate_scan_pdf({
"url": req.url,
"scan_type": req.scan_type,
"analysis_mode": req.analysis_mode,
**req.result,
})
return Response(
content=pdf_bytes,
media_type="application/pdf",
headers={"Content-Disposition": f'attachment; filename="compliance-report-{req.url.split("/")[2][:30]}.pdf"'},
)
except Exception as e:
logger.error("PDF generation failed: %s", e)
return {"error": str(e)}
@@ -0,0 +1,111 @@
"""
Agent Recurring Scan Routes — schedule and run automated periodic scans.
POST /api/compliance/agent/monitored-urls — add URL to monitoring
GET /api/compliance/agent/monitored-urls — list monitored URLs
POST /api/compliance/agent/run-scheduled — trigger all scheduled scans
"""
import json
import logging
import os
import uuid
from datetime import datetime, timezone
from fastapi import APIRouter
from pydantic import BaseModel
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/compliance/agent", tags=["agent"])
DATABASE_URL = os.environ.get(
"COMPLIANCE_DATABASE_URL",
os.environ.get("DATABASE_URL", ""),
)
# In-memory fallback when no DB available
_monitored_urls: list[dict] = []
class MonitoredURL(BaseModel):
url: str
scan_type: str = "scan" # scan, consent_test
frequency: str = "weekly" # daily, weekly, monthly
recipient: str = "dsb@breakpilot.local"
enabled: bool = True
@router.post("/monitored-urls")
async def add_monitored_url(req: MonitoredURL):
"""Add a URL to the monitoring list."""
entry = {
"id": str(uuid.uuid4()),
"url": req.url,
"scan_type": req.scan_type,
"frequency": req.frequency,
"recipient": req.recipient,
"enabled": req.enabled,
"created_at": datetime.now(timezone.utc).isoformat(),
"last_scan_at": None,
}
_monitored_urls.append(entry)
logger.info("Added monitored URL: %s (%s)", req.url, req.frequency)
return {"status": "added", **entry}
@router.get("/monitored-urls")
async def list_monitored_urls():
"""List all monitored URLs."""
return {"urls": _monitored_urls}
@router.delete("/monitored-urls/{url_id}")
async def remove_monitored_url(url_id: str):
"""Remove a URL from monitoring."""
global _monitored_urls
_monitored_urls = [u for u in _monitored_urls if u["id"] != url_id]
return {"status": "removed"}
@router.post("/run-scheduled")
async def run_scheduled_scans():
"""Trigger all enabled scheduled scans. Called by cron/ZeroClaw."""
import httpx
results = []
backend_url = "http://localhost:8002"
for entry in _monitored_urls:
if not entry["enabled"]:
continue
url = entry["url"]
scan_type = entry["scan_type"]
logger.info("Running scheduled %s for %s", scan_type, url)
try:
async with httpx.AsyncClient(timeout=300.0) as client:
if scan_type == "consent_test":
resp = await client.post(
"http://bp-compliance-consent-tester:8094/scan",
json={"url": url},
)
else:
resp = await client.post(
f"{backend_url}/api/compliance/agent/scan",
json={"url": url, "mode": "post_launch", "recipient": entry["recipient"]},
)
entry["last_scan_at"] = datetime.now(timezone.utc).isoformat()
results.append({
"url": url,
"scan_type": scan_type,
"status": "completed" if resp.status_code == 200 else "failed",
"status_code": resp.status_code,
})
except Exception as e:
logger.error("Scheduled scan failed for %s: %s", url, e)
results.append({"url": url, "scan_type": scan_type, "status": "error", "error": str(e)})
return {"scans_triggered": len(results), "results": results}
@@ -73,6 +73,7 @@ def build_scan_summary(
f"Findings: {n_findings} ({high} mit hoher Prioritaet)",
])
<<<<<<< HEAD
# DSI Documents section — grouped with their findings
if discovered_docs:
parts.extend(["", f"Rechtliche Dokumente ({len(discovered_docs)})"])
@@ -108,6 +109,27 @@ def build_scan_summary(
marker = "!!" if sev == "HIGH" else "!" if sev == "MEDIUM" else "i"
parts.append(f" [{marker}] {txt}")
elif findings:
=======
# DSI Documents section
if discovered_docs:
parts.extend([
"",
f"Rechtliche Dokumente gefunden: {len(discovered_docs)}",
])
for doc in discovered_docs:
pct = doc.completeness_pct if hasattr(doc, 'completeness_pct') else 0
fc = doc.findings_count if hasattr(doc, 'findings_count') else 0
wc = doc.word_count if hasattr(doc, 'word_count') else 0
status = "OK" if pct >= 80 else "LUECKENHAFT" if pct >= 50 else "MANGELHAFT"
dt = doc.doc_type if hasattr(doc, 'doc_type') else "unknown"
title = doc.title if hasattr(doc, 'title') else "?"
parts.append(
f" [{status}] {title} ({dt}, {wc} Woerter, "
f"{pct}% vollstaendig, {fc} Maengel)"
)
if findings:
>>>>>>> feat/zeroclaw-compliance-agent
parts.append("")
for f in findings[:20]:
sev = f.severity if hasattr(f, 'severity') else "?"
@@ -123,6 +145,7 @@ def build_scan_summary(
])
return "\n".join(parts)
<<<<<<< HEAD
async def fetch_dse_text(url: str, scanned_pages: list[str]) -> str:
@@ -161,3 +184,5 @@ async def fetch_dse_html(url: str, scanned_pages: list[str]) -> str:
return resp.text
except Exception:
return ""
=======
>>>>>>> feat/zeroclaw-compliance-agent
@@ -23,9 +23,13 @@ from compliance.services.mandatory_content_checker import (
check_mandatory_documents, check_dse_mandatory_content, MandatoryFinding,
)
from compliance.services.legal_basis_validator import validate_legal_bases
<<<<<<< HEAD
from compliance.api.agent_scan_helpers import (
add_corrections, build_scan_summary, fetch_dse_text, fetch_dse_html,
)
=======
from compliance.api.agent_scan_helpers import add_corrections, build_scan_summary
>>>>>>> feat/zeroclaw-compliance-agent
logger = logging.getLogger(__name__)
@@ -79,7 +83,10 @@ class ScanFinding(BaseModel):
severity: str
text: str
correction: str = ""
<<<<<<< HEAD
doc_title: str = ""
=======
>>>>>>> feat/zeroclaw-compliance-agent
text_reference: TextReferenceModel | None = None
@@ -219,17 +226,69 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
else:
scan = await scan_website(req.url)
<<<<<<< HEAD
logger.info("Scanned %d pages, found %d services", len(scan.pages_scanned), len(scan.detected_services))
_progress(f"Schritt 2/7: Rechtliche Dokumente suchen... ({len(scan.pages_scanned)} Seiten gescannt)")
=======
# Step 1: Scan website — try Playwright first (JS-rendered), fallback to httpx
playwright_htmls: dict[str, str] = {}
try:
async with httpx.AsyncClient(timeout=120.0) as pw_client:
pw_resp = await pw_client.post(
"http://bp-compliance-consent-tester:8094/website-scan",
json={"url": req.url, "max_pages": 15, "click_nav": True},
)
if pw_resp.status_code == 200:
pw_data = pw_resp.json()
playwright_htmls = pw_data.get("page_htmls", {})
logger.info("Playwright scan: %d pages, %d scripts",
pw_data.get("pages_count", 0), len(pw_data.get("external_scripts", [])))
except Exception as e:
logger.warning("Playwright scanner unavailable, falling back to httpx: %s", e)
# Use Playwright results if available, otherwise fall back to httpx scanner
if playwright_htmls:
# Build ScanResult from Playwright data
from compliance.services.website_scanner import ScanResult, DetectedService, _detect_services, _detect_ai_mentions
from compliance.services.service_registry import SERVICE_REGISTRY
scan = ScanResult()
scan.pages_scanned = list(playwright_htmls.keys())
for page_url, html in playwright_htmls.items():
_detect_services(html, page_url, scan)
_detect_ai_mentions(html, page_url, scan)
# Deduplicate
seen = set()
unique = []
for svc in scan.detected_services:
if svc.id not in seen:
seen.add(svc.id)
unique.append(svc)
scan.detected_services = unique
scan.chatbot_detected = any(s.category == "chatbot" for s in scan.detected_services)
if scan.chatbot_detected:
scan.chatbot_provider = next(s.name for s in scan.detected_services if s.category == "chatbot")
else:
scan = await scan_website(req.url)
logger.info("Scanned %d pages, found %d services", len(scan.pages_scanned), len(scan.detected_services))
>>>>>>> feat/zeroclaw-compliance-agent
# Step 1b: DSI Discovery — find all legal documents on the website
discovered_docs: list[DiscoveredDocument] = []
dsi_findings: list[ScanFinding] = []
try:
<<<<<<< HEAD
async with httpx.AsyncClient(timeout=300.0) as dsi_client:
dsi_resp = await dsi_client.post(
"http://bp-compliance-consent-tester:8094/dsi-discovery",
json={"url": req.url, "max_documents": 30},
=======
async with httpx.AsyncClient(timeout=180.0) as dsi_client:
dsi_resp = await dsi_client.post(
"http://bp-compliance-consent-tester:8094/dsi-discovery",
json={"url": req.url, "max_documents": 20},
>>>>>>> feat/zeroclaw-compliance-agent
)
if dsi_resp.status_code == 200:
dsi_data = dsi_resp.json()
@@ -241,12 +300,17 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
)
for doc in dsi_data.get("documents", []):
doc_type = classify_document_type(doc["title"], doc["url"])
<<<<<<< HEAD
doc_text = doc.get("full_text", "") or doc.get("text_preview", "")
logger.info("DSI check: '%s' type=%s text_len=%d full_text_len=%d preview_len=%d",
doc["title"][:50], doc_type, len(doc_text),
len(doc.get("full_text", "")), len(doc.get("text_preview", "")))
doc_findings = check_document_completeness(
doc_text, doc_type, doc["title"], doc["url"],
=======
doc_findings = check_document_completeness(
doc.get("text_preview", ""), doc_type, doc["title"], doc["url"],
>>>>>>> feat/zeroclaw-compliance-agent
)
# Count completeness
score_finding = next((f for f in doc_findings if "SCORE" in f.get("code", "")), None)
@@ -268,6 +332,7 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
if "SCORE" not in df.get("code", ""):
dsi_findings.append(ScanFinding(
code=df["code"], severity=df["severity"], text=df["text"],
<<<<<<< HEAD
doc_title=doc["title"],
))
except Exception as e:
@@ -296,6 +361,24 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
pass
if not dse_text:
dse_text = await fetch_dse_text(req.url, scan.pages_scanned)
=======
))
except Exception as e:
logger.warning("DSI discovery failed: %s", e)
# Step 2: Fetch privacy policy text (from Playwright HTMLs or httpx)
dse_text = ""
for page_url, html in playwright_htmls.items():
if re.search(r"datenschutz|privacy|dsgvo", page_url, re.IGNORECASE):
import re as _re
clean = _re.sub(r"<(script|style)[^>]*>.*?</\1>", "", html, flags=_re.DOTALL | _re.IGNORECASE)
clean = _re.sub(r"<[^>]+>", " ", clean)
clean = _re.sub(r"\s+", " ", clean).strip()
dse_text = clean[:4000]
break
if not dse_text:
dse_text = await _fetch_dse_text(req.url, scan.pages_scanned)
>>>>>>> feat/zeroclaw-compliance-agent
# Step 3: Extract services mentioned in DSE via LLM + text fallback
dse_services = await extract_dse_services(dse_text) if dse_text else []
@@ -320,11 +403,18 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
dse_html = html
break
if not dse_html:
<<<<<<< HEAD
dse_html = await fetch_dse_html(req.url, scan.pages_scanned)
dse_sections = parse_dse(dse_html, req.url) if dse_html else []
logger.info("Parsed %d DSE sections", len(dse_sections))
_progress("Schritt 4/7: SOLL/IST Vergleich...")
=======
dse_html = await _fetch_dse_html(req.url, scan.pages_scanned)
dse_sections = parse_dse(dse_html, req.url) if dse_html else []
logger.info("Parsed %d DSE sections", len(dse_sections))
>>>>>>> feat/zeroclaw-compliance-agent
# Step 5: SOLL/IST comparison
detected_dicts = [_service_to_dict(s) for s in scan.detected_services]
comparison = compare_services(detected_dicts, dse_services)
@@ -363,7 +453,10 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
# Step 8c: Add DSI document findings
findings.extend(dsi_findings)
<<<<<<< HEAD
_progress(f"Schritt 5/7: Korrekturen generieren... ({len(findings)} Findings)")
=======
>>>>>>> feat/zeroclaw-compliance-agent
# Step 9: Generate corrections for pre-launch mode
if not is_live and findings:
await add_corrections(findings, dse_text)
@@ -400,6 +493,24 @@ async def _execute_scan(req: ScanRequest, scan_id: str = "") -> ScanResponse:
async def _fetch_dse_html(url: str, scanned_pages: list[str]) -> str:
"""Fetch the raw HTML of the privacy policy page (for structured parsing)."""
import re
dse_url = None
for page in scanned_pages:
if re.search(r"datenschutz|privacy|dsgvo", page, re.IGNORECASE):
dse_url = page
break
if not dse_url:
dse_url = url
try:
async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
resp = await client.get(dse_url, headers={"User-Agent": "BreakPilot-Compliance-Agent/1.0"})
return resp.text
except Exception:
return ""
def _service_to_dict(svc: DetectedService) -> dict:
return {
"id": svc.id, "name": svc.name, "category": svc.category,
@@ -0,0 +1,120 @@
"""
FastAPI routes for Banner A/B Testing.
Endpoints:
GET /banner/ab/{site_config_id}/variants — list variants
POST /banner/ab/{site_config_id}/variants — create variant
PUT /banner/ab/variants/{variant_id} — update variant
DELETE /banner/ab/variants/{variant_id} — delete variant
GET /banner/ab/{site_config_id}/stats — per-variant stats
GET /banner/ab/assign — assign variant for device
"""
import logging
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from .tenant_utils import get_tenant_id as _get_tenant_id
from compliance.services.banner_ab_service import BannerABService
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/banner/ab", tags=["banner-ab-testing"])
class VariantCreate(BaseModel):
variant_name: str
variant_key: str = "A"
traffic_percent: int = 50
is_control: bool = False
banner_title: Optional[str] = None
banner_description: Optional[str] = None
position: Optional[str] = None
style: Optional[str] = None
primary_color: Optional[str] = None
show_decline_all: Optional[bool] = None
theme_overrides: Optional[dict] = None
class VariantUpdate(BaseModel):
variant_name: Optional[str] = None
traffic_percent: Optional[int] = None
is_control: Optional[bool] = None
banner_title: Optional[str] = None
banner_description: Optional[str] = None
position: Optional[str] = None
style: Optional[str] = None
primary_color: Optional[str] = None
show_decline_all: Optional[bool] = None
is_active: Optional[bool] = None
@router.get("/{site_config_id}/variants")
def list_variants(
site_config_id: str,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
service = BannerABService(db)
return service.list_variants(tenant_id, site_config_id)
@router.post("/{site_config_id}/variants")
def create_variant(
site_config_id: str,
body: VariantCreate,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
service = BannerABService(db)
return service.create_variant(tenant_id, site_config_id, body.model_dump())
@router.put("/variants/{variant_id}")
def update_variant(
variant_id: str,
body: VariantUpdate,
db: Session = Depends(get_db),
):
service = BannerABService(db)
result = service.update_variant(variant_id, body.model_dump(exclude_none=True))
if not result:
raise HTTPException(404, "Variant not found")
return result
@router.delete("/variants/{variant_id}")
def delete_variant(
variant_id: str,
db: Session = Depends(get_db),
):
service = BannerABService(db)
if not service.delete_variant(variant_id):
raise HTTPException(404, "Variant not found")
return {"deleted": True}
@router.get("/{site_config_id}/stats")
def variant_stats(
site_config_id: str,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
service = BannerABService(db)
return service.get_variant_stats(tenant_id, site_config_id)
@router.get("/assign")
def assign_variant(
site_config_id: str = Query(...),
device_fingerprint: str = Query(...),
db: Session = Depends(get_db),
):
service = BannerABService(db)
variant = service.assign_variant(site_config_id, device_fingerprint)
if not variant:
return {"variant": None, "message": "No active A/B test"}
return {"variant": variant}
@@ -0,0 +1,67 @@
"""
FastAPI routes for Banner Consent Analytics.
Endpoints:
GET /banner/analytics/{site_id}/overview — high-level stats
GET /banner/analytics/{site_id}/time-series — opt-in rate over time
GET /banner/analytics/{site_id}/categories — acceptance per category
GET /banner/analytics/{site_id}/devices — mobile/desktop/tablet breakdown
"""
import logging
from typing import Optional
from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from .tenant_utils import get_tenant_id as _get_tenant_id
from compliance.services.banner_analytics_service import BannerAnalyticsService
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/banner/analytics", tags=["banner-analytics"])
@router.get("/{site_id}/overview")
def analytics_overview(
site_id: str,
days: int = Query(30, le=365),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
service = BannerAnalyticsService(db)
return service.get_overview_stats(tenant_id, site_id, days)
@router.get("/{site_id}/time-series")
def analytics_time_series(
site_id: str,
period: str = Query("daily"),
days: int = Query(30, le=365),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
service = BannerAnalyticsService(db)
return service.get_time_series(tenant_id, site_id, period, days)
@router.get("/{site_id}/categories")
def analytics_categories(
site_id: str,
days: int = Query(30, le=365),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
service = BannerAnalyticsService(db)
return service.get_category_breakdown(tenant_id, site_id, days)
@router.get("/{site_id}/devices")
def analytics_devices(
site_id: str,
days: int = Query(30, le=365),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
service = BannerAnalyticsService(db)
return service.get_device_breakdown(tenant_id, site_id, days)
@@ -74,6 +74,7 @@ async def record_consent(
device_fingerprint=body.device_fingerprint,
categories=body.categories,
vendors=body.vendors,
vendor_consents=body.vendor_consents,
ip_address=body.ip_address,
user_agent=body.user_agent,
consent_string=body.consent_string,
@@ -0,0 +1,38 @@
"""
FastAPI route for Compliance Report PDF generation.
Endpoint:
GET /compliance/report/pdf — generate comprehensive compliance report as PDF
"""
import logging
from typing import Optional
from fastapi import APIRouter, Depends, Query
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
import io
from classroom_engine.database import get_db
from .tenant_utils import get_tenant_id as _get_tenant_id
from compliance.services.compliance_pdf_generator import CompliancePDFGenerator
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/compliance/report", tags=["compliance-report"])
@router.get("/pdf")
def generate_compliance_report_pdf(
project_id: Optional[str] = Query(None),
language: str = Query("de"),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
"""Generate a comprehensive compliance PDF report for a project."""
generator = CompliancePDFGenerator(db)
pdf_bytes, filename = generator.generate(tenant_id, project_id, language)
return StreamingResponse(
io.BytesIO(pdf_bytes),
media_type="application/pdf",
headers={"Content-Disposition": f'attachment; filename="{filename}"'},
)
@@ -0,0 +1,380 @@
"""
FastAPI routes for Document Review Workflow.
Tracks which compliance documents have been sent for review, their status,
and handles email notifications to reviewers.
Endpoints:
GET /document-reviews list reviews with filters
GET /document-reviews/stats counts by status
POST /document-reviews create review (auto-assign from mapping)
GET /document-reviews/{id} single review
POST /document-reviews/{id}/send send notification email
POST /document-reviews/{id}/approve mark as approved
POST /document-reviews/{id}/reject mark as rejected
GET /document-reviews/for-document reviews for a specific doc type
"""
import hashlib
import logging
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel
from sqlalchemy import text
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from .tenant_utils import get_tenant_id as _get_tenant_id
from .db_utils import row_to_dict as _row_to_dict
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/document-reviews", tags=["document-reviews"])
# =============================================================================
# Schemas
# =============================================================================
class ReviewCreate(BaseModel):
document_type: str
document_title: str
document_content: Optional[str] = None
project_id: Optional[str] = None
submitted_by: Optional[str] = None
review_link: Optional[str] = None
class ReviewReject(BaseModel):
comment: str
# =============================================================================
# Routes
# =============================================================================
@router.get("")
def list_reviews(
project_id: Optional[str] = Query(None),
status: Optional[str] = Query(None),
document_type: Optional[str] = Query(None),
reviewer_role_key: Optional[str] = Query(None),
limit: int = Query(50, le=200),
offset: int = Query(0),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
where = ["tenant_id = :tid"]
params = {"tid": tenant_id, "lim": limit, "off": offset}
if project_id:
where.append("project_id = :pid")
params["pid"] = project_id
if status:
where.append("status = :status")
params["status"] = status
if document_type:
where.append("document_type = :dt")
params["dt"] = document_type
if reviewer_role_key:
where.append("reviewer_role_key = :rrk")
params["rrk"] = reviewer_role_key
q = text(f"""
SELECT * FROM compliance_document_reviews
WHERE {' AND '.join(where)}
ORDER BY created_at DESC LIMIT :lim OFFSET :off
""")
rows = db.execute(q, params).fetchall()
return [_row_to_dict(r) for r in rows]
@router.get("/stats")
def review_stats(
project_id: Optional[str] = Query(None),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
where = "tenant_id = :tid"
params = {"tid": tenant_id}
if project_id:
where += " AND project_id = :pid"
params["pid"] = project_id
q = text(f"SELECT status, COUNT(*) as count FROM compliance_document_reviews WHERE {where} GROUP BY status")
rows = db.execute(q, params).fetchall()
return {r.status: r.count for r in rows}
@router.get("/for-document")
def reviews_for_document(
document_type: str = Query(...),
project_id: Optional[str] = Query(None),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
where = "tenant_id = :tid AND document_type = :dt"
params = {"tid": tenant_id, "dt": document_type}
if project_id:
where += " AND project_id = :pid"
params["pid"] = project_id
q = text(f"SELECT * FROM compliance_document_reviews WHERE {where} ORDER BY created_at DESC LIMIT 10")
rows = db.execute(q, params).fetchall()
return [_row_to_dict(r) for r in rows]
@router.post("")
def create_review(
body: ReviewCreate,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
# Find reviewer(s) from mapping + org_roles
q = text("""
SELECT m.role_key, m.is_primary, r.person_name, r.person_email, r.role_label
FROM compliance_document_role_mapping m
LEFT JOIN compliance_org_roles r
ON r.tenant_id = m.tenant_id AND r.role_key = m.role_key
AND (r.project_id = :pid OR r.project_id IS NULL)
WHERE m.tenant_id = :tid AND m.document_type = :dt
ORDER BY m.is_primary DESC
""")
mappings = db.execute(q, {"tid": tenant_id, "dt": body.document_type, "pid": body.project_id}).fetchall()
if not mappings:
raise HTTPException(404, f"No reviewer mapping found for document type '{body.document_type}'")
content_hash = hashlib.sha256(body.document_content.encode()).hexdigest() if body.document_content else None
created = []
for m in mappings:
m_dict = _row_to_dict(m)
ins = text("""
INSERT INTO compliance_document_reviews
(tenant_id, project_id, document_type, document_title, document_content_hash,
reviewer_role_key, reviewer_name, reviewer_email, submitted_by, review_link, submitted_at)
VALUES (:tid, :pid, :dt, :title, :hash, :rrk, :rn, :re, :sb, :rl, NOW())
RETURNING *
""")
row = db.execute(ins, {
"tid": tenant_id, "pid": body.project_id, "dt": body.document_type,
"title": body.document_title, "hash": content_hash,
"rrk": m_dict["role_key"], "rn": m_dict.get("person_name"),
"re": m_dict.get("person_email"), "sb": body.submitted_by,
"rl": body.review_link,
}).fetchone()
created.append(_row_to_dict(row))
db.commit()
return created
@router.get("/{review_id}")
def get_review(
review_id: str,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("SELECT * FROM compliance_document_reviews WHERE id = :rid AND tenant_id = :tid")
row = db.execute(q, {"rid": review_id, "tid": tenant_id}).fetchone()
if not row:
raise HTTPException(404, "Review not found")
return _row_to_dict(row)
@router.post("/{review_id}/send")
def send_notification(
review_id: str,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("SELECT * FROM compliance_document_reviews WHERE id = :rid AND tenant_id = :tid")
row = db.execute(q, {"rid": review_id, "tid": tenant_id}).fetchone()
if not row:
raise HTTPException(404, "Review not found")
review = _row_to_dict(row)
if not review.get("reviewer_email"):
raise HTTPException(400, "No email for reviewer — assign a person to this role first")
try:
from compliance.services.smtp_sender import send_email
result = send_email(
recipient=review["reviewer_email"],
subject=f"[BreakPilot] Dokument zur Pruefung: {review['document_title']}",
body_html=f"""
<h2>Dokument zur Pruefung</h2>
<p>Sehr geehrte/r <strong>{review.get('reviewer_name') or 'Pruefer/in'}</strong>,</p>
<p>das folgende Dokument wurde Ihnen zur inhaltlichen Pruefung zugewiesen:</p>
<table style="border-collapse:collapse;margin:16px 0;">
<tr><td style="padding:4px 12px 4px 0;font-weight:bold;">Dokument:</td>
<td>{review['document_title']}</td></tr>
<tr><td style="padding:4px 12px 4px 0;font-weight:bold;">Typ:</td>
<td>{review['document_type']}</td></tr>
<tr><td style="padding:4px 12px 4px 0;font-weight:bold;">Eingereicht von:</td>
<td>{review.get('submitted_by') or 'System'}</td></tr>
</table>
<p>Bitte pruefen Sie das Dokument auf <strong>inhaltliche Richtigkeit</strong>,
<strong>Vollstaendigkeit</strong> und <strong>Umsetzbarkeit</strong>.</p>
{f'<p><a href="{review["review_link"]}" style="background:#7c3aed;color:white;padding:10px 20px;border-radius:6px;text-decoration:none;">Dokument oeffnen</a></p>' if review.get("review_link") else ''}
<p style="color:#888;font-size:12px;">BreakPilot Compliance SDK</p>
""",
)
# Update review status
db.execute(text("""
UPDATE compliance_document_reviews
SET status = 'in_review', email_sent = TRUE, email_sent_at = NOW(), updated_at = NOW()
WHERE id = :rid
"""), {"rid": review_id})
db.commit()
return {"sent": True, "email": review["reviewer_email"], "result": result}
except Exception as e:
logger.error("Failed to send review email: %s", e)
raise HTTPException(500, f"Email sending failed: {e}")
@router.post("/{review_id}/approve")
def approve_review(
review_id: str,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("""
UPDATE compliance_document_reviews
SET status = 'approved', reviewed_at = NOW(), updated_at = NOW()
WHERE id = :rid AND tenant_id = :tid
RETURNING *
""")
row = db.execute(q, {"rid": review_id, "tid": tenant_id}).fetchone()
if not row:
raise HTTPException(404, "Review not found")
db.commit()
review = _row_to_dict(row)
# Notify all OTHER roles mapped to this document type about the approval
_notify_approval(db, tenant_id, review)
# Check training gaps
training_info = {"training_gaps": 0, "academy_available": False}
try:
from compliance.services.training_link_service import TrainingLinkService
tls = TrainingLinkService(db)
gaps = tls.check_training_gaps(tenant_id, review["document_type"], review.get("project_id"))
training_info = {"training_gaps": gaps.get("total_gaps", 0), "academy_available": gaps.get("academy_available", False)}
# Send training notification emails for each gap
if gaps.get("gaps"):
_notify_training_gaps(gaps["gaps"], review)
except Exception as e:
logger.warning("Training gap check failed (non-blocking): %s", e)
review["training"] = training_info
return review
def _notify_approval(db: Session, tenant_id: str, review: dict):
"""Send approval notification to all other roles mapped to this document type."""
try:
from compliance.services.smtp_sender import send_email
q = text("""
SELECT DISTINCT r.person_name, r.person_email, r.role_label
FROM compliance_document_role_mapping m
JOIN compliance_org_roles r
ON r.tenant_id = m.tenant_id AND r.role_key = m.role_key
AND (r.project_id = :pid OR r.project_id IS NULL)
WHERE m.tenant_id = :tid AND m.document_type = :dt
AND m.role_key != :reviewer_key AND r.person_email IS NOT NULL
""")
others = db.execute(q, {
"tid": tenant_id, "dt": review["document_type"],
"pid": review.get("project_id"), "reviewer_key": review["reviewer_role_key"],
}).fetchall()
for other in others:
o = _row_to_dict(other)
send_email(
recipient=o["person_email"],
subject=f"[BreakPilot] Freigabe: {review['document_title']}",
body_html=f"""
<h2>Dokument freigegeben</h2>
<p>Sehr geehrte/r <strong>{o.get('person_name') or o['role_label']}</strong>,</p>
<p>das Dokument <strong>{review['document_title']}</strong> wurde von
{review.get('reviewer_name') or review['reviewer_role_key']} freigegeben.</p>
<p>Bitte pruefen Sie, ob fuer Ihren Verantwortungsbereich Handlungsbedarf besteht
(z.B. Schulungsbedarf, Prozessanpassungen).</p>
<p style="color:#888;font-size:12px;">BreakPilot Compliance SDK</p>
""",
)
logger.info("Notified %d other roles about approval of %s", len(others), review["document_title"])
except Exception as e:
logger.warning("Approval notification failed (non-blocking): %s", e)
def _notify_training_gaps(gaps: list[dict], review: dict):
"""Send training requirement emails to persons with outstanding modules."""
try:
from compliance.services.smtp_sender import send_email
for gap in gaps:
if not gap.get("person_email"):
continue
send_email(
recipient=gap["person_email"],
subject=f"[BreakPilot] Schulungsbedarf: {gap['module_title']}",
body_html=f"""
<h2>Schulungsbedarf nach Dokument-Freigabe</h2>
<p>Sehr geehrte/r <strong>{gap['person_name']}</strong>,</p>
<p>nach Freigabe des Dokuments <strong>{review['document_title']}</strong>
ist fuer Ihre Rolle (<strong>{gap['role']}</strong>) eine Schulung erforderlich:</p>
<p><strong>{gap['module_title']}</strong> ({gap['module_code']})</p>
<p>Status: {gap['status']}</p>
<p><a href="/sdk/training/learner" style="background:#7c3aed;color:white;padding:10px 20px;border-radius:6px;text-decoration:none;">Zur Academy</a></p>
<p style="color:#888;font-size:12px;">BreakPilot Compliance SDK</p>
""",
)
logger.info("Sent %d training gap notifications for %s", len(gaps), review["document_title"])
except Exception as e:
logger.warning("Training notification failed (non-blocking): %s", e)
@router.post("/{review_id}/reject")
def reject_review(
review_id: str,
body: ReviewReject,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("""
UPDATE compliance_document_reviews
SET status = 'rejected', reviewed_at = NOW(), review_comment = :comment, updated_at = NOW()
WHERE id = :rid AND tenant_id = :tid
RETURNING *
""")
row = db.execute(q, {"rid": review_id, "tid": tenant_id, "comment": body.comment}).fetchone()
if not row:
raise HTTPException(404, "Review not found")
db.commit()
return _row_to_dict(row)
# =============================================================================
# Training Integration
# =============================================================================
@router.get("/training-requirements")
def get_training_requirements(
document_type: str = Query(...),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
from compliance.services.training_link_service import TrainingLinkService
service = TrainingLinkService(db)
return service.get_training_requirements(tenant_id, document_type)
@router.get("/training-gaps")
def get_training_gaps(
document_type: str = Query(...),
project_id: Optional[str] = Query(None),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
from compliance.services.training_link_service import TrainingLinkService
service = TrainingLinkService(db)
return service.check_training_gaps(tenant_id, document_type, project_id)
@@ -243,6 +243,19 @@ async def change_status(
return svc.change_status(dsr_id, body, tenant_id)
@router.post("/{dsr_id}/reject-art11")
async def reject_art11(
dsr_id: str,
notes: str = Query(""),
tenant_id: str = Depends(_get_tenant),
db: Session = Depends(get_db),
):
"""Reject DSR under Art. 11 DSGVO — data subject not identifiable."""
from compliance.services.dsr_art11_service import DSRArt11Service
with translate_domain_errors():
return DSRArt11Service(db).reject_not_identifiable(dsr_id, tenant_id, notes)
@router.post("/{dsr_id}/verify-identity")
async def verify_identity(
dsr_id: str,
@@ -367,3 +380,42 @@ async def update_exception_check(
):
with translate_domain_errors():
return svc.update_exception_check(dsr_id, check_id, body, tenant_id)
# =============================================================================
# User Data Export (Art. 15 / Art. 20)
# =============================================================================
@router.get("/{dsr_id}/export-user-data")
async def export_user_data(
dsr_id: str,
format: str = Query("json"),
tenant_id: str = Depends(_get_tenant),
svc: DSRService = Depends(_dsr_svc),
db: Session = Depends(get_db),
):
"""Export all CMP data about the data subject as JSON, CSV, or PDF."""
import io
from compliance.services.dsr_export_service import DSRExportService
with translate_domain_errors():
dsr = svc.get(dsr_id, tenant_id)
email = dsr.get("requester_email")
if not email:
from fastapi import HTTPException
raise HTTPException(400, "DSR has no requester email")
export_svc = DSRExportService(db)
if format == "pdf":
content, filename = export_svc.export_pdf(tenant_id, email)
return StreamingResponse(io.BytesIO(content), media_type="application/pdf",
headers={"Content-Disposition": f'attachment; filename="{filename}"'})
elif format == "csv":
content, filename = export_svc.export_csv(tenant_id, email)
return StreamingResponse(io.BytesIO(content), media_type="text/csv",
headers={"Content-Disposition": f'attachment; filename="{filename}"'})
else:
content, filename = export_svc.export_json(tenant_id, email)
return StreamingResponse(io.BytesIO(content), media_type="application/json",
headers={"Content-Disposition": f'attachment; filename="{filename}"'})
@@ -0,0 +1,255 @@
"""
FastAPI routes for Organizational Compliance Roles.
Manages the 7 standard compliance roles (DSB, GF, IT-Leiter, etc.)
and the document-to-role mapping that determines who reviews which documents.
Endpoints:
GET /org-roles list roles for tenant/project
POST /org-roles create/upsert a role
PUT /org-roles/{id} update role details
DELETE /org-roles/{id} remove a role
GET /org-roles/defaults 7 standard role definitions
POST /org-roles/seed seed default roles for a project
POST /org-roles/{id}/send-test send test email to role
GET /org-roles/mapping document-to-role mapping
PUT /org-roles/mapping update mapping
"""
import logging
from typing import Optional, List
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel
from sqlalchemy import text
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from .tenant_utils import get_tenant_id as _get_tenant_id
from .db_utils import row_to_dict as _row_to_dict
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/org-roles", tags=["org-roles"])
# =============================================================================
# Standard role definitions
# =============================================================================
DEFAULT_ROLES = [
{"role_key": "dsb", "role_label": "Datenschutzbeauftragter (DSB)"},
{"role_key": "gf", "role_label": "Geschaeftsfuehrung"},
{"role_key": "it_leiter", "role_label": "IT-Leiter / CISO"},
{"role_key": "hr_leitung", "role_label": "HR-Leitung"},
{"role_key": "marketing_leitung", "role_label": "Marketing-Leitung"},
{"role_key": "compliance_beauftragter", "role_label": "Compliance-Beauftragter"},
{"role_key": "einkauf", "role_label": "Einkauf / Vendor Management"},
]
# =============================================================================
# Schemas
# =============================================================================
class OrgRoleCreate(BaseModel):
role_key: str
role_label: str
person_name: Optional[str] = None
person_email: Optional[str] = None
department: Optional[str] = None
project_id: Optional[str] = None
class OrgRoleUpdate(BaseModel):
role_label: Optional[str] = None
person_name: Optional[str] = None
person_email: Optional[str] = None
department: Optional[str] = None
is_active: Optional[bool] = None
class MappingEntry(BaseModel):
document_type: str
role_key: str
is_primary: bool = True
class MappingUpdate(BaseModel):
entries: List[MappingEntry]
# =============================================================================
# Routes
# =============================================================================
@router.get("")
def list_roles(
project_id: Optional[str] = Query(None),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("""
SELECT * FROM compliance_org_roles
WHERE tenant_id = :tid AND (project_id = :pid OR (:pid IS NULL AND project_id IS NULL))
ORDER BY role_key
""")
rows = db.execute(q, {"tid": tenant_id, "pid": project_id}).fetchall()
return [_row_to_dict(r) for r in rows]
@router.get("/defaults")
def get_defaults():
return DEFAULT_ROLES
@router.post("")
def create_role(
body: OrgRoleCreate,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("""
INSERT INTO compliance_org_roles (tenant_id, project_id, role_key, role_label, person_name, person_email, department)
VALUES (:tid, :pid, :rk, :rl, :pn, :pe, :dept)
ON CONFLICT (tenant_id, project_id, role_key) DO UPDATE
SET role_label = EXCLUDED.role_label,
person_name = COALESCE(EXCLUDED.person_name, compliance_org_roles.person_name),
person_email = COALESCE(EXCLUDED.person_email, compliance_org_roles.person_email),
department = COALESCE(EXCLUDED.department, compliance_org_roles.department),
updated_at = NOW()
RETURNING *
""")
row = db.execute(q, {
"tid": tenant_id, "pid": body.project_id, "rk": body.role_key,
"rl": body.role_label, "pn": body.person_name, "pe": body.person_email,
"dept": body.department,
}).fetchone()
db.commit()
return _row_to_dict(row)
@router.put("/{role_id}")
def update_role(
role_id: str,
body: OrgRoleUpdate,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
sets, params = [], {"rid": role_id, "tid": tenant_id}
for field in ["role_label", "person_name", "person_email", "department", "is_active"]:
val = getattr(body, field, None)
if val is not None:
sets.append(f"{field} = :{field}")
params[field] = val
if not sets:
raise HTTPException(400, "No fields to update")
sets.append("updated_at = NOW()")
q = text(f"UPDATE compliance_org_roles SET {', '.join(sets)} WHERE id = :rid AND tenant_id = :tid RETURNING *")
row = db.execute(q, params).fetchone()
if not row:
raise HTTPException(404, "Role not found")
db.commit()
return _row_to_dict(row)
@router.delete("/{role_id}")
def delete_role(
role_id: str,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("DELETE FROM compliance_org_roles WHERE id = :rid AND tenant_id = :tid")
result = db.execute(q, {"rid": role_id, "tid": tenant_id})
db.commit()
if result.rowcount == 0:
raise HTTPException(404, "Role not found")
return {"deleted": True}
@router.post("/seed")
def seed_roles(
project_id: Optional[str] = Query(None),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
created = 0
for role in DEFAULT_ROLES:
q = text("""
INSERT INTO compliance_org_roles (tenant_id, project_id, role_key, role_label)
VALUES (:tid, :pid, :rk, :rl)
ON CONFLICT (tenant_id, project_id, role_key) DO NOTHING
""")
result = db.execute(q, {"tid": tenant_id, "pid": project_id, "rk": role["role_key"], "rl": role["role_label"]})
created += result.rowcount
db.commit()
return {"seeded": created, "total": len(DEFAULT_ROLES)}
@router.post("/{role_id}/send-test")
def send_test_email(
role_id: str,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("SELECT * FROM compliance_org_roles WHERE id = :rid AND tenant_id = :tid")
role = db.execute(q, {"rid": role_id, "tid": tenant_id}).fetchone()
if not role:
raise HTTPException(404, "Role not found")
role_dict = _row_to_dict(role)
if not role_dict.get("person_email"):
raise HTTPException(400, "No email configured for this role")
try:
from compliance.services.smtp_sender import send_email
result = send_email(
recipient=role_dict["person_email"],
subject=f"[BreakPilot] Test-E-Mail fuer {role_dict['role_label']}",
body_html=f"""
<h2>Test-E-Mail</h2>
<p>Diese E-Mail bestaetigt, dass die Zustellung an die Rolle
<strong>{role_dict['role_label']}</strong> funktioniert.</p>
<p>Empfaenger: {role_dict['person_name'] or 'N/A'} ({role_dict['person_email']})</p>
<p style="color:#888;font-size:12px;">Gesendet von BreakPilot Compliance SDK</p>
""",
)
return {"sent": True, "email": role_dict["person_email"], "result": result}
except Exception as e:
logger.error("Failed to send test email: %s", e)
raise HTTPException(500, f"Email sending failed: {e}")
# =============================================================================
# Document-to-Role Mapping
# =============================================================================
@router.get("/mapping")
def get_mapping(
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("""
SELECT * FROM compliance_document_role_mapping
WHERE tenant_id = :tid
ORDER BY document_type, role_key
""")
rows = db.execute(q, {"tid": tenant_id}).fetchall()
return [_row_to_dict(r) for r in rows]
@router.put("/mapping")
def update_mapping(
body: MappingUpdate,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
for entry in body.entries:
q = text("""
INSERT INTO compliance_document_role_mapping (tenant_id, document_type, role_key, is_primary)
VALUES (:tid, :dt, :rk, :ip)
ON CONFLICT (tenant_id, document_type, role_key) DO UPDATE
SET is_primary = EXCLUDED.is_primary
""")
db.execute(q, {"tid": tenant_id, "dt": entry.document_type, "rk": entry.role_key, "ip": entry.is_primary})
db.commit()
return {"updated": len(body.entries)}
@@ -0,0 +1,95 @@
"""
FastAPI routes for IAB TCF 2.2 (Transparency & Consent Framework).
Endpoints:
GET /tcf/purposes list 12 IAB purposes with translations
GET /tcf/special-features list 2 IAB special features
GET /tcf/category-mapping banner category IAB purpose mapping
POST /tcf/encode generate TC String from consent decisions
POST /tcf/encode-categories generate TC String from banner categories
"""
import logging
from typing import Optional, List, Dict
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from .tenant_utils import get_tenant_id as _get_tenant_id
from compliance.services.tcf_encoder_service import TCFEncoderService
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/tcf", tags=["tcf"])
class TCFEncodeRequest(BaseModel):
purpose_consents: Dict[int, bool] = {}
vendor_consents: Dict[int, bool] = {}
purpose_li: Optional[Dict[int, bool]] = None
special_features: Optional[Dict[int, bool]] = None
cmp_id: int = 1
cmp_version: int = 1
consent_language: str = "DE"
class TCFCategoryEncodeRequest(BaseModel):
categories: List[str] = []
vendor_consents: Optional[Dict[int, bool]] = None
cmp_id: int = 1
consent_language: str = "DE"
@router.get("/purposes")
def list_purposes():
return TCFEncoderService.get_purposes()
@router.get("/special-features")
def list_special_features():
return TCFEncoderService.get_special_features()
@router.get("/category-mapping")
def get_category_mapping():
return TCFEncoderService.get_category_purpose_map()
@router.post("/encode")
def encode_tc_string(body: TCFEncodeRequest):
encoder = TCFEncoderService(
cmp_id=body.cmp_id,
cmp_version=body.cmp_version,
consent_language=body.consent_language,
)
tc_string = encoder.encode(
purpose_consents=body.purpose_consents,
vendor_consents=body.vendor_consents,
purpose_li=body.purpose_li,
special_features=body.special_features,
)
return {"tc_string": tc_string, "version": 2}
@router.post("/encode-categories")
def encode_from_categories(body: TCFCategoryEncodeRequest):
encoder = TCFEncoderService(
cmp_id=body.cmp_id,
consent_language=body.consent_language,
)
tc_string = encoder.encode_from_categories(
categories=body.categories,
vendor_consents=body.vendor_consents,
)
# Also return which purposes were set
from compliance.services.tcf_encoder_service import CATEGORY_PURPOSE_MAP
purpose_ids = set()
for cat in body.categories:
purpose_ids.update(CATEGORY_PURPOSE_MAP.get(cat, []))
return {
"tc_string": tc_string,
"version": 2,
"purposes_consented": sorted(purpose_ids),
"categories": body.categories,
}
@@ -0,0 +1,310 @@
"""
FastAPI routes for Whistleblower (HinSchG) Hinweisgeberschutz.
Admin endpoints for managing reports + public endpoint for anonymous submissions.
Deadlines: 7 days acknowledgment (§ 17 Abs. 1), 3 months feedback (§ 17 Abs. 2).
Endpoints:
GET /whistleblower/reports list with filters
GET /whistleblower/reports/stats counts by status/category
POST /whistleblower/reports create report (admin)
GET /whistleblower/reports/{id} single report with messages
PUT /whistleblower/reports/{id} update status/priority/assignment
POST /whistleblower/reports/{id}/acknowledge send acknowledgment
POST /whistleblower/reports/{id}/close close report
POST /whistleblower/reports/{id}/messages add message
GET /whistleblower/reports/{id}/measures list measures
POST /whistleblower/reports/{id}/measures add measure
POST /whistleblower/submit public anonymous submission
GET /whistleblower/check/{access_key} reporter checks status
"""
import logging
import secrets
import string
from datetime import datetime, timedelta, timezone
from typing import Optional, List
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel
from sqlalchemy import text
from sqlalchemy.orm import Session
from classroom_engine.database import get_db
from .tenant_utils import get_tenant_id as _get_tenant_id
from .db_utils import row_to_dict as _row_to_dict
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/whistleblower", tags=["whistleblower"])
VALID_CATEGORIES = {"corruption", "fraud", "data_protection", "discrimination",
"environment", "competition", "product_safety", "tax_evasion", "other"}
VALID_STATUSES = {"new", "acknowledged", "under_review", "investigation",
"measures_taken", "closed", "rejected"}
def _gen_ref(tenant_id: str, db: Session) -> str:
year = datetime.now().year
q = text("SELECT COUNT(*) FROM compliance_whistleblower_reports WHERE tenant_id = :tid")
count = db.execute(q, {"tid": tenant_id}).scalar() or 0
return f"WB-{year}-{count + 1:06d}"
def _gen_access_key() -> str:
chars = string.ascii_uppercase + string.digits
parts = [''.join(secrets.choice(chars) for _ in range(4)) for _ in range(3)]
return '-'.join(parts)
# =============================================================================
# Schemas
# =============================================================================
class ReportCreate(BaseModel):
category: str = "other"
title: str
description: str
is_anonymous: bool = True
reporter_name: Optional[str] = None
reporter_email: Optional[str] = None
reporter_phone: Optional[str] = None
priority: str = "normal"
class ReportUpdate(BaseModel):
status: Optional[str] = None
priority: Optional[str] = None
assigned_to: Optional[str] = None
category: Optional[str] = None
class MessageCreate(BaseModel):
message: str
sender_type: str = "admin"
is_internal: bool = False
class MeasureCreate(BaseModel):
title: str
description: Optional[str] = None
responsible: Optional[str] = None
due_date: Optional[str] = None
# =============================================================================
# Admin Routes
# =============================================================================
@router.get("/reports")
def list_reports(
status: Optional[str] = Query(None),
category: Optional[str] = Query(None),
limit: int = Query(50, le=200),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
where = ["tenant_id = :tid"]
params = {"tid": tenant_id, "lim": limit}
if status:
where.append("status = :st")
params["st"] = status
if category:
where.append("category = :cat")
params["cat"] = category
q = text(f"SELECT * FROM compliance_whistleblower_reports WHERE {' AND '.join(where)} ORDER BY received_at DESC LIMIT :lim")
return [_row_to_dict(r) for r in db.execute(q, params).fetchall()]
@router.get("/reports/stats")
def report_stats(
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
now = datetime.now(timezone.utc)
q = text("SELECT status, COUNT(*) as cnt FROM compliance_whistleblower_reports WHERE tenant_id = :tid GROUP BY status")
by_status = {r.status: r.cnt for r in db.execute(q, {"tid": tenant_id}).fetchall()}
q2 = text("SELECT category, COUNT(*) as cnt FROM compliance_whistleblower_reports WHERE tenant_id = :tid GROUP BY category")
by_category = {r.category: r.cnt for r in db.execute(q2, {"tid": tenant_id}).fetchall()}
q3 = text("SELECT COUNT(*) FROM compliance_whistleblower_reports WHERE tenant_id = :tid AND deadline_acknowledgment < :now AND acknowledged_at IS NULL AND status = 'new'")
overdue_ack = db.execute(q3, {"tid": tenant_id, "now": now}).scalar() or 0
q4 = text("SELECT COUNT(*) FROM compliance_whistleblower_reports WHERE tenant_id = :tid AND deadline_feedback < :now AND status NOT IN ('closed', 'rejected')")
overdue_fb = db.execute(q4, {"tid": tenant_id, "now": now}).scalar() or 0
total = sum(by_status.values())
return {"total": total, "by_status": by_status, "by_category": by_category, "overdue_acknowledgment": overdue_ack, "overdue_feedback": overdue_fb}
@router.post("/reports")
def create_report(
body: ReportCreate,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
now = datetime.now(timezone.utc)
ref = _gen_ref(tenant_id, db)
ak = _gen_access_key()
q = text("""
INSERT INTO compliance_whistleblower_reports
(tenant_id, reference_number, access_key, category, title, description,
is_anonymous, reporter_name, reporter_email, reporter_phone, priority,
received_at, deadline_acknowledgment, deadline_feedback)
VALUES (:tid, :ref, :ak, :cat, :title, :desc,
:anon, :rn, :re, :rp, :pri,
:now, :dl_ack, :dl_fb)
RETURNING *
""")
row = db.execute(q, {
"tid": tenant_id, "ref": ref, "ak": ak,
"cat": body.category, "title": body.title, "desc": body.description,
"anon": body.is_anonymous, "rn": body.reporter_name,
"re": body.reporter_email, "rp": body.reporter_phone,
"pri": body.priority, "now": now,
"dl_ack": now + timedelta(days=7),
"dl_fb": now + timedelta(days=90),
}).fetchone()
db.commit()
return _row_to_dict(row)
@router.get("/reports/{report_id}")
def get_report(
report_id: str,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
row = db.execute(text("SELECT * FROM compliance_whistleblower_reports WHERE id = :rid AND tenant_id = :tid"),
{"rid": report_id, "tid": tenant_id}).fetchone()
if not row:
raise HTTPException(404, "Report not found")
result = _row_to_dict(row)
msgs = db.execute(text("SELECT * FROM compliance_whistleblower_messages WHERE report_id = :rid ORDER BY created_at"),
{"rid": report_id}).fetchall()
result["messages"] = [_row_to_dict(m) for m in msgs]
measures = db.execute(text("SELECT * FROM compliance_whistleblower_measures WHERE report_id = :rid ORDER BY created_at"),
{"rid": report_id}).fetchall()
result["measures"] = [_row_to_dict(m) for m in measures]
return result
@router.put("/reports/{report_id}")
def update_report(
report_id: str,
body: ReportUpdate,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
sets, params = [], {"rid": report_id, "tid": tenant_id}
for field in ["status", "priority", "assigned_to", "category"]:
val = getattr(body, field, None)
if val is not None:
sets.append(f"{field} = :{field}")
params[field] = val
if not sets:
raise HTTPException(400, "No fields to update")
sets.append("updated_at = NOW()")
q = text(f"UPDATE compliance_whistleblower_reports SET {', '.join(sets)} WHERE id = :rid AND tenant_id = :tid RETURNING *")
row = db.execute(q, params).fetchone()
if not row:
raise HTTPException(404, "Report not found")
db.commit()
return _row_to_dict(row)
@router.post("/reports/{report_id}/acknowledge")
def acknowledge_report(
report_id: str,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("""
UPDATE compliance_whistleblower_reports
SET status = 'acknowledged', acknowledged_at = NOW(), updated_at = NOW()
WHERE id = :rid AND tenant_id = :tid RETURNING *
""")
row = db.execute(q, {"rid": report_id, "tid": tenant_id}).fetchone()
if not row:
raise HTTPException(404, "Report not found")
db.commit()
return _row_to_dict(row)
@router.post("/reports/{report_id}/close")
def close_report(
report_id: str,
reason: str = Query(""),
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("""
UPDATE compliance_whistleblower_reports
SET status = 'closed', closed_at = NOW(), closure_reason = :reason, updated_at = NOW()
WHERE id = :rid AND tenant_id = :tid RETURNING *
""")
row = db.execute(q, {"rid": report_id, "tid": tenant_id, "reason": reason}).fetchone()
if not row:
raise HTTPException(404, "Report not found")
db.commit()
return _row_to_dict(row)
@router.post("/reports/{report_id}/messages")
def add_message(
report_id: str,
body: MessageCreate,
db: Session = Depends(get_db),
tenant_id: str = Depends(_get_tenant_id),
):
q = text("""
INSERT INTO compliance_whistleblower_messages (report_id, sender_type, message, is_internal)
VALUES (:rid, :st, :msg, :internal) RETURNING *
""")
row = db.execute(q, {"rid": report_id, "st": body.sender_type, "msg": body.message, "internal": body.is_internal}).fetchone()
db.commit()
return _row_to_dict(row)
@router.get("/reports/{report_id}/measures")
def list_measures(report_id: str, db: Session = Depends(get_db)):
return [_row_to_dict(r) for r in db.execute(text(
"SELECT * FROM compliance_whistleblower_measures WHERE report_id = :rid ORDER BY created_at"
), {"rid": report_id}).fetchall()]
@router.post("/reports/{report_id}/measures")
def add_measure(
report_id: str, body: MeasureCreate,
db: Session = Depends(get_db),
):
q = text("""
INSERT INTO compliance_whistleblower_measures (report_id, title, description, responsible, due_date)
VALUES (:rid, :title, :desc, :resp, :due) RETURNING *
""")
row = db.execute(q, {"rid": report_id, "title": body.title, "desc": body.description,
"resp": body.responsible, "due": body.due_date}).fetchone()
db.commit()
return _row_to_dict(row)
# =============================================================================
# Public Routes (Anonymous)
# =============================================================================
@router.post("/submit")
def submit_report(body: ReportCreate, db: Session = Depends(get_db), tenant_id: str = Depends(_get_tenant_id)):
"""Public anonymous submission — same as create but returns only access_key."""
body.is_anonymous = True
result = create_report(body, db, tenant_id)
return {"access_key": result["access_key"], "reference_number": result["reference_number"],
"message": "Ihre Meldung wurde erfolgreich eingereicht. Nutzen Sie den Zugangscode um den Status zu pruefen."}
@router.get("/check/{access_key}")
def check_status(access_key: str, db: Session = Depends(get_db), tenant_id: str = Depends(_get_tenant_id)):
"""Reporter checks status anonymously via access key."""
row = db.execute(text(
"SELECT id, reference_number, status, category, received_at, acknowledged_at FROM compliance_whistleblower_reports WHERE access_key = :ak AND tenant_id = :tid"
), {"ak": access_key, "tid": tenant_id}).fetchone()
if not row:
raise HTTPException(404, "Meldung nicht gefunden")
result = _row_to_dict(row)
msgs = db.execute(text(
"SELECT message, sender_type, created_at FROM compliance_whistleblower_messages WHERE report_id = :rid AND is_internal = FALSE ORDER BY created_at"
), {"rid": result["id"]}).fetchall()
result["messages"] = [_row_to_dict(m) for m in msgs]
return result