feat: Phase 6-8 — PDF export, recurring scans, multi-website compare
Phase 6: PDF export via WeasyPrint — POST /agent/scans/pdf generates a printable compliance report with a findings table, service comparison, risk badge, and legal disclaimer.
Phase 7: Recurring scans — POST /agent/monitored-urls adds URLs to monitoring; POST /agent/run-scheduled triggers all enabled scans (called by cron/ZeroClaw). Storage is in-memory for now, with an upgrade path to a database.
Phase 8: Multi-website compare — POST /agent/compare accepts 2-5 URLs, scans them in parallel, and returns a comparison table (risk, findings, services, and compliance features per site).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,94 @@
|
||||
"""
|
||||
Agent Compare Routes — scan multiple websites and compare compliance posture.
|
||||
|
||||
POST /api/compliance/agent/compare
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router for the compare endpoint; every route declared on it gets the
# "/compliance/agent" prefix and the "agent" tag.
router = APIRouter(prefix="/compliance/agent", tags=["agent"])
|
||||
|
||||
|
||||
class CompareRequest(BaseModel):
    """Request body for the compare endpoint."""

    # 2-5 URLs to compare. The range is not validated on this model; the
    # handler only looks at the first five entries.
    urls: list[str]
    # Forwarded unchanged as "mode" to the scan endpoint for every URL.
    mode: str = "post_launch"
|
||||
|
||||
|
||||
class SiteResult(BaseModel):
    """Per-website summary row returned by the compare endpoint.

    Only ``url`` and ``domain`` are required; every other field keeps its
    default when a scan does not complete.
    """

    # URL exactly as it was submitted in the request.
    url: str
    # Host label derived from ``url`` by the handler.
    domain: str
    # Copied from the scan response keys "risk_level" / "risk_score".
    risk_level: str = ""
    risk_score: float = 0
    # Number of entries in the scan response's "findings" / "services" lists.
    findings_count: int = 0
    services_count: int = 0
    # True when no finding code contains "IMPRESSUM" / "DATENSCHUTZ".
    has_impressum: bool = False
    has_datenschutz: bool = False
    # True when the scan response has a truthy "chatbot_detected" value or
    # lists a service whose id is "cmp".
    has_cookie_banner: bool = False
    # True when the scan response lists a service whose id is "google_fonts".
    has_google_fonts: bool = False
    # NOTE(review): the compare handler in this module never sets this field,
    # so it always carries the default.
    tracking_before_consent: int = 0
    # Copied from the scan response key "classification".
    classification: str = ""
    # "pending" by default; the handler sets "completed", "failed"
    # (non-200 scan response) or "error" (exception during the scan).
    scan_status: str = "pending"
|
||||
|
||||
|
||||
class CompareResponse(BaseModel):
    """Response body of the compare endpoint."""

    # One result per scanned URL, in the order the URLs were submitted.
    sites: list[SiteResult]
    # ISO-8601 timestamp (UTC) taken after all scans have finished.
    compared_at: str
|
||||
|
||||
|
||||
def _site_domain(url: str) -> str:
    """Return the host part (``netloc``) of *url*, or *url* itself as a fallback.

    Scheme-less input such as ``example.com/a/b`` is parsed as if it were
    ``//example.com/a/b`` so the host — not a path segment — is returned.
    """
    from urllib.parse import urlsplit

    candidate = url.strip()
    try:
        netloc = urlsplit(candidate if "://" in candidate else f"//{candidate}").netloc
    except ValueError:
        # urlsplit rejects malformed bracketed hosts; keep the raw input as label.
        return url
    return netloc or url


def _site_result_from_scan(url: str, domain: str, data: dict) -> SiteResult:
    """Condense one scan response dict into a completed ``SiteResult``.

    Keys that are missing or ``null`` are treated like their empty default,
    so a sparse scan response still yields a usable comparison row.
    """
    services = data.get("services") or []
    findings = data.get("findings") or []

    # Only dict entries carry a "code" / "id"; other entries still count
    # towards findings_count / services_count below.
    finding_codes = [str(f.get("code") or "") for f in findings if isinstance(f, dict)]
    service_ids = [s.get("id") for s in services if isinstance(s, dict)]

    return SiteResult(
        url=url,
        domain=domain,
        risk_level=data.get("risk_level") or "",
        risk_score=data.get("risk_score") or 0,
        findings_count=len(findings),
        services_count=len(services),
        # A page counts as present when no finding complains about it.
        has_impressum=not any("IMPRESSUM" in code for code in finding_codes),
        has_datenschutz=not any("DATENSCHUTZ" in code for code in finding_codes),
        # NOTE(review): "chatbot_detected" feeding the cookie-banner flag looks
        # like a copy-paste slip; kept as-is until the intended key is confirmed.
        has_cookie_banner=bool(data.get("chatbot_detected")) or "cmp" in service_ids,
        has_google_fonts="google_fonts" in service_ids,
        classification=data.get("classification") or "",
        scan_status="completed",
    )


@router.post("/compare", response_model=CompareResponse)
async def compare_websites(req: CompareRequest):
    """Scan multiple websites and compare their compliance posture.

    At most the first five URLs of the request are scanned. All scans run
    concurrently against the local scan endpoint and share one HTTP client.

    A scan that answers with a non-200 status is reported with
    ``scan_status="failed"``; any exception (network error, timeout,
    unparsable or non-object body) is logged and reported with
    ``scan_status="error"``. One bad site never fails the whole comparison.

    Returns:
        CompareResponse with one ``SiteResult`` per scanned URL (request
        order) and the UTC completion time as an ISO-8601 string.
    """
    urls = req.urls[:5]  # Max 5

    async def scan_one(client: httpx.AsyncClient, url: str) -> SiteResult:
        domain = _site_domain(url)
        try:
            resp = await client.post(
                "http://localhost:8002/api/compliance/agent/scan",
                json={"url": url, "mode": req.mode},
            )
            if resp.status_code != 200:
                return SiteResult(url=url, domain=domain, scan_status="failed")

            data = resp.json()
            if not isinstance(data, dict):
                raise ValueError(f"unexpected scan response type: {type(data).__name__}")
            return _site_result_from_scan(url, domain, data)
        except Exception as e:
            # Boundary handler: degrade this one site to an error row.
            logger.error("Compare scan failed for %s: %s", url, e)
            return SiteResult(url=url, domain=domain, scan_status="error")

    # Scan all in parallel over a single shared client / connection pool.
    async with httpx.AsyncClient(timeout=120.0) as client:
        results = await asyncio.gather(*(scan_one(client, u) for u in urls))

    return CompareResponse(
        sites=list(results),
        compared_at=datetime.now(timezone.utc).isoformat(),
    )
|
||||
@@ -13,8 +13,11 @@ import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, Query
|
||||
from fastapi.responses import Response
|
||||
from pydantic import BaseModel
|
||||
|
||||
from compliance.services.agent_pdf_export import generate_scan_pdf
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/compliance/agent", tags=["agent"])
|
||||
@@ -195,3 +198,23 @@ async def get_scan(scan_id: str):
|
||||
return ScanDetail(id=scan_id, url="", scan_type="", analysis_mode="", result={}, created_at="")
|
||||
finally:
|
||||
await pool.close()
|
||||
|
||||
|
||||
def _pdf_report_filename(url: str) -> str:
    """Build a header-safe download filename for the report of *url*.

    Only the host name is used (no credentials, port or path). It is cut to
    30 characters and reduced to ``[A-Za-z0-9._-]`` so that it cannot break
    out of the quoted ``filename`` parameter. Falls back to ``scan`` when no
    host can be extracted.
    """
    import re
    from urllib.parse import urlsplit

    try:
        host = urlsplit(url if "://" in url else f"//{url}").hostname or ""
    except ValueError:
        host = ""
    safe_host = re.sub(r"[^A-Za-z0-9._-]", "_", host)[:30] or "scan"
    return f"compliance-report-{safe_host}.pdf"


@router.post("/scans/pdf")
async def export_scan_pdf(req: SaveScanRequest):
    """Generate a PDF report from scan results (no DB required).

    The scan result dict is merged over the request's ``url``, ``scan_type``
    and ``analysis_mode``, so keys inside ``req.result`` take precedence.

    Returns:
        An ``application/pdf`` attachment on success. If rendering fails, a
        JSON body ``{"error": "<message>"}`` with HTTP status 500, so clients
        cannot mistake the error payload for a PDF.
    """
    from fastapi.responses import JSONResponse

    try:
        pdf_bytes = generate_scan_pdf({
            "url": req.url,
            "scan_type": req.scan_type,
            "analysis_mode": req.analysis_mode,
            **req.result,
        })
    except Exception as e:
        logger.error("PDF generation failed: %s", e, exc_info=True)
        return JSONResponse(status_code=500, content={"error": str(e)})

    # The filename is sanitised separately: a malformed URL must not discard
    # an already rendered PDF or inject characters into the header.
    filename = _pdf_report_filename(req.url)
    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
"""
|
||||
Agent Recurring Scan Routes — schedule and run automated periodic scans.
|
||||
|
||||
POST /api/compliance/agent/monitored-urls — add URL to monitoring
|
||||
GET /api/compliance/agent/monitored-urls — list monitored URLs
|
||||
POST /api/compliance/agent/run-scheduled — trigger all scheduled scans
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router for the recurring-scan endpoints; every route declared on it gets
# the "/compliance/agent" prefix and the "agent" tag.
router = APIRouter(prefix="/compliance/agent", tags=["agent"])

# Connection string read from COMPLIANCE_DATABASE_URL, falling back to
# DATABASE_URL and finally to "".
# NOTE(review): none of the routes visible in this module read it; storage
# is the in-memory list below.
DATABASE_URL = os.environ.get(
    "COMPLIANCE_DATABASE_URL",
    os.environ.get("DATABASE_URL", ""),
)

# In-memory fallback when no DB available.
# Holds one dict per monitored URL. As a plain module global it starts empty
# on every process start, so entries do not survive a restart.
_monitored_urls: list[dict] = []
|
||||
|
||||
|
||||
class MonitoredURL(BaseModel):
    """Request body describing one URL to put under recurring monitoring."""

    # Address that the scheduled run submits to the scanner.
    url: str
    # scan, consent_test — "consent_test" is sent to the consent tester
    # service; any other value is sent to the agent scan endpoint.
    scan_type: str = "scan"
    # daily, weekly, monthly — stored with the entry.
    # NOTE(review): the scheduled-run handler in this module does not read it.
    frequency: str = "weekly"
    # Passed as "recipient" to the agent scan endpoint.
    recipient: str = "dsb@breakpilot.local"
    # Entries with enabled=False are skipped by the scheduled run.
    enabled: bool = True
|
||||
|
||||
|
||||
@router.post("/monitored-urls")
async def add_monitored_url(req: MonitoredURL):
    """Register a URL for recurring scans.

    A new entry with a random UUID, the request's settings, the current UTC
    time as ``created_at`` and an empty ``last_scan_at`` is appended to the
    in-memory list.

    Returns:
        The stored entry, preceded by ``"status": "added"``.
    """
    record = dict(
        id=str(uuid.uuid4()),
        url=req.url,
        scan_type=req.scan_type,
        frequency=req.frequency,
        recipient=req.recipient,
        enabled=req.enabled,
        created_at=datetime.now(timezone.utc).isoformat(),
        last_scan_at=None,
    )
    _monitored_urls.append(record)
    logger.info("Added monitored URL: %s (%s)", req.url, req.frequency)

    response = {"status": "added"}
    response.update(record)
    return response
|
||||
|
||||
|
||||
@router.get("/monitored-urls")
async def list_monitored_urls():
    """Return every monitored-URL entry under the ``urls`` key."""
    payload = {"urls": _monitored_urls}
    return payload
|
||||
|
||||
|
||||
@router.delete("/monitored-urls/{url_id}")
async def remove_monitored_url(url_id: str):
    """Drop the entry whose ``id`` equals *url_id* from monitoring.

    The module-level list is replaced by a new list without that entry.
    The response is ``{"status": "removed"}`` whether or not an entry matched.
    """
    global _monitored_urls

    remaining = []
    for item in _monitored_urls:
        if item["id"] != url_id:
            remaining.append(item)
    _monitored_urls = remaining

    return {"status": "removed"}
|
||||
|
||||
|
||||
@router.post("/run-scheduled")
async def run_scheduled_scans():
    """Trigger all enabled scheduled scans. Called by cron/ZeroClaw.

    Every enabled entry is scanned once, one after another, regardless of its
    ``frequency``. Entries with ``scan_type == "consent_test"`` go to the
    consent tester service; all others go to the agent scan endpoint in
    ``post_launch`` mode together with the entry's recipient.

    ``last_scan_at`` is stamped whenever the scanner answered, whatever the
    HTTP status. It is left untouched when the request itself raised.

    Returns:
        ``{"scans_triggered": <count>, "results": [...]}`` with one result
        per attempted scan: ``status`` is "completed" (HTTP 200), "failed"
        (other status, with ``status_code``) or "error" (exception, with
        ``error``).
    """
    import httpx

    results = []
    backend_url = "http://localhost:8002"

    # Work on a snapshot: each scan can await for minutes, and add/remove
    # requests arriving meanwhile must not change the batch being processed.
    pending = [entry for entry in _monitored_urls if entry["enabled"]]

    # One client for the whole run so connections are pooled instead of being
    # set up and torn down once per monitored URL.
    async with httpx.AsyncClient(timeout=300.0) as client:
        for entry in pending:
            url = entry["url"]
            scan_type = entry["scan_type"]
            logger.info("Running scheduled %s for %s", scan_type, url)

            if scan_type == "consent_test":
                endpoint = "http://bp-compliance-consent-tester:8094/scan"
                payload = {"url": url}
            else:
                endpoint = f"{backend_url}/api/compliance/agent/scan"
                payload = {"url": url, "mode": "post_launch", "recipient": entry["recipient"]}

            try:
                resp = await client.post(endpoint, json=payload)
            except Exception as e:
                # Boundary handler: one unreachable scanner must not abort the run.
                logger.error("Scheduled scan failed for %s: %s", url, e)
                results.append({"url": url, "scan_type": scan_type, "status": "error", "error": str(e)})
                continue

            entry["last_scan_at"] = datetime.now(timezone.utc).isoformat()
            results.append({
                "url": url,
                "scan_type": scan_type,
                "status": "completed" if resp.status_code == 200 else "failed",
                "status_code": resp.status_code,
            })

    return {"scans_triggered": len(results), "results": results}
|
||||
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
Agent PDF Export — generates printable compliance scan reports.
|
||||
|
||||
Uses WeasyPrint to convert HTML report to PDF.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from io import BytesIO
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def generate_scan_pdf(scan_data: dict) -> bytes:
    """Render *scan_data* as a PDF document and return its raw bytes.

    The report HTML is built first and then handed to WeasyPrint, which is
    imported lazily inside this function.
    """
    from weasyprint import HTML

    document = HTML(string=_build_report_html(scan_data))
    with BytesIO() as sink:
        document.write_pdf(sink)
        return sink.getvalue()
|
||||
|
||||
|
||||
def _severity_color(sev: str) -> str:
|
||||
return {"HIGH": "#dc2626", "CRITICAL": "#991b1b", "MEDIUM": "#ea580c", "LOW": "#2563eb"}.get(sev, "#6b7280")
|
||||
|
||||
|
||||
def _build_report_html(data: dict) -> str:
    """Build HTML for the PDF report.

    Keys read from *data*: ``url``, ``scan_type``, ``analysis_mode``,
    ``findings``, ``services``, ``risk_level``, ``risk_score`` and
    ``pages_scanned``. Missing or ``null`` values fall back to an empty
    default.

    Every value taken from *data* is HTML-escaped before it is placed in the
    markup: the scan payload is caller-supplied, and the resulting document
    is rendered by an HTML engine, so raw markup in a URL, finding text or
    service name must never reach the template.

    Returns:
        A complete HTML document as a string.
    """
    from html import escape

    def esc(value) -> str:
        # Coerce first: scan JSON may carry numbers, None or nested values.
        return escape(str(value))

    url = esc(data.get("url") or "")
    scan_type = data.get("scan_type") or "scan"
    mode = data.get("analysis_mode") or "post_launch"
    findings = data.get("findings") or []
    services = data.get("services") or []
    risk = str(data.get("risk_level") or "")
    score = esc(data.get("risk_score") or 0)
    pages = esc(data.get("pages_scanned") or 0)
    now = datetime.now(timezone.utc).strftime("%d.%m.%Y %H:%M UTC")

    mode_label = "Live-Website Pruefung" if mode == "post_launch" else "Interne Pruefung"
    type_label = esc(
        {"quick": "Schnellanalyse", "scan": "Website-Scan", "consent_test": "Cookie-Test"}.get(
            str(scan_type), scan_type
        )
    )

    # One table row per finding; non-dict findings are shown as plain text
    # with MEDIUM severity.
    finding_cells = []
    for f in findings:
        if isinstance(f, dict):
            sev = str(f.get("severity") or "MEDIUM")
            text = f.get("text", str(f))
        else:
            sev = "MEDIUM"
            text = str(f)
        color = _severity_color(sev)
        finding_cells.append(
            f'<tr><td style="color:{color};font-weight:bold;padding:6px 8px;border-bottom:1px solid #e5e7eb;">{esc(sev)}</td>'
            f'<td style="padding:6px 8px;border-bottom:1px solid #e5e7eb;">{esc(text)}</td></tr>'
        )
    findings_rows = "".join(finding_cells)

    # One table row per service dict; other entries are ignored.
    service_cells = []
    for s in services:
        if not isinstance(s, dict):
            continue
        documented = bool(s.get("in_dse")) or s.get("status") == "ok"
        status_icon = "✓" if documented else "✗"
        status_color = "#16a34a" if documented else "#dc2626"
        service_cells.append(
            f'<tr><td style="color:{status_color};font-weight:bold;padding:4px 8px;border-bottom:1px solid #f3f4f6;">{status_icon}</td>'
            f'<td style="padding:4px 8px;border-bottom:1px solid #f3f4f6;">{esc(s.get("name", ""))}</td>'
            f'<td style="padding:4px 8px;border-bottom:1px solid #f3f4f6;">{esc(s.get("country", ""))}</td>'
            f'<td style="padding:4px 8px;border-bottom:1px solid #f3f4f6;">{esc(s.get("category", ""))}</td></tr>'
        )
    services_rows = "".join(service_cells)

    badge_color = _severity_color(risk) if risk else "#6b7280"
    risk_label = esc(risk)

    # The warning banner only applies to findings on an already published site.
    warning_block = (
        '<div class="warning"><strong>ACHTUNG:</strong> Maengel auf einer bereits veroeffentlichten Website. Sofortige Korrektur empfohlen.</div>'
        if mode == "post_launch" and findings
        else ""
    )
    findings_body = (
        findings_rows
        or '<tr><td colspan="2" style="padding:8px;color:#16a34a;">Keine Findings — alles OK</td></tr>'
    )
    services_section = (
        "<h2>Dienstleister-Abgleich</h2><table><tr><th>Status</th><th>Dienst</th><th>Land</th><th>Kategorie</th></tr>"
        + services_rows
        + "</table>"
        if services_rows
        else ""
    )

    return f"""<!DOCTYPE html>
<html><head><meta charset="utf-8">
<style>
body {{ font-family: -apple-system, Arial, sans-serif; font-size: 11px; color: #1e293b; margin: 40px; }}
h1 {{ font-size: 20px; color: #1e1b4b; margin-bottom: 4px; }}
h2 {{ font-size: 14px; color: #334155; border-bottom: 2px solid #e2e8f0; padding-bottom: 4px; margin-top: 24px; }}
.meta {{ color: #64748b; font-size: 10px; margin-bottom: 20px; }}
.badge {{ display: inline-block; padding: 2px 8px; border-radius: 4px; color: white; font-size: 10px; font-weight: bold; }}
table {{ width: 100%; border-collapse: collapse; }}
th {{ text-align: left; padding: 6px 8px; background: #f8fafc; border-bottom: 2px solid #e2e8f0; font-size: 10px; color: #64748b; }}
.warning {{ background: #fef2f2; border-left: 4px solid #dc2626; padding: 10px 14px; margin: 16px 0; }}
.footer {{ margin-top: 30px; padding-top: 10px; border-top: 1px solid #e2e8f0; color: #94a3b8; font-size: 9px; }}
</style></head><body>

<h1>Compliance Agent Report</h1>
<p class="meta">{type_label} | {mode_label} | {now}</p>

<table style="margin-bottom:20px;">
<tr><td style="padding:4px 0;color:#64748b;width:150px;">URL</td><td style="padding:4px 0;"><strong>{url}</strong></td></tr>
<tr><td style="padding:4px 0;color:#64748b;">Risikobewertung</td><td style="padding:4px 0;"><span class="badge" style="background:{badge_color}">{risk_label} ({score}/100)</span></td></tr>
<tr><td style="padding:4px 0;color:#64748b;">Seiten gescannt</td><td style="padding:4px 0;">{pages}</td></tr>
<tr><td style="padding:4px 0;color:#64748b;">Findings</td><td style="padding:4px 0;"><strong>{len(findings)}</strong></td></tr>
</table>

{warning_block}

<h2>Findings ({len(findings)})</h2>
<table>
<tr><th>Schwere</th><th>Beschreibung</th></tr>
{findings_body}
</table>

{services_section}

<div class="footer">
Automatisch erstellt vom BreakPilot Compliance Agent | {now}<br>
Dieses Dokument ersetzt keine Rechtsberatung.
</div>
</body></html>"""
|
||||
@@ -46,6 +46,8 @@ from compliance.api.agent_notification_routes import router as agent_notify_rout
|
||||
from compliance.api.agent_analyze_routes import router as agent_analyze_router
|
||||
from compliance.api.agent_scan_routes import router as agent_scan_router
|
||||
from compliance.api.agent_history_routes import router as agent_history_router
|
||||
from compliance.api.agent_recurring_routes import router as agent_recurring_router
|
||||
from compliance.api.agent_compare_routes import router as agent_compare_router
|
||||
|
||||
# Middleware
|
||||
from middleware import (
|
||||
@@ -146,6 +148,8 @@ app.include_router(agent_notify_router, prefix="/api")
|
||||
app.include_router(agent_analyze_router, prefix="/api")
|
||||
app.include_router(agent_scan_router, prefix="/api")
|
||||
app.include_router(agent_history_router, prefix="/api")
|
||||
app.include_router(agent_recurring_router, prefix="/api")
|
||||
app.include_router(agent_compare_router, prefix="/api")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user