feat: Phase 3 — registry 82 services, mandatory checker, SDK flow step

- website_scanner.py: imports from master service_registry.py (82 services)
- agent_scan_routes.py: mandatory content checks (documents + DSE sections)
- steps-betrieb.ts: Compliance Agent step added to SDK Flow (seq 5000)
- PLAN: Phase 9 (Authenticated Testing) added to product roadmap

Mandatory checks know what MUST be there:
- Documents: Impressum, DSE, AGB, Widerrufsbelehrung
- DSE content: 9 Art. 13 DSGVO fields (DSB, Speicherdauer, etc.)
- Impressum content: 5 §5 TMG fields (GF, HRB, USt-ID, etc.)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-29 15:04:44 +02:00
parent 642382cbe8
commit 5c5054f740
4 changed files with 79 additions and 102 deletions
@@ -18,6 +18,9 @@ from compliance.services.dse_service_extractor import extract_dse_services, comp
from compliance.services.smtp_sender import send_email
from compliance.services.dse_parser import parse_dse
from compliance.services.dse_matcher import build_text_references, TextReference
from compliance.services.mandatory_content_checker import (
check_mandatory_documents, check_dse_mandatory_content, MandatoryFinding,
)
logger = logging.getLogger(__name__)
@@ -120,7 +123,16 @@ async def scan_website_endpoint(req: ScanRequest):
# Step 7: Generate findings with text references
services_info, findings = _build_findings(comparison, scan, is_live, text_refs)
# Step 8: Generate corrections for pre-launch mode
# Step 8: Check mandatory content (documents + DSE sections)
mandatory_findings = check_mandatory_documents(scan.pages_scanned, scan.missing_pages)
mandatory_findings += check_dse_mandatory_content(dse_sections, dse_text)
for mf in mandatory_findings:
findings.append(ScanFinding(
code=mf.code, severity=mf.severity,
text=f"{mf.text}" + (f"{mf.suggestion}" if mf.suggestion else ""),
))
# Step 9: Generate corrections for pre-launch mode
if not is_live and findings:
await _add_corrections(findings, dse_text)
@@ -40,107 +40,8 @@ class ScanResult:
missing_pages: dict = field(default_factory=dict) # url -> status_code
# ── Service Registry ──────────────────────────────────────────────────────────
# Lookup table of known third-party services. Each key is a regex pattern and
# each value is the metadata dict of the service that pattern stands for.
# NOTE(review): how the patterns are applied (re.search vs. re.match, flags)
# is decided by the consuming code, which is not part of this table.


def _service(
    service_id: str,
    name: str,
    category: str,
    provider: str,
    country: str,
    *,
    eu_adequate: bool,
    requires_consent: bool,
    legal_ref: str,
) -> dict:
    """Return one registry entry as a plain dict with a fixed key order.

    The boolean flags and the legal reference are keyword-only so that every
    call site in the table below names them explicitly.
    """
    return {
        "id": service_id,
        "name": name,
        "category": category,
        "provider": provider,
        "country": country,
        "eu_adequate": eu_adequate,
        "requires_consent": requires_consent,
        "legal_ref": legal_ref,
    }


SERVICE_REGISTRY: dict[str, dict] = {
    # --- Tracking & Analytics ---
    r"google.?analytics|gtag\(|UA-\d+|G-\w{5,}": _service(
        "google_analytics", "Google Analytics", "tracking", "Google LLC", "US",
        eu_adequate=False,
        requires_consent=True,
        legal_ref="Art. 44-49 DSGVO, §25 TDDDG",
    ),
    r"googletagmanager|gtm\.js": _service(
        "google_tag_manager", "Google Tag Manager", "tracking", "Google LLC", "US",
        eu_adequate=False,
        requires_consent=True,
        legal_ref="Art. 44-49 DSGVO",
    ),
    r"facebook\.net/.*fbevents|fbq\(": _service(
        "facebook_pixel", "Meta/Facebook Pixel", "marketing", "Meta Platforms", "US",
        eu_adequate=False,
        requires_consent=True,
        legal_ref="Art. 44-49 DSGVO, §25 TDDDG",
    ),
    r"hotjar\.com|_hjSettings": _service(
        "hotjar", "Hotjar", "tracking", "Hotjar Ltd", "MT",
        eu_adequate=True,
        requires_consent=True,
        legal_ref="§25 TDDDG (Session Recording)",
    ),
    r"clarity\.ms": _service(
        "ms_clarity", "Microsoft Clarity", "tracking", "Microsoft", "US",
        eu_adequate=False,
        requires_consent=True,
        legal_ref="§25 TDDDG (Session Replay), Art. 44 DSGVO",
    ),
    r"matomo|piwik": _service(
        "matomo", "Matomo", "tracking", "InnoCraft/Self-hosted", "EU/Self",
        eu_adequate=True,
        requires_consent=False,
        legal_ref="Cookieless moeglich, §25 TDDDG",
    ),
    r"plausible\.io": _service(
        "plausible", "Plausible Analytics", "tracking", "Plausible Insights", "EE",
        eu_adequate=True,
        requires_consent=False,
        legal_ref="EU-Anbieter, cookieless",
    ),
    # --- CDN & Fonts ---
    r"fonts\.googleapis\.com|fonts\.gstatic\.com": _service(
        "google_fonts", "Google Fonts (remote)", "cdn", "Google LLC", "US",
        eu_adequate=False,
        requires_consent=True,
        legal_ref="LG Muenchen I, Az. 3 O 17493/20",
    ),
    r"cdn\.cloudflare\.com|cdnjs\.cloudflare\.com": _service(
        "cloudflare_cdn", "Cloudflare CDN", "cdn", "Cloudflare Inc", "US",
        eu_adequate=False,
        requires_consent=False,
        legal_ref="Art. 44-49 DSGVO, berechtigtes Interesse",
    ),
    # --- Chatbots ---
    r"widget\.intercom\.io|intercomcdn": _service(
        "intercom", "Intercom", "chatbot", "Intercom Inc", "US",
        eu_adequate=False,
        requires_consent=True,
        legal_ref="Art. 44-49 DSGVO, KI-gestuetzt",
    ),
    r"tidio\.co|tidioChatApi": _service(
        "tidio", "Tidio Chat", "chatbot", "Tidio LLC", "PL",
        eu_adequate=True,
        requires_consent=False,
        legal_ref="EU-Anbieter",
    ),
    r"zendesk\.com/embeddable|zdassets": _service(
        "zendesk", "Zendesk", "chatbot", "Zendesk Inc", "US",
        eu_adequate=False,
        requires_consent=True,
        legal_ref="Art. 44-49 DSGVO",
    ),
    # --- Payment ---
    r"js\.stripe\.com|stripe\.com/v3": _service(
        "stripe", "Stripe", "payment", "Stripe Inc", "US",
        eu_adequate=False,
        requires_consent=False,
        legal_ref="Art. 6(1)(b) Vertragserfuellung, SCCs",
    ),
    r"paypal\.com/sdk|paypalobjects": _service(
        "paypal", "PayPal", "payment", "PayPal Holdings", "US",
        eu_adequate=False,
        requires_consent=False,
        legal_ref="Art. 6(1)(b) Vertragserfuellung",
    ),
    r"klarna\.com|klarna-payments": _service(
        "klarna", "Klarna", "payment", "Klarna AB", "SE",
        eu_adequate=True,
        requires_consent=False,
        legal_ref="EU, aber Art. 22 DSGVO bei Bonitaetspruefung!",
    ),
    # --- Captcha ---
    r"recaptcha|grecaptcha": _service(
        "recaptcha", "Google reCAPTCHA", "other", "Google LLC", "US",
        eu_adequate=False,
        requires_consent=True,
        legal_ref="Art. 44-49 DSGVO, §25 TDDDG",
    ),
    # --- Video ---
    r"youtube\.com/embed|youtube-nocookie|ytimg": _service(
        "youtube", "YouTube", "other", "Google LLC", "US",
        eu_adequate=False,
        requires_consent=True,
        legal_ref="Art. 44-49 DSGVO, 2-Klick empfohlen",
    ),
    # --- Consent Management ---
    r"didomi|cookiebot|onetrust|usercentrics|consentmanager|quantcast": _service(
        "cmp", "Consent Management Platform", "other", "Various", "EU",
        eu_adequate=True,
        requires_consent=False,
        legal_ref="CMP vorhanden — gut",
    ),
}
# ── Service Registry (imported from master) ──────────────────────────────────
from compliance.services.service_registry import SERVICE_REGISTRY # noqa: E402
AI_TEXT_PATTERNS = [
r"k(?:ue|ü)nstliche.?intelligenz",