feat: Phase 3 — registry 82 services, mandatory checker, SDK flow step

- website_scanner.py: imports from master service_registry.py (82 services)
- agent_scan_routes.py: mandatory content checks (documents + DSE sections)
- steps-betrieb.ts: Compliance Agent step added to SDK Flow (seq 5000)
- PLAN: Phase 9 (Authenticated Testing) added to product roadmap

Mandatory checks know what MUST be there:
- Documents: Impressum, DSE, AGB, Widerrufsbelehrung
- DSE content: 9 mandatory fields per Art. 13 DSGVO (DSB, Speicherdauer, etc.)
- Impressum content: 5 mandatory fields per §5 TMG (now §5 DDG) (GF, HRB, USt-ID, etc.)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-29 15:04:44 +02:00
parent 642382cbe8
commit 5c5054f740
4 changed files with 79 additions and 102 deletions
@@ -341,4 +341,26 @@ export const STEPS_BETRIEB: SDKFlowStep[] = [
url: '/sdk/control-library',
completion: 100,
},
{
id: 'compliance-agent',
name: 'Compliance Agent',
nameShort: 'Agent',
package: 'betrieb',
seq: 5000,
checkpointId: 'CP-AGENT',
checkpointType: 'OPTIONAL',
checkpointReviewer: 'NONE',
description: 'Automatische Website-Analyse auf DSGVO-Konformitaet mit 3 Modi: Schnellanalyse, Website-Scan und Cookie-Consent-Test.',
descriptionLong: 'Der Compliance Agent analysiert Websites und Dokumente automatisch auf DSGVO-Konformitaet. Drei Modi: (1) Schnellanalyse — einzelne URL klassifizieren und bewerten via Qwen LLM + UCCA Assessment. (2) Website-Scan — 5-10 Unterseiten crawlen, 82 Drittanbieter-Dienste erkennen, SOLL/IST-Abgleich gegen Datenschutzerklaerung, Pflichtinhalte pruefen (Art. 13 DSGVO, §5 TMG). (3) Cookie-Consent-Test — Playwright Headless Browser testet was VOR und NACH Cookie-Einwilligung geladen wird (§25 TDDDG). Pre-Launch-Modus fuer interne Dokumente mit einbaufertigen Korrekturvorschlaegen. Post-Launch-Modus mit Abmahnrisiko-Warnungen. Textblock-Referenzierung zeigt Originaltext, Position in der DSE und Korrekturvorschlag. Email-Benachrichtigung an zustaendige Rolle.',
legalBasis: 'Art. 5, 13, 25 DSGVO, §5 TMG, §25 TDDDG, §312k BGB',
inputs: [],
outputs: ['scanResults', 'findings', 'corrections'],
prerequisiteSteps: [],
dbTables: [],
dbMode: 'none',
ragCollections: [],
isOptional: true,
url: '/sdk/agent',
completion: 80,
},
]
@@ -18,6 +18,9 @@ from compliance.services.dse_service_extractor import extract_dse_services, comp
from compliance.services.smtp_sender import send_email
from compliance.services.dse_parser import parse_dse
from compliance.services.dse_matcher import build_text_references, TextReference
from compliance.services.mandatory_content_checker import (
check_mandatory_documents, check_dse_mandatory_content, MandatoryFinding,
)
logger = logging.getLogger(__name__)
@@ -120,7 +123,16 @@ async def scan_website_endpoint(req: ScanRequest):
# Step 7: Generate findings with text references
services_info, findings = _build_findings(comparison, scan, is_live, text_refs)
# Step 8: Generate corrections for pre-launch mode
# Step 8: Check mandatory content (documents + DSE sections)
mandatory_findings = check_mandatory_documents(scan.pages_scanned, scan.missing_pages)
mandatory_findings += check_dse_mandatory_content(dse_sections, dse_text)
for mf in mandatory_findings:
findings.append(ScanFinding(
code=mf.code, severity=mf.severity,
text=f"{mf.text}" + (f"{mf.suggestion}" if mf.suggestion else ""),
))
# Step 9: Generate corrections for pre-launch mode
if not is_live and findings:
await _add_corrections(findings, dse_text)
@@ -40,107 +40,8 @@ class ScanResult:
missing_pages: dict = field(default_factory=dict) # url -> status_code
# ── Service Registry ──────────────────────────────────────────────────────────
# Service registry: maps a regex pattern (raw string) to metadata describing
# the third-party service that the pattern identifies.
#
# Keys are alternations of service signatures (host names, script names,
# JS globals, tracking-ID shapes). How they are applied (search vs. match,
# flags such as IGNORECASE) is decided by the consumer and not visible here.
#
# Every value carries the same eight fields:
#   id               - short snake_case identifier of the service
#   name             - human-readable service name
#   category         - "tracking", "marketing", "cdn", "chatbot", "payment"
#                      or "other" (the only values used in this table)
#   provider         - provider name string
#   country          - location code ("US", "MT", ...; also "EU" / "EU/Self")
#   eu_adequate      - bool; False for every "US" entry here, True otherwise
#   requires_consent - bool; presumably drives consent findings -- TODO
#                      confirm against the consumer
#   legal_ref        - free-text legal reference / remark (German)
#
# NOTE(review): several patterns are loose. `UA-\d+` and `G-\w{5,}` are
# unanchored, and bare words such as `matomo|piwik` or `recaptcha` match
# anywhere in the input if the consumer uses a search -- confirm that false
# positives are acceptable.
SERVICE_REGISTRY: dict[str, dict] = {
# --- Tracking & Analytics ---
# `.?` tolerates one arbitrary separator (google-analytics, google_analytics,
# googleanalytics); the remaining alternatives are the gtag call and ID shapes.
r"google.?analytics|gtag\(|UA-\d+|G-\w{5,}": {
"id": "google_analytics", "name": "Google Analytics", "category": "tracking",
"provider": "Google LLC", "country": "US", "eu_adequate": False,
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
},
r"googletagmanager|gtm\.js": {
"id": "google_tag_manager", "name": "Google Tag Manager", "category": "tracking",
"provider": "Google LLC", "country": "US", "eu_adequate": False,
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO",
},
r"facebook\.net/.*fbevents|fbq\(": {
"id": "facebook_pixel", "name": "Meta/Facebook Pixel", "category": "marketing",
"provider": "Meta Platforms", "country": "US", "eu_adequate": False,
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
},
r"hotjar\.com|_hjSettings": {
"id": "hotjar", "name": "Hotjar", "category": "tracking",
"provider": "Hotjar Ltd", "country": "MT", "eu_adequate": True,
"requires_consent": True, "legal_ref": "§25 TDDDG (Session Recording)",
},
r"clarity\.ms": {
"id": "ms_clarity", "name": "Microsoft Clarity", "category": "tracking",
"provider": "Microsoft", "country": "US", "eu_adequate": False,
"requires_consent": True, "legal_ref": "§25 TDDDG (Session Replay), Art. 44 DSGVO",
},
r"matomo|piwik": {
"id": "matomo", "name": "Matomo", "category": "tracking",
"provider": "InnoCraft/Self-hosted", "country": "EU/Self", "eu_adequate": True,
"requires_consent": False, "legal_ref": "Cookieless moeglich, §25 TDDDG",
},
r"plausible\.io": {
"id": "plausible", "name": "Plausible Analytics", "category": "tracking",
"provider": "Plausible Insights", "country": "EE", "eu_adequate": True,
"requires_consent": False, "legal_ref": "EU-Anbieter, cookieless",
},
# --- CDN & Fonts ---
# Only remotely loaded Google Fonts are matched (both Google font hosts).
r"fonts\.googleapis\.com|fonts\.gstatic\.com": {
"id": "google_fonts", "name": "Google Fonts (remote)", "category": "cdn",
"provider": "Google LLC", "country": "US", "eu_adequate": False,
"requires_consent": True, "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
},
r"cdn\.cloudflare\.com|cdnjs\.cloudflare\.com": {
"id": "cloudflare_cdn", "name": "Cloudflare CDN", "category": "cdn",
"provider": "Cloudflare Inc", "country": "US", "eu_adequate": False,
"requires_consent": False, "legal_ref": "Art. 44-49 DSGVO, berechtigtes Interesse",
},
# --- Chatbots ---
r"widget\.intercom\.io|intercomcdn": {
"id": "intercom", "name": "Intercom", "category": "chatbot",
"provider": "Intercom Inc", "country": "US", "eu_adequate": False,
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, KI-gestuetzt",
},
r"tidio\.co|tidioChatApi": {
"id": "tidio", "name": "Tidio Chat", "category": "chatbot",
"provider": "Tidio LLC", "country": "PL", "eu_adequate": True,
"requires_consent": False, "legal_ref": "EU-Anbieter",
},
r"zendesk\.com/embeddable|zdassets": {
"id": "zendesk", "name": "Zendesk", "category": "chatbot",
"provider": "Zendesk Inc", "country": "US", "eu_adequate": False,
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO",
},
# --- Payment ---
# All payment entries are marked requires_consent=False; their legal_ref
# cites contract performance (Art. 6(1)(b)) or EU location instead.
r"js\.stripe\.com|stripe\.com/v3": {
"id": "stripe", "name": "Stripe", "category": "payment",
"provider": "Stripe Inc", "country": "US", "eu_adequate": False,
"requires_consent": False, "legal_ref": "Art. 6(1)(b) Vertragserfuellung, SCCs",
},
r"paypal\.com/sdk|paypalobjects": {
"id": "paypal", "name": "PayPal", "category": "payment",
"provider": "PayPal Holdings", "country": "US", "eu_adequate": False,
"requires_consent": False, "legal_ref": "Art. 6(1)(b) Vertragserfuellung",
},
r"klarna\.com|klarna-payments": {
"id": "klarna", "name": "Klarna", "category": "payment",
"provider": "Klarna AB", "country": "SE", "eu_adequate": True,
"requires_consent": False, "legal_ref": "EU, aber Art. 22 DSGVO bei Bonitaetspruefung!",
},
# --- Captcha ---
r"recaptcha|grecaptcha": {
"id": "recaptcha", "name": "Google reCAPTCHA", "category": "other",
"provider": "Google LLC", "country": "US", "eu_adequate": False,
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
},
# --- Video ---
# The youtube-nocookie host is matched by the same entry as regular embeds,
# so it is reported with requires_consent=True as well.
r"youtube\.com/embed|youtube-nocookie|ytimg": {
"id": "youtube", "name": "YouTube", "category": "other",
"provider": "Google LLC", "country": "US", "eu_adequate": False,
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, 2-Klick empfohlen",
},
# --- Consent Management ---
# One generic entry covers several CMP vendors, hence provider "Various".
r"didomi|cookiebot|onetrust|usercentrics|consentmanager|quantcast": {
"id": "cmp", "name": "Consent Management Platform", "category": "other",
"provider": "Various", "country": "EU", "eu_adequate": True,
"requires_consent": False, "legal_ref": "CMP vorhanden — gut",
},
}
# ── Service Registry (imported from master) ──────────────────────────────────
from compliance.services.service_registry import SERVICE_REGISTRY # noqa: E402
AI_TEXT_PATTERNS = [
r"k(?:ue|ü)nstliche.?intelligenz",
+42
View File
@@ -466,6 +466,48 @@ Risiko-Score | 15/100 | 45/100 | 20/100 | 55/100 |
| 4 | Phase 6 | PDF-Export | Druckbare Reports fuer Management |
| 4 | Phase 7 | Recurring Scans | Automatische Ueberwachung |
| 5 | Phase 8 | Multi-Website Vergleich | Wettbewerber-Benchmark |
| 6 | Phase 9 | Authenticated Testing | Login-Bereich pruefen (§312k BGB, Art. 17, 20 DSGVO) |
---
## Phase 9: Authenticated Website Testing (P3, 2 Tage)
### Konzept
Ein DSB gibt seine eigenen Credentials im SDK ein. Playwright loggt sich ein
und prueft den Kundenbereich auf Pflichtfunktionen:
### Pruefbare Rechte nach Login
| Pruefung | Rechtsgrundlage | Methode |
|----------|----------------|---------|
| Kuendigungsbutton (2 Klicks) | §312k BGB | Navigation suchen, Klicks zaehlen |
| Konto loeschen | Art. 17 DSGVO | "Konto loeschen" Button suchen |
| Daten exportieren | Art. 20 DSGVO | "Daten herunterladen" suchen |
| Einwilligungen widerrufen | Art. 7(3) DSGVO | Consent-Einstellungen suchen |
| Profildaten einsehen | Art. 15 DSGVO | Profil-/Kontobereich pruefen |
### Sicherheit
- Credentials werden NUR fuer die Dauer des Tests im Browser-Kontext gehalten
- Kein Speichern in DB, kein Logging, kein Senden an Dritte
- Nach Test: Browser-Kontext wird zerstoert, Credentials verworfen
- HTTPS-only (kein HTTP-Login)
### Implementierung
- Erweiterung des `consent-tester` Service um Login-Flow
- Neuer Tab im Frontend: "Authentifizierter Test"
- Credential-Eingabe als einmalige Formularfelder (nicht gespeichert)
- Screenshots als Belege fuer den Report
### Dateien
| Datei | LOC | Zweck |
|-------|-----|-------|
| `consent-tester/services/authenticated_scanner.py` | ~200 | Login + Kundenbereich-Checks |
| `consent-tester/main.py` | +30 | Neuer /authenticated-scan Endpoint |
| Frontend: AuthenticatedTestTab | ~150 | Credential-Eingabe + Ergebnis |
## Investoren-Demo Szenario