feat: Phase 3 — registry 82 services, mandatory checker, SDK flow step
- website_scanner.py: imports from master service_registry.py (82 services)
- agent_scan_routes.py: mandatory content checks (documents + DSE sections)
- steps-betrieb.ts: Compliance Agent step added to SDK Flow (seq 5000)
- PLAN: Phase 9 (Authenticated Testing) added to product roadmap

Mandatory checks know what MUST be there:
- Documents: Impressum, DSE, AGB, Widerrufsbelehrung
- DSE content: 9 Art. 13 DSGVO fields (DSB, Speicherdauer, etc.)
- Impressum content: 5 §5 TMG fields (GF, HRB, USt-ID, etc.)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -341,4 +341,26 @@ export const STEPS_BETRIEB: SDKFlowStep[] = [
|
||||
url: '/sdk/control-library',
|
||||
completion: 100,
|
||||
},
|
||||
{
|
||||
id: 'compliance-agent',
|
||||
name: 'Compliance Agent',
|
||||
nameShort: 'Agent',
|
||||
package: 'betrieb',
|
||||
seq: 5000,
|
||||
checkpointId: 'CP-AGENT',
|
||||
checkpointType: 'OPTIONAL',
|
||||
checkpointReviewer: 'NONE',
|
||||
description: 'Automatische Website-Analyse auf DSGVO-Konformitaet mit 3 Modi: Schnellanalyse, Website-Scan und Cookie-Consent-Test.',
|
||||
descriptionLong: 'Der Compliance Agent analysiert Websites und Dokumente automatisch auf DSGVO-Konformitaet. Drei Modi: (1) Schnellanalyse — einzelne URL klassifizieren und bewerten via Qwen LLM + UCCA Assessment. (2) Website-Scan — 5-10 Unterseiten crawlen, 82 Drittanbieter-Dienste erkennen, SOLL/IST-Abgleich gegen Datenschutzerklaerung, Pflichtinhalte pruefen (Art. 13 DSGVO, §5 TMG). (3) Cookie-Consent-Test — Playwright Headless Browser testet was VOR und NACH Cookie-Einwilligung geladen wird (§25 TDDDG). Pre-Launch-Modus fuer interne Dokumente mit einbaufertigen Korrekturvorschlaegen. Post-Launch-Modus mit Abmahnrisiko-Warnungen. Textblock-Referenzierung zeigt Originaltext, Position in der DSE und Korrekturvorschlag. Email-Benachrichtigung an zustaendige Rolle.',
|
||||
legalBasis: 'Art. 5, 13, 25 DSGVO, §5 TMG, §25 TDDDG, §312k BGB',
|
||||
inputs: [],
|
||||
outputs: ['scanResults', 'findings', 'corrections'],
|
||||
prerequisiteSteps: [],
|
||||
dbTables: [],
|
||||
dbMode: 'none',
|
||||
ragCollections: [],
|
||||
isOptional: true,
|
||||
url: '/sdk/agent',
|
||||
completion: 80,
|
||||
},
|
||||
]
|
||||
|
||||
@@ -18,6 +18,9 @@ from compliance.services.dse_service_extractor import extract_dse_services, comp
|
||||
from compliance.services.smtp_sender import send_email
|
||||
from compliance.services.dse_parser import parse_dse
|
||||
from compliance.services.dse_matcher import build_text_references, TextReference
|
||||
from compliance.services.mandatory_content_checker import (
|
||||
check_mandatory_documents, check_dse_mandatory_content, MandatoryFinding,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -120,7 +123,16 @@ async def scan_website_endpoint(req: ScanRequest):
|
||||
# Step 7: Generate findings with text references
|
||||
services_info, findings = _build_findings(comparison, scan, is_live, text_refs)
|
||||
|
||||
# Step 8: Generate corrections for pre-launch mode
|
||||
# Step 8: Check mandatory content (documents + DSE sections)
|
||||
mandatory_findings = check_mandatory_documents(scan.pages_scanned, scan.missing_pages)
|
||||
mandatory_findings += check_dse_mandatory_content(dse_sections, dse_text)
|
||||
for mf in mandatory_findings:
|
||||
findings.append(ScanFinding(
|
||||
code=mf.code, severity=mf.severity,
|
||||
text=f"{mf.text}" + (f" — {mf.suggestion}" if mf.suggestion else ""),
|
||||
))
|
||||
|
||||
# Step 9: Generate corrections for pre-launch mode
|
||||
if not is_live and findings:
|
||||
await _add_corrections(findings, dse_text)
|
||||
|
||||
|
||||
@@ -40,107 +40,8 @@ class ScanResult:
|
||||
missing_pages: dict = field(default_factory=dict) # url -> status_code
|
||||
|
||||
|
||||
# ── Service Registry ──────────────────────────────────────────────────────────
|
||||
# Each entry: regex pattern -> service metadata
|
||||
SERVICE_REGISTRY: dict[str, dict] = {
|
||||
# --- Tracking & Analytics ---
|
||||
r"google.?analytics|gtag\(|UA-\d+|G-\w{5,}": {
|
||||
"id": "google_analytics", "name": "Google Analytics", "category": "tracking",
|
||||
"provider": "Google LLC", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
|
||||
},
|
||||
r"googletagmanager|gtm\.js": {
|
||||
"id": "google_tag_manager", "name": "Google Tag Manager", "category": "tracking",
|
||||
"provider": "Google LLC", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO",
|
||||
},
|
||||
r"facebook\.net/.*fbevents|fbq\(": {
|
||||
"id": "facebook_pixel", "name": "Meta/Facebook Pixel", "category": "marketing",
|
||||
"provider": "Meta Platforms", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
|
||||
},
|
||||
r"hotjar\.com|_hjSettings": {
|
||||
"id": "hotjar", "name": "Hotjar", "category": "tracking",
|
||||
"provider": "Hotjar Ltd", "country": "MT", "eu_adequate": True,
|
||||
"requires_consent": True, "legal_ref": "§25 TDDDG (Session Recording)",
|
||||
},
|
||||
r"clarity\.ms": {
|
||||
"id": "ms_clarity", "name": "Microsoft Clarity", "category": "tracking",
|
||||
"provider": "Microsoft", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": True, "legal_ref": "§25 TDDDG (Session Replay), Art. 44 DSGVO",
|
||||
},
|
||||
r"matomo|piwik": {
|
||||
"id": "matomo", "name": "Matomo", "category": "tracking",
|
||||
"provider": "InnoCraft/Self-hosted", "country": "EU/Self", "eu_adequate": True,
|
||||
"requires_consent": False, "legal_ref": "Cookieless moeglich, §25 TDDDG",
|
||||
},
|
||||
r"plausible\.io": {
|
||||
"id": "plausible", "name": "Plausible Analytics", "category": "tracking",
|
||||
"provider": "Plausible Insights", "country": "EE", "eu_adequate": True,
|
||||
"requires_consent": False, "legal_ref": "EU-Anbieter, cookieless",
|
||||
},
|
||||
# --- CDN & Fonts ---
|
||||
r"fonts\.googleapis\.com|fonts\.gstatic\.com": {
|
||||
"id": "google_fonts", "name": "Google Fonts (remote)", "category": "cdn",
|
||||
"provider": "Google LLC", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": True, "legal_ref": "LG Muenchen I, Az. 3 O 17493/20",
|
||||
},
|
||||
r"cdn\.cloudflare\.com|cdnjs\.cloudflare\.com": {
|
||||
"id": "cloudflare_cdn", "name": "Cloudflare CDN", "category": "cdn",
|
||||
"provider": "Cloudflare Inc", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": False, "legal_ref": "Art. 44-49 DSGVO, berechtigtes Interesse",
|
||||
},
|
||||
# --- Chatbots ---
|
||||
r"widget\.intercom\.io|intercomcdn": {
|
||||
"id": "intercom", "name": "Intercom", "category": "chatbot",
|
||||
"provider": "Intercom Inc", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, KI-gestuetzt",
|
||||
},
|
||||
r"tidio\.co|tidioChatApi": {
|
||||
"id": "tidio", "name": "Tidio Chat", "category": "chatbot",
|
||||
"provider": "Tidio LLC", "country": "PL", "eu_adequate": True,
|
||||
"requires_consent": False, "legal_ref": "EU-Anbieter",
|
||||
},
|
||||
r"zendesk\.com/embeddable|zdassets": {
|
||||
"id": "zendesk", "name": "Zendesk", "category": "chatbot",
|
||||
"provider": "Zendesk Inc", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO",
|
||||
},
|
||||
# --- Payment ---
|
||||
r"js\.stripe\.com|stripe\.com/v3": {
|
||||
"id": "stripe", "name": "Stripe", "category": "payment",
|
||||
"provider": "Stripe Inc", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": False, "legal_ref": "Art. 6(1)(b) Vertragserfuellung, SCCs",
|
||||
},
|
||||
r"paypal\.com/sdk|paypalobjects": {
|
||||
"id": "paypal", "name": "PayPal", "category": "payment",
|
||||
"provider": "PayPal Holdings", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": False, "legal_ref": "Art. 6(1)(b) Vertragserfuellung",
|
||||
},
|
||||
r"klarna\.com|klarna-payments": {
|
||||
"id": "klarna", "name": "Klarna", "category": "payment",
|
||||
"provider": "Klarna AB", "country": "SE", "eu_adequate": True,
|
||||
"requires_consent": False, "legal_ref": "EU, aber Art. 22 DSGVO bei Bonitaetspruefung!",
|
||||
},
|
||||
# --- Captcha ---
|
||||
r"recaptcha|grecaptcha": {
|
||||
"id": "recaptcha", "name": "Google reCAPTCHA", "category": "other",
|
||||
"provider": "Google LLC", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG",
|
||||
},
|
||||
# --- Video ---
|
||||
r"youtube\.com/embed|youtube-nocookie|ytimg": {
|
||||
"id": "youtube", "name": "YouTube", "category": "other",
|
||||
"provider": "Google LLC", "country": "US", "eu_adequate": False,
|
||||
"requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, 2-Klick empfohlen",
|
||||
},
|
||||
# --- Consent Management ---
|
||||
r"didomi|cookiebot|onetrust|usercentrics|consentmanager|quantcast": {
|
||||
"id": "cmp", "name": "Consent Management Platform", "category": "other",
|
||||
"provider": "Various", "country": "EU", "eu_adequate": True,
|
||||
"requires_consent": False, "legal_ref": "CMP vorhanden — gut",
|
||||
},
|
||||
}
|
||||
# ── Service Registry (imported from master) ──────────────────────────────────
|
||||
from compliance.services.service_registry import SERVICE_REGISTRY # noqa: E402
|
||||
|
||||
AI_TEXT_PATTERNS = [
|
||||
r"k(?:ue|ü)nstliche.?intelligenz",
|
||||
|
||||
@@ -466,6 +466,48 @@ Risiko-Score | 15/100 | 45/100 | 20/100 | 55/100 |
|
||||
| 4 | Phase 6 | PDF-Export | Druckbare Reports fuer Management |
|
||||
| 4 | Phase 7 | Recurring Scans | Automatische Ueberwachung |
|
||||
| 5 | Phase 8 | Multi-Website Vergleich | Wettbewerber-Benchmark |
|
||||
| 6 | Phase 9 | Authenticated Testing | Login-Bereich pruefen (§312k BGB, Art. 17, 20 DSGVO) |
|
||||
|
||||
---
|
||||
|
||||
## Phase 9: Authenticated Website Testing (P3, 2 Tage)
|
||||
|
||||
### Konzept
|
||||
|
||||
Ein DSB gibt seine eigenen Credentials im SDK ein. Playwright loggt sich ein
|
||||
und prueft den Kundenbereich auf die unten aufgefuehrten Pflichtfunktionen.
|
||||
|
||||
### Pruefbare Rechte nach Login
|
||||
|
||||
| Pruefung | Rechtsgrundlage | Methode |
|
||||
|----------|----------------|---------|
|
||||
| Kuendigungsbutton (2 Klicks) | §312k BGB | Navigation suchen, Klicks zaehlen |
|
||||
| Konto loeschen | Art. 17 DSGVO | "Konto loeschen" Button suchen |
|
||||
| Daten exportieren | Art. 20 DSGVO | "Daten herunterladen" suchen |
|
||||
| Einwilligungen widerrufen | Art. 7(3) DSGVO | Consent-Einstellungen suchen |
|
||||
| Profildaten einsehen | Art. 15 DSGVO | Profil-/Kontobereich pruefen |
|
||||
|
||||
### Sicherheit
|
||||
|
||||
- Credentials werden NUR fuer die Dauer des Tests im Browser-Kontext gehalten
|
||||
- Kein Speichern in DB, kein Logging, kein Senden an Dritte
|
||||
- Nach Test: Browser-Kontext wird zerstoert, Credentials verworfen
|
||||
- HTTPS-only (kein HTTP-Login)
|
||||
|
||||
### Implementierung
|
||||
|
||||
- Erweiterung des `consent-tester` Service um Login-Flow
|
||||
- Neuer Tab im Frontend: "Authentifizierter Test"
|
||||
- Credential-Eingabe als einmalige Formularfelder (nicht gespeichert)
|
||||
- Screenshots als Belege fuer den Report
|
||||
|
||||
### Dateien
|
||||
|
||||
| Datei | LOC | Zweck |
|
||||
|-------|-----|-------|
|
||||
| `consent-tester/services/authenticated_scanner.py` | ~200 | Login + Kundenbereich-Checks |
|
||||
| `consent-tester/main.py` | +30 | Neuer /authenticated-scan Endpoint |
|
||||
| Frontend: AuthenticatedTestTab | ~150 | Credential-Eingabe + Ergebnis |
|
||||
|
||||
## Investoren-Demo Szenario
|
||||
|
||||
|
||||
Reference in New Issue
Block a user