diff --git a/admin-compliance/app/sdk/sdk-flow/steps-betrieb.ts b/admin-compliance/app/sdk/sdk-flow/steps-betrieb.ts index ac7386e..41b9872 100644 --- a/admin-compliance/app/sdk/sdk-flow/steps-betrieb.ts +++ b/admin-compliance/app/sdk/sdk-flow/steps-betrieb.ts @@ -341,4 +341,26 @@ export const STEPS_BETRIEB: SDKFlowStep[] = [ url: '/sdk/control-library', completion: 100, }, + { + id: 'compliance-agent', + name: 'Compliance Agent', + nameShort: 'Agent', + package: 'betrieb', + seq: 5000, + checkpointId: 'CP-AGENT', + checkpointType: 'OPTIONAL', + checkpointReviewer: 'NONE', + description: 'Automatische Website-Analyse auf DSGVO-Konformitaet mit 3 Modi: Schnellanalyse, Website-Scan und Cookie-Consent-Test.', + descriptionLong: 'Der Compliance Agent analysiert Websites und Dokumente automatisch auf DSGVO-Konformitaet. Drei Modi: (1) Schnellanalyse — einzelne URL klassifizieren und bewerten via Qwen LLM + UCCA Assessment. (2) Website-Scan — 5-10 Unterseiten crawlen, 82 Drittanbieter-Dienste erkennen, SOLL/IST-Abgleich gegen Datenschutzerklaerung, Pflichtinhalte pruefen (Art. 13 DSGVO, §5 TMG). (3) Cookie-Consent-Test — Playwright Headless Browser testet was VOR und NACH Cookie-Einwilligung geladen wird (§25 TDDDG). Pre-Launch-Modus fuer interne Dokumente mit einbaufertigen Korrekturvorschlaegen. Post-Launch-Modus mit Abmahnrisiko-Warnungen. Textblock-Referenzierung zeigt Originaltext, Position in der DSE und Korrekturvorschlag. Email-Benachrichtigung an zustaendige Rolle.', + legalBasis: 'Art. 5, 13, 25 DSGVO, §5 TMG, §25 TDDDG, §312k BGB', + inputs: [], + outputs: ['scanResults', 'findings', 'corrections'], + prerequisiteSteps: [], + dbTables: [], + dbMode: 'none', + ragCollections: [], + isOptional: true, + url: '/sdk/agent', + completion: 80, + }, ] diff --git a/backend-compliance/compliance/api/agent_scan_routes.py b/backend-compliance/compliance/api/agent_scan_routes.py index 935656c..6554828 100644 --- a/backend-compliance/compliance/api/agent_scan_routes.py +++ b/backend-compliance/compliance/api/agent_scan_routes.py @@ -18,6 +18,9 @@ from compliance.services.dse_service_extractor import extract_dse_services, comp from compliance.services.smtp_sender import send_email from compliance.services.dse_parser import parse_dse from compliance.services.dse_matcher import build_text_references, TextReference +from compliance.services.mandatory_content_checker import ( + check_mandatory_documents, check_dse_mandatory_content, MandatoryFinding, +) logger = logging.getLogger(__name__) @@ -120,7 +123,16 @@ async def scan_website_endpoint(req: ScanRequest): # Step 7: Generate findings with text references services_info, findings = _build_findings(comparison, scan, is_live, text_refs) - # Step 8: Generate corrections for pre-launch mode + # Step 8: Check mandatory content (documents + DSE sections) + mandatory_findings = check_mandatory_documents(scan.pages_scanned, scan.missing_pages) + mandatory_findings += check_dse_mandatory_content(dse_sections, dse_text) + for mf in mandatory_findings: + findings.append(ScanFinding( + code=mf.code, severity=mf.severity, + text=f"{mf.text}" + (f" — {mf.suggestion}" if mf.suggestion else ""), + )) + + # Step 9: Generate corrections for pre-launch mode if not is_live and findings: await _add_corrections(findings, dse_text) diff --git a/backend-compliance/compliance/services/website_scanner.py b/backend-compliance/compliance/services/website_scanner.py index 18256a7..795f0ed 100644 --- a/backend-compliance/compliance/services/website_scanner.py +++ b/backend-compliance/compliance/services/website_scanner.py @@ -40,107 +40,8 @@ class ScanResult: missing_pages: dict = field(default_factory=dict) # url -> status_code -# ── Service Registry ────────────────────────────────────────────────────────── -# Each entry: regex pattern -> service metadata -SERVICE_REGISTRY: dict[str, dict] = { - # --- Tracking & Analytics --- - r"google.?analytics|gtag\(|UA-\d+|G-\w{5,}": { - "id": "google_analytics", "name": "Google Analytics", "category": "tracking", - "provider": "Google LLC", "country": "US", "eu_adequate": False, - "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG", - }, - r"googletagmanager|gtm\.js": { - "id": "google_tag_manager", "name": "Google Tag Manager", "category": "tracking", - "provider": "Google LLC", "country": "US", "eu_adequate": False, - "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO", - }, - r"facebook\.net/.*fbevents|fbq\(": { - "id": "facebook_pixel", "name": "Meta/Facebook Pixel", "category": "marketing", - "provider": "Meta Platforms", "country": "US", "eu_adequate": False, - "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG", - }, - r"hotjar\.com|_hjSettings": { - "id": "hotjar", "name": "Hotjar", "category": "tracking", - "provider": "Hotjar Ltd", "country": "MT", "eu_adequate": True, - "requires_consent": True, "legal_ref": "§25 TDDDG (Session Recording)", - }, - r"clarity\.ms": { - "id": "ms_clarity", "name": "Microsoft Clarity", "category": "tracking", - "provider": "Microsoft", "country": "US", "eu_adequate": False, - "requires_consent": True, "legal_ref": "§25 TDDDG (Session Replay), Art. 44 DSGVO", - }, - r"matomo|piwik": { - "id": "matomo", "name": "Matomo", "category": "tracking", - "provider": "InnoCraft/Self-hosted", "country": "EU/Self", "eu_adequate": True, - "requires_consent": False, "legal_ref": "Cookieless moeglich, §25 TDDDG", - }, - r"plausible\.io": { - "id": "plausible", "name": "Plausible Analytics", "category": "tracking", - "provider": "Plausible Insights", "country": "EE", "eu_adequate": True, - "requires_consent": False, "legal_ref": "EU-Anbieter, cookieless", - }, - # --- CDN & Fonts --- - r"fonts\.googleapis\.com|fonts\.gstatic\.com": { - "id": "google_fonts", "name": "Google Fonts (remote)", "category": "cdn", - "provider": "Google LLC", "country": "US", "eu_adequate": False, - "requires_consent": True, "legal_ref": "LG Muenchen I, Az. 3 O 17493/20", - }, - r"cdn\.cloudflare\.com|cdnjs\.cloudflare\.com": { - "id": "cloudflare_cdn", "name": "Cloudflare CDN", "category": "cdn", - "provider": "Cloudflare Inc", "country": "US", "eu_adequate": False, - "requires_consent": False, "legal_ref": "Art. 44-49 DSGVO, berechtigtes Interesse", - }, - # --- Chatbots --- - r"widget\.intercom\.io|intercomcdn": { - "id": "intercom", "name": "Intercom", "category": "chatbot", - "provider": "Intercom Inc", "country": "US", "eu_adequate": False, - "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, KI-gestuetzt", - }, - r"tidio\.co|tidioChatApi": { - "id": "tidio", "name": "Tidio Chat", "category": "chatbot", - "provider": "Tidio LLC", "country": "PL", "eu_adequate": True, - "requires_consent": False, "legal_ref": "EU-Anbieter", - }, - r"zendesk\.com/embeddable|zdassets": { - "id": "zendesk", "name": "Zendesk", "category": "chatbot", - "provider": "Zendesk Inc", "country": "US", "eu_adequate": False, - "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO", - }, - # --- Payment --- - r"js\.stripe\.com|stripe\.com/v3": { - "id": "stripe", "name": "Stripe", "category": "payment", - "provider": "Stripe Inc", "country": "US", "eu_adequate": False, - "requires_consent": False, "legal_ref": "Art. 6(1)(b) Vertragserfuellung, SCCs", - }, - r"paypal\.com/sdk|paypalobjects": { - "id": "paypal", "name": "PayPal", "category": "payment", - "provider": "PayPal Holdings", "country": "US", "eu_adequate": False, - "requires_consent": False, "legal_ref": "Art. 6(1)(b) Vertragserfuellung", - }, - r"klarna\.com|klarna-payments": { - "id": "klarna", "name": "Klarna", "category": "payment", - "provider": "Klarna AB", "country": "SE", "eu_adequate": True, - "requires_consent": False, "legal_ref": "EU, aber Art. 22 DSGVO bei Bonitaetspruefung!", - }, - # --- Captcha --- - r"recaptcha|grecaptcha": { - "id": "recaptcha", "name": "Google reCAPTCHA", "category": "other", - "provider": "Google LLC", "country": "US", "eu_adequate": False, - "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, §25 TDDDG", - }, - # --- Video --- - r"youtube\.com/embed|youtube-nocookie|ytimg": { - "id": "youtube", "name": "YouTube", "category": "other", - "provider": "Google LLC", "country": "US", "eu_adequate": False, - "requires_consent": True, "legal_ref": "Art. 44-49 DSGVO, 2-Klick empfohlen", - }, - # --- Consent Management --- - r"didomi|cookiebot|onetrust|usercentrics|consentmanager|quantcast": { - "id": "cmp", "name": "Consent Management Platform", "category": "other", - "provider": "Various", "country": "EU", "eu_adequate": True, - "requires_consent": False, "legal_ref": "CMP vorhanden — gut", - }, -} +# ── Service Registry (imported from master) ────────────────────────────────── +from compliance.services.service_registry import SERVICE_REGISTRY # noqa: E402 AI_TEXT_PATTERNS = [ r"k(?:ue|ü)nstliche.?intelligenz", diff --git a/zeroclaw/PLAN-compliance-agent-product.md b/zeroclaw/PLAN-compliance-agent-product.md index 1709d2f..166a163 100644 --- a/zeroclaw/PLAN-compliance-agent-product.md +++ b/zeroclaw/PLAN-compliance-agent-product.md @@ -466,6 +466,48 @@ Risiko-Score | 15/100 | 45/100 | 20/100 | 55/100 | | 4 | Phase 6 | PDF-Export | Druckbare Reports fuer Management | | 4 | Phase 7 | Recurring Scans | Automatische Ueberwachung | | 5 | Phase 8 | Multi-Website Vergleich | Wettbewerber-Benchmark | +| 6 | Phase 9 | Authenticated Testing | Login-Bereich pruefen (§312k, Art. 17, 20) | + +--- + +## Phase 9: Authenticated Website Testing (P3, 2 Tage) + +### Konzept + +Ein DSB gibt seine eigenen Credentials im SDK ein. Playwright loggt sich ein +und prueft den Kundenbereich auf Pflichtfunktionen: + +### Pruefbare Rechte nach Login + +| Pruefung | Rechtsgrundlage | Methode | +|----------|----------------|---------| +| Kuendigungsbutton (2 Klicks) | §312k BGB | Navigation suchen, Klicks zaehlen | +| Konto loeschen | Art. 17 DSGVO | "Konto loeschen" Button suchen | +| Daten exportieren | Art. 20 DSGVO | "Daten herunterladen" suchen | +| Einwilligungen widerrufen | Art. 7(3) DSGVO | Consent-Einstellungen suchen | +| Profildaten einsehen | Art. 15 DSGVO | Profil-/Kontobereich pruefen | + +### Sicherheit + +- Credentials werden NUR fuer die Dauer des Tests im Browser-Kontext gehalten +- Kein Speichern in DB, kein Logging, kein Senden an Dritte +- Nach Test: Browser-Kontext wird zerstoert, Credentials verworfen +- HTTPS-only (kein HTTP-Login) + +### Implementierung + +- Erweiterung des `consent-tester` Service um Login-Flow +- Neuer Tab im Frontend: "Authentifizierter Test" +- Credential-Eingabe als einmalige Formularfelder (nicht gespeichert) +- Screenshots als Belege fuer den Report + +### Dateien + +| Datei | LOC | Zweck | +|-------|-----|-------| +| `consent-tester/services/authenticated_scanner.py` | ~200 | Login + Kundenbereich-Checks | +| `consent-tester/main.py` | +30 | Neuer /authenticated-scan Endpoint | +| Frontend: AuthenticatedTestTab | ~150 | Credential-Eingabe + Ergebnis | ## Investoren-Demo Szenario