""" P6 + P53 + P55 — OEM-Cross-Industry-Library mit Autonomes Profiling. Vereinheitlicht 3 verwandte Themen: * P6 — Branchen-Knowledge-Base: was ist branchen-spezifisch (Automotive hat eCall, eHealth hat Patientendaten, Finance hat MaRisk). * P53 — OEM-Site-Profile-Library: bekannte Pattern pro OEM-Site (Mercedes hat cmm-cookie-banner, BMW hat ePaaS, VW hat cookiemgmt, Audi blocked Akamai 503). * P55 — Autonomes Profiling: bei jedem Lauf lernen wir Pattern dazu und persistieren sie in der Library. Backend-Service: Lookup-API + Auto-Lern-Hook bei jedem Snapshot-Save. """ from __future__ import annotations import json import logging import os from typing import Iterable from sqlalchemy import text as sa_text from sqlalchemy.orm import Session logger = logging.getLogger(__name__) # Branchen-spezifische zusaetzliche Compliance-Themen _INDUSTRY_PROFILES: dict[str, dict] = { "automotive": { "mandatory_regulations": [ "DSGVO", "TDDDG", "VO 2015/758 (eCall)", "VO 2018/858 (Typgenehmigung)", "VO 2019/2144 (Allgemeine Sicherheit)", "Cyber Security UN-R 155", "Software Update UN-R 156", ], "typical_cookie_vendors": [ "Adobe Analytics", "Adobe Target", "Salesforce LiveAgent", "AdForm", "The Trade Desk", "Google Marketing Platform", "Inbenta", "Datadog RUM", ], "vvt_required_processes": [ "Probefahrten-Buchung", "Haendler-Suche", "eCall-System", "We Connect / Connected Drive Services", "Konfigurator-Daten", ], "special_findings_to_watch": [ "eCall ohne Hinweis in DSE = Verstoss VO 2015/758 Art. 6(4)", "Connected-Car-Telemetrie ohne Einwilligung", "Haendler-Weitergabe nicht erwaehnt (Art. 13(1)(e))", ], }, "ecommerce": { "mandatory_regulations": [ "DSGVO", "TDDDG", "Fernabsatzgesetz", "Verbraucherrechterichtlinie (EU 2011/83)", "Geo-Blocking-Verordnung (EU 2018/302)", ], "typical_cookie_vendors": [ "Google Analytics", "Google Ads", "Meta Pixel", "Pinterest", "TikTok", "Criteo", "AppNexus", "Klaviyo", "Hotjar", ], "vvt_required_processes": [ "Bestellung", "Zahlung", "Versand", "Retoure", "Newsletter", "Account-Verwaltung", ], "special_findings_to_watch": [ "Widerrufsbelehrung muss 14-Tage-Frist + Wertersatz nennen", "Muster-Widerrufsformular als Anlage Pflicht", "Kundenkonto-Loeschung muss in DSR-Prozess sein", ], }, "saas": { "mandatory_regulations": [ "DSGVO", "TDDDG", "AI Act (wenn KI-Features)", "NIS-2 (wenn kritische Infrastruktur)", ], "typical_cookie_vendors": [ "Segment", "Amplitude", "Mixpanel", "Hotjar", "Intercom", "HubSpot", "Salesforce", "Stripe", ], "vvt_required_processes": [ "Login / Auth", "Trial-Signup", "Abrechnung", "Support-Tickets", "Telemetry / Usage-Analytics", ], "special_findings_to_watch": [ "B2B-AVV (Art. 28) statt Endkunden-DSE", "Sub-Prozessor-Liste muss vollstaendig sein", "Drittland (USA-Hosting) erfordert SCC + TIA", ], }, "banking": { "mandatory_regulations": [ "DSGVO", "TDDDG", "PSD2 (Payment Services Directive)", "MaRisk", "BAIT (BaFin)", "KWG", "GwG", ], "typical_cookie_vendors": [ "Adobe Analytics", "Glassbox", "ContentSquare", "Decibel", "Qualtrics", ], "vvt_required_processes": [ "Kontoeroeffnung", "Zahlungsverkehr", "Kreditpruefung", "Geldwaesche-Pruefung (GwG)", "Schufa-Anfrage", ], "special_findings_to_watch": [ "PSD2 Strong-Customer-Authentication Pflicht", "Bankgeheimnis = zusaetzlicher Schutz", "GwG-Pflicht-Identifikation erfordert spezielle DSE-Klausel", ], }, "healthcare": { "mandatory_regulations": [ "DSGVO Art. 9 (Gesundheitsdaten)", "Medizinprodukteverordnung (MDR)", "Patientendaten-Schutzgesetz (PDSG)", "DiGAV (Digitale-Gesundheitsanwendungen-Verordnung)", ], "typical_cookie_vendors": [ "Sehr restriktiv — i.d.R. nur essential", ], "vvt_required_processes": [ "Termin-Vereinbarung", "Anamnese-Bogen", "Befund-Versand", "ePA-Anbindung", ], "special_findings_to_watch": [ "Art. 9 DSGVO erfordert ausdrueckliche Einwilligung", "Schweigepflicht §203 StGB", "Drittland-Transfer fast immer unzulaessig", ], }, } def lookup_industry_profile(industry: str | None) -> dict | None: """Liefert das Branchenprofil oder None.""" if not industry: return None return _INDUSTRY_PROFILES.get(industry.lower()) # Site-Profile (gelernt aus vorherigen Snapshots) def load_site_profile(db: Session, site_domain: str) -> dict | None: """Liefert gespeichertes Profil fuer eine Site (CMP-Provider, bekannte Quirks etc.) oder None.""" if not site_domain: return None try: row = db.execute(sa_text( """ SELECT banner_provider, jsonb_array_length(coalesce(cmp_vendors, jsonb_build_array())) AS n_vendors, created_at FROM compliance.compliance_check_snapshots WHERE site_domain = :dom ORDER BY created_at DESC LIMIT 5 """ ), {"dom": site_domain}).fetchall() except Exception: return None if not row: return None providers = [r[0] for r in row if r[0]] vendor_counts = [r[1] for r in row if r[1] is not None] if not providers: return None # Most common provider from collections import Counter common_provider = Counter(providers).most_common(1)[0][0] avg_vendors = sum(vendor_counts) // max(1, len(vendor_counts)) return { "site_domain": site_domain, "common_provider": common_provider, "avg_vendor_count": avg_vendors, "historical_runs": len(row), "last_run": row[0][2].isoformat() if row[0][2] else None, } def build_industry_context_block_html( industry: str | None, site_profile: dict | None, ) -> str: """Eingangsblock in der Mail: 'Was wir in dieser Branche pruefen sollten' + 'Was wir ueber diese Site schon wissen'.""" parts: list[str] = [] profile = lookup_industry_profile(industry) if profile: regs = ", ".join(profile.get("mandatory_regulations", [])[:6]) watches = profile.get("special_findings_to_watch", [])[:3] watch_html = "".join( f'
' f'Geltende Spezial-Regulierungen: {regs}' f'
' f'