""" P104 — Cookie-Network-Tracing (Stufe 4). cookies_detailed[i].domain zeigt welche Domain das Cookie via Set-Cookie gesetzt hat. Wir vergleichen: * Site-Hauptdomain vs Cookie-Domain → First-Party / Third-Party * Cookie-Domain vs bekannte Vendoren → wer ist der echte Empfaenger * Vendor-Land vs EU/Drittland → Drittland-Transfer-Hinweis Defeat-Device-Pattern: "Funktional"-Cookie wird aber von doubleclick.net gesetzt → das ist physisch ein Third-Party-Tracking-Cookie, kein funktionales First-Party-Cookie. """ from __future__ import annotations import logging from urllib.parse import urlparse logger = logging.getLogger(__name__) # Vendor-Domain → bekannter Vendor + Land _DOMAIN_VENDORS: dict[str, tuple[str, str]] = { ".doubleclick.net": ("Google DoubleClick", "US"), ".google.com": ("Google", "US"), ".google-analytics.com": ("Google Analytics", "US"), ".googletagmanager.com": ("Google Tag Manager", "US"), ".googleadservices.com": ("Google Ads", "US"), ".gstatic.com": ("Google CDN", "US"), ".facebook.com": ("Meta / Facebook", "US"), ".facebook.net": ("Meta / Facebook", "US"), ".instagram.com": ("Meta / Instagram", "US"), ".linkedin.com": ("LinkedIn (Microsoft)", "US"), ".pinterest.com": ("Pinterest", "US"), ".pinimg.com": ("Pinterest", "US"), ".tiktok.com": ("TikTok (ByteDance)", "CN"), ".bing.com": ("Microsoft Bing", "US"), ".clarity.ms": ("Microsoft Clarity", "US"), ".criteo.com": ("Criteo", "FR"), ".adnxs.com": ("AppNexus / Xandr", "US"), ".rubiconproject.com": ("Rubicon Project", "US"), ".pubmatic.com": ("PubMatic", "US"), ".adobedtm.com": ("Adobe DTM", "US"), ".adobetarget.com": ("Adobe Target", "US"), ".demdex.net": ("Adobe Experience Cloud", "US"), ".omtrdc.net": ("Adobe Analytics", "US"), ".everesttech.net": ("Adobe Advertising Cloud", "US"), ".2o7.net": ("Adobe Analytics", "US"), ".adform.net": ("AdForm", "DK"), ".trade-desk.com": ("The Trade Desk", "US"), ".tradedesk.com": ("The Trade Desk", "US"), ".adsrvr.org": ("The Trade Desk", "US"), ".hotjar.com": ("Hotjar", "MT"), ".matomo.cloud": ("Matomo", "DE"), ".etracker.com": ("etracker", "DE"), ".etracker.de": ("etracker", "DE"), ".cloudflare.com": ("Cloudflare", "US"), ".cookielaw.org": ("OneTrust", "US"), ".cookiebot.com": ("Cookiebot (Cybot)", "DK"), ".usercentrics.eu": ("Usercentrics", "DE"), ".usercentrics.com": ("Usercentrics", "DE"), ".consensu.org": ("IAB Europe TCF", "BE"), ".datadoghq.eu": ("Datadog", "US"), ".datadoghq.com": ("Datadog", "US"), ".datadome.co": ("DataDome", "FR"), ".incapsula.com": ("Imperva Incapsula", "US"), ".imperva.com": ("Imperva", "US"), ".akamai.net": ("Akamai", "US"), ".akamaiedge.net": ("Akamai", "US"), ".salesforce.com": ("Salesforce", "US"), ".force.com": ("Salesforce", "US"), } _NON_EU_COUNTRIES = {"US", "CN", "RU", "IN", "JP", "BR", "AU"} def _registrable_domain(host: str) -> str: """vw.de von www.vw.de oder bla.vw.de oder vw.de""" h = (host or "").lstrip(".").lower() parts = h.split(".") if len(parts) >= 2: return ".".join(parts[-2:]) return h def _lookup_vendor_by_domain(cookie_domain: str) -> tuple[str, str] | None: if not cookie_domain: return None cd = cookie_domain.lower() if not cd.startswith("."): cd = "." + cd for suffix, (vendor, country) in _DOMAIN_VENDORS.items(): if cd.endswith(suffix): return (vendor, country) return None def trace_cookie_network( cookies_detailed: list[dict] | None, site_url: str | None = None, ) -> list[dict]: """Liefert Findings fuer Cookies die von externer/Drittland-Domain gesetzt werden waehrend sie als First-Party / essential deklariert sind.""" if not cookies_detailed: return [] site_host = "" if site_url: try: site_host = _registrable_domain(urlparse(site_url).netloc) except Exception: site_host = "" out: list[dict] = [] for ck in cookies_detailed: if not isinstance(ck, dict): continue name = (ck.get("name") or "").strip() domain = (ck.get("domain") or "").strip() declared = (ck.get("declared_category") or "").lower().strip() if not name or not domain: continue cookie_reg = _registrable_domain(domain) is_third_party = bool(site_host and cookie_reg != site_host) vendor_match = _lookup_vendor_by_domain(domain) if not vendor_match and not is_third_party: continue # Defeat-Device-Pattern: essential/functional + Third-Party if declared in ("essential", "functional", "necessary") and is_third_party: sev = "HIGH" if vendor_match else "MEDIUM" vendor_name = vendor_match[0] if vendor_match else cookie_reg country = vendor_match[1] if vendor_match else "" third_country = country in _NON_EU_COUNTRIES out.append({ "cookie": name, "declared": declared, "cookie_domain": domain, "site_domain": site_host, "vendor": vendor_name, "vendor_country": country, "third_country": third_country, "severity": sev, "label": ( f"Cookie '{name}' deklariert als '{declared}', " f"wird aber von externer Domain " f"{vendor_name} " f"({domain}) gesetzt" + (f" — Drittland: {country}" if third_country else "") ), }) elif vendor_match and declared in ("essential", "functional", "necessary"): # Auch wenn First-Party-Cookie aber bekannter Tracker-Vendor → # Mismatch (z.B. Google Tag Manager kann via CNAME als # First-Party erscheinen) out.append({ "cookie": name, "declared": declared, "cookie_domain": domain, "vendor": vendor_match[0], "vendor_country": vendor_match[1], "third_country": vendor_match[1] in _NON_EU_COUNTRIES, "severity": "MEDIUM", "label": ( f"Cookie '{name}' deklariert als '{declared}', " f"Domain {domain} gehoert aber zu " f"{vendor_match[0]} " f"({vendor_match[1]})" ), }) return out def build_network_trace_block_html(findings: list[dict]) -> str: if not findings: return "" n_third = sum(1 for f in findings if f.get("third_country")) items: list[str] = [] for f in findings[:30]: sev_color = "#dc2626" if f["severity"] == "HIGH" else "#d97706" country_flag = "" if f.get("third_country"): country_flag = ( f' DRITTLAND {f.get("vendor_country","")}' ) items.append( f'
  • {f["label"]}{country_flag}
  • ' ) return ( '
    ' '
    ' 'Cookie-Netzwerk-Verhalten (Defeat-Device-Heuristik)
    ' f'

    ' f'{len(findings)} Cookie{"s" if len(findings) != 1 else ""} ' f'mit Vendor-Domain-Diskrepanz' f'{f" — davon {n_third} mit Drittland-Transfer" if n_third else ""}' f'

    ' '

    ' 'Diese Cookies sind als "essential" oder "funktional" deklariert, ' 'werden aber von einer externen Domain gesetzt — typisch fuer ' 'getarnte Tracker. Drittland-Markierungen sind besonders kritisch: ' 'sie loesen Pflichten nach Art. 44-49 DSGVO aus (SCC / Angemessen-' 'heitsbeschluss / Schrems II Folge-Massnahmen).' '

    ' '
    ' )