2e87b74749
CI / detect-changes (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 15s
CI / nodejs-build (push) Successful in 2m35s
CI / test-go (push) Failing after 51s
CI / iace-gt-coverage (push) Successful in 27s
CI / loc-budget (push) Failing after 16s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-backend (push) Successful in 39s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
Drei zusammenhaengende Stufen 'Cookie-Verhalten ist anders als deklariert' — analog zum VW-Diesel-Skandal-Pattern (Pruefstand vs Realbetrieb). P103 (Stufe 3) — cookie_value_entropy.py: Klassifiziert Cookie-Werte als flag/short_id/long_token/uuid/hash/json_blob via Shannon-Entropy + Regex-Patterns. Wenn ein als 'essential' deklarierter Cookie einen 64-char-Base64-Wert hat → MEDIUM-Finding 'Defeat-Device-Heuristik'. P104 (Stufe 4) — cookie_network_tracer.py: Vergleicht Cookie-Domain mit Site-Hauptdomain + bekannten Tracker-Vendoren (50 Domains gemapped: doubleclick.net, facebook.com, demdex.net, omtrdc.net, adsrvr.org, hotjar.com, ...). Wenn ein als 'essential' deklariertes Cookie von externer Tracker-Domain gesetzt wird → HIGH. Drittland-Cookies werden als 'DRITTLAND US/CN/...' markiert (Schrems-II-Folge). P105 (Stufe 5) — tcf_vendor_authority.py: Ingest-Endpoint POST /api/compliance/agent/admin/tcf-ingest holt die IAB TCF v2 Global Vendor List (vendor-list.consensu.org/v3) und upserted sie in cookie_library mit source='iab_tcf_v2'. cross_reference_with_tcf fuzzy-matched cmp_vendors gegen die TCF-Liste — wenn Vendor in TCF als Marketing gefuehrt aber Site sagt 'Funktional' → HIGH (externe Authority widerspricht der Deklaration). Alle drei rendern eigene Mail-Bloecke im Bereich Cookies (nach cookie_audit_html, vor library_mismatch_html). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
217 lines
9.0 KiB
Python
217 lines
9.0 KiB
Python
"""
|
|
P104 — Cookie network tracing (stage 4).

cookies_detailed[i].domain shows which domain set the cookie via Set-Cookie.
We compare:
  * site main domain vs. cookie domain    → first-party / third-party
  * cookie domain vs. known vendors       → who the real recipient is
  * vendor country vs. EU / third country → third-country transfer hint

Defeat-device pattern: a cookie declared "functional" that is actually set by
doubleclick.net is physically a third-party tracking cookie, not a functional
first-party cookie.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from urllib.parse import urlparse
|
|
|
|
logger = logging.getLogger(__name__)


# Vendor domain suffix → (known vendor name, vendor country code).
# Every key carries a leading dot so that a suffix comparison with
# str.endswith() only matches on a label boundary (".google.com" matches
# "accounts.google.com" but not "notgoogle.com").
_DOMAIN_VENDORS: dict[str, tuple[str, str]] = {
    ".doubleclick.net": ("Google DoubleClick", "US"),
    ".google.com": ("Google", "US"),
    ".google-analytics.com": ("Google Analytics", "US"),
    ".googletagmanager.com": ("Google Tag Manager", "US"),
    ".googleadservices.com": ("Google Ads", "US"),
    ".gstatic.com": ("Google CDN", "US"),
    ".facebook.com": ("Meta / Facebook", "US"),
    ".facebook.net": ("Meta / Facebook", "US"),
    ".instagram.com": ("Meta / Instagram", "US"),
    ".linkedin.com": ("LinkedIn (Microsoft)", "US"),
    ".pinterest.com": ("Pinterest", "US"),
    ".pinimg.com": ("Pinterest", "US"),
    ".tiktok.com": ("TikTok (ByteDance)", "CN"),
    ".bing.com": ("Microsoft Bing", "US"),
    ".clarity.ms": ("Microsoft Clarity", "US"),
    ".criteo.com": ("Criteo", "FR"),
    ".adnxs.com": ("AppNexus / Xandr", "US"),
    ".rubiconproject.com": ("Rubicon Project", "US"),
    ".pubmatic.com": ("PubMatic", "US"),
    ".adobedtm.com": ("Adobe DTM", "US"),
    ".adobetarget.com": ("Adobe Target", "US"),
    ".demdex.net": ("Adobe Experience Cloud", "US"),
    ".omtrdc.net": ("Adobe Analytics", "US"),
    ".everesttech.net": ("Adobe Advertising Cloud", "US"),
    ".2o7.net": ("Adobe Analytics", "US"),
    ".adform.net": ("AdForm", "DK"),
    ".trade-desk.com": ("The Trade Desk", "US"),
    ".tradedesk.com": ("The Trade Desk", "US"),
    ".adsrvr.org": ("The Trade Desk", "US"),
    ".hotjar.com": ("Hotjar", "MT"),
    ".matomo.cloud": ("Matomo", "DE"),
    ".etracker.com": ("etracker", "DE"),
    ".etracker.de": ("etracker", "DE"),
    ".cloudflare.com": ("Cloudflare", "US"),
    ".cookielaw.org": ("OneTrust", "US"),
    ".cookiebot.com": ("Cookiebot (Cybot)", "DK"),
    ".usercentrics.eu": ("Usercentrics", "DE"),
    ".usercentrics.com": ("Usercentrics", "DE"),
    ".consensu.org": ("IAB Europe TCF", "BE"),
    ".datadoghq.eu": ("Datadog", "US"),
    ".datadoghq.com": ("Datadog", "US"),
    ".datadome.co": ("DataDome", "FR"),
    ".incapsula.com": ("Imperva Incapsula", "US"),
    ".imperva.com": ("Imperva", "US"),
    ".akamai.net": ("Akamai", "US"),
    ".akamaiedge.net": ("Akamai", "US"),
    ".salesforce.com": ("Salesforce", "US"),
    ".force.com": ("Salesforce", "US"),
}

# Country codes that mark a finding as a third-country transfer
# (the "third_country" flag). A vendor country not listed here is not flagged.
_NON_EU_COUNTRIES = {"US", "CN", "RU", "IN", "JP", "BR", "AU"}
|
|
|
|
|
|
def _registrable_domain(host: str) -> str:
|
|
"""vw.de von www.vw.de oder bla.vw.de oder vw.de"""
|
|
h = (host or "").lstrip(".").lower()
|
|
parts = h.split(".")
|
|
if len(parts) >= 2:
|
|
return ".".join(parts[-2:])
|
|
return h
|
|
|
|
|
|
def _lookup_vendor_by_domain(cookie_domain: str) -> tuple[str, str] | None:
|
|
if not cookie_domain:
|
|
return None
|
|
cd = cookie_domain.lower()
|
|
if not cd.startswith("."):
|
|
cd = "." + cd
|
|
for suffix, (vendor, country) in _DOMAIN_VENDORS.items():
|
|
if cd.endswith(suffix):
|
|
return (vendor, country)
|
|
return None
|
|
|
|
|
|
def trace_cookie_network(
    cookies_detailed: list[dict] | None,
    site_url: str | None = None,
) -> list[dict]:
    """Report cookies whose setting domain contradicts their declaration.

    A cookie is only considered when it is declared as ``essential``,
    ``functional`` or ``necessary``. Two situations produce a finding:

    * The cookie domain differs from the site's registrable domain
      (third-party). Severity is ``HIGH`` when the domain belongs to a known
      vendor, otherwise ``MEDIUM``.
    * The cookie is not third-party (or no ``site_url`` was given) but its
      domain belongs to a known vendor. Severity is ``MEDIUM``.

    Args:
        cookies_detailed: Cookie records. Each record is read for the keys
            ``name``, ``domain`` and ``declared_category``; entries that are
            not dicts or lack a name or domain are skipped.
        site_url: URL of the audited site. Port and userinfo are ignored and
            a missing scheme is tolerated. Without a usable URL no cookie is
            classified as third-party.

    Returns:
        One dict per finding with the keys ``cookie``, ``declared``,
        ``cookie_domain``, ``site_domain``, ``vendor``, ``vendor_country``,
        ``third_country``, ``severity`` and ``label``. ``label`` is an HTML
        fragment; all cookie-derived values inside it are HTML-escaped.
    """
    # Function-scope import: the label is embedded verbatim into HTML, so
    # every crawler-supplied value placed in it has to be escaped.
    from html import escape

    if not cookies_detailed:
        return []

    site_host = ""
    if site_url:
        try:
            # urlparse() only fills the host part when the URL has a scheme
            # or starts with "//"; "example.com/x" would land in .path.
            has_netloc = "://" in site_url or site_url.startswith("//")
            parsed = urlparse(site_url if has_netloc else f"//{site_url}")
            # .hostname (unlike .netloc) drops userinfo and ":port", which
            # would otherwise make every own cookie look third-party.
            site_host = _registrable_domain(parsed.hostname or "")
        except (AttributeError, TypeError, ValueError):
            # Best effort: an unusable site URL only disables the
            # first-/third-party comparison, vendor matching still runs.
            site_host = ""

    flagged_categories = ("essential", "functional", "necessary")
    out: list[dict] = []
    for ck in cookies_detailed:
        if not isinstance(ck, dict):
            continue
        name = (ck.get("name") or "").strip()
        domain = (ck.get("domain") or "").strip()
        declared = (ck.get("declared_category") or "").lower().strip()
        if not name or not domain:
            continue
        if declared not in flagged_categories:
            # Only "harmless" declarations can be contradicted by the domain.
            continue

        cookie_reg = _registrable_domain(domain)
        is_third_party = bool(site_host and cookie_reg != site_host)
        vendor_match = _lookup_vendor_by_domain(domain)

        if is_third_party:
            # Defeat-device pattern: essential/functional + third-party.
            vendor_name = vendor_match[0] if vendor_match else cookie_reg
            country = vendor_match[1] if vendor_match else ""
            third_country = country in _NON_EU_COUNTRIES
            label = (
                f"Cookie '{escape(name)}' deklariert als '{escape(declared)}', "
                f"wird aber von externer Domain "
                f"<strong>{escape(vendor_name)}</strong> "
                f"({escape(domain)}) gesetzt"
            )
            if third_country:
                label += f" — Drittland: {country}"
            out.append({
                "cookie": name,
                "declared": declared,
                "cookie_domain": domain,
                "site_domain": site_host,
                "vendor": vendor_name,
                "vendor_country": country,
                "third_country": third_country,
                "severity": "HIGH" if vendor_match else "MEDIUM",
                "label": label,
            })
        elif vendor_match:
            # First-party by domain comparison, yet the domain belongs to a
            # known tracker vendor (e.g. Google Tag Manager can appear as
            # first-party via CNAME) → still a mismatch.
            vendor_name, country = vendor_match
            out.append({
                "cookie": name,
                "declared": declared,
                "cookie_domain": domain,
                "site_domain": site_host,
                "vendor": vendor_name,
                "vendor_country": country,
                "third_country": country in _NON_EU_COUNTRIES,
                "severity": "MEDIUM",
                "label": (
                    f"Cookie '{escape(name)}' deklariert als '{escape(declared)}', "
                    f"Domain {escape(domain)} gehoert aber zu "
                    f"<strong>{escape(vendor_name)}</strong> "
                    f"({country})"
                ),
            })
    return out
|
|
|
|
|
|
def build_network_trace_block_html(findings: list[dict]) -> str:
    """Render network-trace findings as one self-contained HTML block.

    The heading counts all findings (and how many carry a truthy
    ``third_country`` flag); the bullet list shows at most the first 30.
    Each bullet prints the finding's ``label`` as-is, coloured red for
    severity ``HIGH`` and amber for anything else, followed by a
    "DRITTLAND <country>" badge when ``third_country`` is set.

    Args:
        findings: Finding dicts; ``severity`` and ``label`` are required
            keys, ``third_country`` and ``vendor_country`` are optional.

    Returns:
        The HTML fragment, or ``""`` when there are no findings.
    """
    if not findings:
        return ""

    total = len(findings)
    third_country_total = len(
        [entry for entry in findings if entry.get("third_country")]
    )

    rows: list[str] = []
    for entry in findings[:30]:
        if entry["severity"] == "HIGH":
            colour = "#dc2626"
        else:
            colour = "#d97706"

        badge = ""
        if entry.get("third_country"):
            vendor_country = entry.get("vendor_country", "")
            badge = (
                ' <span style="background:#fee2e2;color:#991b1b;'
                'padding:1px 5px;border-radius:8px;font-size:9px;'
                f'font-weight:600">DRITTLAND {vendor_country}</span>'
            )

        label = entry["label"]
        rows.append(
            '<li style="margin-bottom:6px;font-size:11px;line-height:1.5;'
            f'color:{colour}">{label}{badge}</li>'
        )

    plural = "" if total == 1 else "s"
    third_note = ""
    if third_country_total:
        third_note = f" — davon {third_country_total} mit Drittland-Transfer"

    container_open = (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:14px 18px;'
        'background:#fff7ed;border:1px solid #fed7aa;border-radius:8px">'
    )
    kicker = (
        '<div style="font-size:11px;color:#9a3412;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Cookie-Netzwerk-Verhalten (Defeat-Device-Heuristik)</div>'
    )
    heading = (
        '<h3 style="margin:0 0 6px;font-size:14px;color:#1e293b">'
        f'{total} Cookie{plural} '
        'mit Vendor-Domain-Diskrepanz'
        f'{third_note}'
        '</h3>'
    )
    intro = (
        '<p style="margin:0 0 10px;font-size:11px;color:#475569;line-height:1.5">'
        'Diese Cookies sind als "essential" oder "funktional" deklariert, '
        'werden aber von einer externen Domain gesetzt — typisch fuer '
        'getarnte Tracker. Drittland-Markierungen sind besonders kritisch: '
        'sie loesen Pflichten nach Art. 44-49 DSGVO aus (SCC / Angemessen-'
        'heitsbeschluss / Schrems II Folge-Massnahmen).'
        '</p>'
    )
    list_open = '<ul style="margin:0 0 0 18px;padding:0">'
    list_close = '</ul></div>'

    return "".join(
        [container_open, kicker, heading, intro, list_open, *rows, list_close]
    )
|