2e87b74749
CI / detect-changes (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 15s
CI / nodejs-build (push) Successful in 2m35s
CI / test-go (push) Failing after 51s
CI / iace-gt-coverage (push) Successful in 27s
CI / loc-budget (push) Failing after 16s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-python-backend (push) Successful in 39s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
Drei zusammenhaengende Stufen 'Cookie-Verhalten ist anders als deklariert' — analog zum VW-Diesel-Skandal-Pattern (Pruefstand vs Realbetrieb). P103 (Stufe 3) — cookie_value_entropy.py: Klassifiziert Cookie-Werte als flag/short_id/long_token/uuid/hash/json_blob via Shannon-Entropy + Regex-Patterns. Wenn ein als 'essential' deklarierter Cookie einen 64-char-Base64-Wert hat → MEDIUM-Finding 'Defeat-Device-Heuristik'. P104 (Stufe 4) — cookie_network_tracer.py: Vergleicht Cookie-Domain mit Site-Hauptdomain + bekannten Tracker-Vendoren (50 Domains gemapped: doubleclick.net, facebook.com, demdex.net, omtrdc.net, adsrvr.org, hotjar.com, ...). Wenn ein als 'essential' deklariertes Cookie von externer Tracker-Domain gesetzt wird → HIGH. Drittland-Cookies werden als 'DRITTLAND US/CN/...' markiert (Schrems-II-Folge). P105 (Stufe 5) — tcf_vendor_authority.py: Ingest-Endpoint POST /api/compliance/agent/admin/tcf-ingest holt die IAB TCF v2 Global Vendor List (vendor-list.consensu.org/v3) und upserted sie in cookie_library mit source='iab_tcf_v2'. cross_reference_with_tcf fuzzy-matched cmp_vendors gegen die TCF-Liste — wenn Vendor in TCF als Marketing gefuehrt aber Site sagt 'Funktional' → HIGH (externe Authority widerspricht der Deklaration). Alle drei rendern eigene Mail-Bloecke im Bereich Cookies (nach cookie_audit_html, vor library_mismatch_html). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
149 lines
5.5 KiB
Python
149 lines
5.5 KiB
Python
"""
|
|
P103 — Cookie-Value-Entropy-Check (Stufe 3).
|
|
|
|
Bewertet ob der Cookie-Wert zur deklarierten Kategorie passt:
|
|
* "Funktional" + kurzer Wert bis 4 Zeichen ('1', 'de') → konsistent (Flag)
|
|
* "Funktional" + 64-char-Base64 → INKONSISTENT (Tracking-ID-Pattern)
|
|
* "Marketing" + 32+ char Hash → konsistent
|
|
* "Marketing" + 2-char-Wert → konsistent (Boolean-Opt-Out)
|
|
|
|
Defeat-Device-Pattern: Site deklariert "Funktional" um Consent zu
|
|
umgehen, aber Wert sieht wie pseudonymisierte Tracking-ID aus.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import math
|
|
import re
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _shannon_entropy(s: str) -> float:
|
|
if not s:
|
|
return 0.0
|
|
from collections import Counter
|
|
n = len(s)
|
|
counts = Counter(s)
|
|
return -sum((c / n) * math.log2(c / n) for c in counts.values())
|
|
|
|
|
|
_BASE64_RE = re.compile(r"^[A-Za-z0-9+/=_-]{20,}$")
|
|
_HEX_RE = re.compile(r"^[a-fA-F0-9]{16,}$")
|
|
_UUID_RE = re.compile(
|
|
r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-"
|
|
r"[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
|
|
)
|
|
_FLAG_VALUES = {"0", "1", "true", "false", "yes", "no",
|
|
"de", "en", "de-de", "en-us", "fr-fr",
|
|
"accept", "deny", "essential", "on", "off"}
|
|
|
|
|
|
def _classify_value_shape(value: str) -> str:
    """Bucket a cookie value by its outward shape.

    The checks run in a fixed order and the first match wins.

    Args:
        value: Raw cookie value; surrounding whitespace is ignored.

    Returns:
        One of 'flag', 'short_id', 'long_token', 'uuid', 'hash' or
        'json_blob'.
    """
    text = value.strip() if value else ""
    size = len(text)

    # Empty values, anything up to four characters and the known
    # toggle/locale/consent words are plain flags.
    if size <= 4 or text.lower() in _FLAG_VALUES:
        return "flag"

    if _UUID_RE.match(text):
        return "uuid"

    # Pure hex only counts as a hash from 32 characters upwards.
    if size >= 32 and _HEX_RE.match(text):
        return "hash"

    # Base64-looking strings only count as tokens from 40 characters upwards.
    if size >= 40 and _BASE64_RE.match(text):
        return "long_token"

    if text.startswith(("{", "[")):
        return "json_blob"

    # Fallback for other alphabets: long and high-entropy means token.
    if size >= 16 and _shannon_entropy(text) > 3.5:
        return "long_token"

    # Only five-character values reach the 'flag' branch here.
    return "short_id" if size >= 6 else "flag"
|
|
|
|
|
|
def check_cookies_for_entropy_mismatch(
    cookies_detailed: list[dict] | None,
) -> list[dict]:
    """Report cookies whose value shape contradicts their declared category.

    A cookie declared in a consent-exempt category whose value is classified
    by ``_classify_value_shape`` as 'uuid', 'hash' or 'long_token' produces
    one MEDIUM finding.

    Args:
        cookies_detailed: Cookie records. Each dict is read for the keys
            ``name``, ``value`` and ``declared_category``. ``None`` or an
            empty list yields no findings. Entries that are not dicts, or
            that lack a name or a declared category, are skipped.

    Returns:
        One dict per suspicious cookie with the keys ``cookie``,
        ``declared``, ``value_shape``, ``value_len``, ``severity``,
        ``label`` and ``detail``.
    """
    # Consent-exempt categories. The German labels are included because the
    # module documentation and the report text speak of "Funktional" /
    # "notwendige" cookies, which the English-only list never matched.
    # NOTE(review): extend if the upstream scanner emits further labels.
    low_categories = frozenset(
        {"essential", "functional", "necessary", "funktional", "notwendig"}
    )
    id_shapes = frozenset({"uuid", "hash", "long_token"})

    findings: list[dict] = []
    for ck in cookies_detailed or ():
        if not isinstance(ck, dict):
            continue

        # str() guards against non-string payloads (e.g. a numeric value),
        # which would otherwise raise AttributeError on .strip().
        name = str(ck.get("name") or "").strip()
        value = str(ck.get("value") or "").strip()
        declared = str(ck.get("declared_category") or "").lower().strip()
        if not name or not declared:
            continue

        # Only consent-exempt categories can be contradicted by an ID value.
        if declared not in low_categories:
            continue

        shape = _classify_value_shape(value)
        if shape not in id_shapes:
            continue

        findings.append({
            "cookie": name,
            "declared": declared,
            "value_shape": shape,
            "value_len": len(value),
            "severity": "MEDIUM",
            "label": (
                f"Cookie '{name}' deklariert als '{declared}', "
                f"aber Wert ist ein {shape} ({len(value)} Zeichen) — "
                "typisches Tracking-ID-Pattern"
            ),
            "detail": (
                "Funktionale/notwendige Cookies speichern normalerweise "
                "kurze Flags (1, true, de-DE). Ein langer Hash/UUID-Wert "
                "in einem als 'essential' deklarierten Cookie ist ein "
                "Indikator fuer verstecktes Tracking — vergleichbar mit "
                "einem 'Defeat Device', das auf dem Pruefstand harmlos "
                "aussieht aber im Realbetrieb anderes tut."
            ),
        })
    return findings
|
|
|
|
|
|
def build_entropy_block_html(findings: list[dict]) -> str:
    """Render entropy-mismatch findings as an HTML block.

    Args:
        findings: Finding dicts; each is read for the keys ``cookie``,
            ``declared``, ``value_shape`` and ``value_len``.

    Returns:
        An empty string when there are no findings. Otherwise a ``<div>``
        whose headline counts all findings and whose list shows at most the
        first 25 of them.
    """
    if not findings:
        return ""

    # Function-scope import keeps this block self-contained.
    from html import escape

    def _esc(raw: object) -> str:
        # Cookie names and categories originate from scanned third-party
        # sites, so they must not be able to inject markup into the report.
        return escape(str(raw), quote=True)

    items = [
        f'<li style="margin-bottom:6px;font-size:11px;line-height:1.5">'
        f'<strong style="color:#d97706">{_esc(f["cookie"])}</strong> '
        f'<span style="color:#64748b">(deklariert: '
        f'<strong>{_esc(f["declared"])}</strong>) — Wert-Shape:</span> '
        f'<code style="background:#fef3c7;padding:1px 4px;border-radius:2px">'
        f'{_esc(f["value_shape"])}</code> '
        f'<span style="color:#64748b">({_esc(f["value_len"])} Zeichen)</span>'
        f'</li>'
        for f in findings[:25]
    ]

    total = len(findings)
    plural = "s" if total != 1 else ""
    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:14px 18px;'
        'background:#fffbeb;border:1px solid #fde68a;border-radius:8px">'
        '<div style="font-size:11px;color:#92400e;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Cookie-Werte-Plausibilitaet (Defeat-Device-Heuristik)</div>'
        '<h3 style="margin:0 0 6px;font-size:14px;color:#1e293b">'
        f'{total} Cookie{plural} '
        'mit verdaechtigem Wert-Pattern</h3>'
        '<p style="margin:0 0 10px;font-size:11px;color:#475569;line-height:1.5">'
        'Diese Cookies sind als "essential" oder "funktional" deklariert, '
        'ihr tatsaechlicher Wert sieht aber wie eine Tracking-ID aus '
        '(UUID, Hash, langer Base64-Token). Empfehlung: pruefen ob diese '
        'Cookies wirklich nur technisch notwendig sind oder de facto '
        'pseudonymisierte User-Tracker.</p>'
        '<ul style="margin:0 0 0 18px;padding:0">'
        + "".join(items) +
        '</ul></div>'
    )
|