c867478791
CI / loc-budget (push) Failing after 16s
Build + Deploy / build-admin-compliance (push) Successful in 14s
Build + Deploy / build-backend-compliance (push) Successful in 16s
Build + Deploy / build-ai-sdk (push) Successful in 20s
Build + Deploy / build-developer-portal (push) Successful in 12s
Build + Deploy / build-tts (push) Successful in 15s
Build + Deploy / build-document-crawler (push) Successful in 13s
Build + Deploy / build-dsms-gateway (push) Successful in 13s
Build + Deploy / build-dsms-node (push) Successful in 12s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / test-python-document-crawler (push) Successful in 26s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m49s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 45s
CI / test-python-backend (push) Successful in 38s
CI / test-python-dsms-gateway (push) Successful in 23s
CI / validate-canonical-controls (push) Successful in 15s
Build + Deploy / trigger-orca (push) Successful in 2m23s
Phase 1-2 of the closed quality loop: - GVL cache (consent-tester/services/gvl_cache.py): downloads and caches IAB Global Vendor List with 24h TTL, resolves vendor IDs to names, purposes, policy URLs, retention, country - Vendor extraction (consent_interceptor.py): extract_tcf_vendors() reads __tcfapi after accept phase, resolves via GVL - Scan response: tcf_vendors field added to /scan endpoint - VVT mapper (vendor_vvt_mapper.py): maps TCF vendors to VVT format with purpose labels, Rechtsgrundlage, Drittland detection - Vendor cross-check (banner_cookie_cross_check.py): checks all TCF vendors against DSI text — missing vendors, undocumented transfers - Compliance check integrates Step 3d: TCF vendors vs DSI Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
226 lines
8.7 KiB
Python
226 lines
8.7 KiB
Python
"""
|
|
Banner ↔ Cookie-Richtlinie Cross-Check.
|
|
|
|
Compares what the cookie banner actually does (Playwright scan)
|
|
with what the cookie policy text promises. Finds mismatches:
|
|
- Services used but not documented
|
|
- Tracking before consent despite claiming consent-only
|
|
- False "no tracking" claims contradicted by actual cookies
|
|
"""
|
|
|
|
import logging
|
|
import re
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Policy wording that signals "consent is required before tracking".
# The pattern does not cross line breaks ('.' excludes newlines).
_CONSENT_REQUIRED_RE = re.compile(
    r"(?:einwilligung|consent|zustimmung).*?"
    r"(?:erforderlich|notwendig|vorher|vorab)"
)

# Phrases in the policy text that claim the site does no tracking/marketing.
_NO_TRACKING_CLAIMS = (
    "keine tracking", "kein tracking", "keine marketing",
    "keine werbe", "no tracking",
)

# Cookie-name prefixes treated as tracking cookies.
_TRACKING_COOKIE_PREFIXES = (
    "_ga", "_gid", "_fbp", "_fbc", "_gcl",
    "_pin", "_tt_", "li_sugr", "_hj",
)

# Very short names are matched exactly: used as prefixes they would also
# flag unrelated cookies such as "frontend_session" or "IDENTITY".
_TRACKING_COOKIE_EXACT = frozenset({"IDE", "fr"})


def _banner_finding(
    finding_id: str,
    label: str,
    severity: str,
    hint: str,
    matched_text: str = "",
) -> dict:
    """Build one failed CheckItem-compatible dict for the banner cross-check."""
    return {
        "id": finding_id,
        "label": label,
        "passed": False,
        "severity": severity,
        "level": 2,
        "parent": None,
        "skipped": False,
        "matched_text": matched_text,
        "hint": hint,
        "source": "cross_check",
    }


def _str_items(values) -> list[str]:
    """Return the non-blank string entries of *values*, stripped."""
    return [
        item.strip()
        for item in values or []
        if isinstance(item, str) and item.strip()
    ]


def _cookie_names(cookies) -> list[str]:
    """Return cookie names from a list of names or of cookie dicts.

    NOTE(review): the original code assumed plain name strings; dict entries
    are additionally read via their "name" key -- confirm against the scanner.
    """
    names: list[str] = []
    for cookie in cookies or []:
        if isinstance(cookie, dict):
            cookie = cookie.get("name")
        if isinstance(cookie, str):
            names.append(cookie)
    return names


def _service_mentioned(service_lower: str, text_lower: str) -> bool:
    """Check whether a (non-blank, lower-cased) service name occurs in the text.

    Tries the full name, the name without spaces and the first word; the
    first-word match is a deliberately loose substring heuristic.
    """
    candidates = {
        service_lower,
        service_lower.replace(" ", ""),
        service_lower.split()[0],
    }
    return any(candidate in text_lower for candidate in candidates)


def cross_check_banner_vs_cookie(
    banner_result: dict,
    cookie_text: str,
) -> list[dict]:
    """Cross-check: does the banner behavior match the cookie policy text?

    Four checks are run, in this order:

    1. Every tracking service seen before consent or after accept must be
       mentioned in the policy text (HIGH, one finding per service).
    2. Tracking before consent while the policy text speaks of required
       consent (CRITICAL).
    3. Tracking cookies set after accept while the policy text claims
       "no tracking" (CRITICAL).
    4. A banner was detected but the policy text is shorter than 100
       characters, i.e. effectively missing (HIGH).

    Args:
        banner_result: Scan result; reads "phases" -> "before_consent" /
            "after_accept" ("tracking_services", "cookies") and
            "banner_detected". Missing or ``None`` parts are treated as empty.
        cookie_text: Cookie policy text; ``None`` is treated as empty.

    Returns:
        List of CheckItem-compatible dicts, one per mismatch. Service
        findings keep first-seen order (before-consent services first), so
        the result is deterministic.
    """
    banner_result = banner_result or {}
    cookie_lower = (cookie_text or "").lower()

    phases = banner_result.get("phases") or {}
    before = phases.get("before_consent") or {}
    after_accept = phases.get("after_accept") or {}

    findings: list[dict] = []

    # 1. Services found by banner but not mentioned in cookie policy.
    # Both phases are filtered to strings; dict.fromkeys de-duplicates while
    # keeping a stable order (a plain set would reorder findings per run).
    tracking_before = _str_items(before.get("tracking_services"))
    tracking_after = _str_items(after_accept.get("tracking_services"))
    all_tracking = list(dict.fromkeys(tracking_before + tracking_after))

    for service in all_tracking:
        service_lower = service.lower()
        if _service_mentioned(service_lower, cookie_lower):
            continue
        findings.append(_banner_finding(
            f"cross-{service_lower.replace(' ', '_')[:20]}",
            f"Dienst '{service}' fehlt in Cookie-Richtlinie",
            "HIGH",
            (
                f"Der Banner-Scan hat '{service}' auf der Website erkannt, "
                "aber die Cookie-Richtlinie erwaehnt diesen Dienst nicht. "
                "Gemaess Art. 13(1)(e) DSGVO muessen alle Empfaenger/"
                "Dienste in der DSI benannt werden."
            ),
        ))

    # 2. Tracking before consent but cookie policy says "nur mit Einwilligung"
    if tracking_before and _CONSENT_REQUIRED_RE.search(cookie_lower):
        shown = ", ".join(tracking_before[:3])
        findings.append(_banner_finding(
            "cross-tracking-before-consent",
            "Tracking vor Consent trotz Einwilligungspflicht",
            "CRITICAL",
            (
                "Die Cookie-Richtlinie spricht von Einwilligungspflicht, "
                f"aber der Banner-Scan hat {len(tracking_before)} Tracking-"
                f"Dienst(e) VOR Consent erkannt: {shown}. "
                "Dies ist ein Verstoss gegen §25 Abs. 1 TDDDG."
            ),
            matched_text=shown,
        ))

    # 3. Banner sets tracking cookies but policy claims "no tracking"
    tracking_cookies = [
        name
        for name in _cookie_names(after_accept.get("cookies"))
        if name in _TRACKING_COOKIE_EXACT
        or name.startswith(_TRACKING_COOKIE_PREFIXES)
    ]
    if tracking_cookies and any(
        claim in cookie_lower for claim in _NO_TRACKING_CLAIMS
    ):
        shown = ", ".join(tracking_cookies[:5])
        findings.append(_banner_finding(
            "cross-false-no-tracking",
            "Cookie-Richtlinie behauptet 'kein Tracking' — "
            "Banner widerspricht",
            "CRITICAL",
            (
                "Die Cookie-Richtlinie behauptet kein Tracking, aber "
                "nach Akzeptieren wurden Tracking-Cookies gesetzt: "
                f"{shown}. "
                "Transparenzverstoss nach Art. 5(1)(a) DSGVO."
            ),
            matched_text=shown,
        ))

    # 4. Banner detected but no (meaningful) cookie policy provided
    if banner_result.get("banner_detected", False) and len(cookie_lower.strip()) < 100:
        findings.append(_banner_finding(
            "cross-banner-no-policy",
            "Cookie-Banner vorhanden aber keine Cookie-Richtlinie",
            "HIGH",
            (
                "Die Website zeigt ein Cookie-Banner, aber es wurde keine "
                "Cookie-Richtlinie eingereicht. Eine separate Cookie-Richtlinie "
                "oder ein Cookie-Abschnitt in der DSI ist nach §25 TDDDG "
                "erforderlich wenn nicht-essentielle Cookies verwendet werden."
            ),
        ))

    logger.info("Cross-check: %d findings (%d services, %d tracking before)",
                len(findings), len(all_tracking), len(tracking_before))
    return findings
|
|
|
|
|
|
# Generic transfer-mechanism wording; one occurrence anywhere in the DSI is
# accepted as documentation for every third-country vendor.
_TRANSFER_MECHANISM_KEYWORDS = (
    "scc", "standardvertragsklausel", "data privacy framework",
    "angemessenheitsbeschluss",
)

# Country wording accepted after a vendor name on the same line. "usa" needs
# word boundaries, otherwise it also matches inside words like "zusammen".
_TRANSFER_COUNTRY_PATTERN = r".*(?:\busa\b|drittl(?:a|ä|ae)nd)"


def _vendor_finding(finding_id: str, label: str, severity: str, hint: str) -> dict:
    """Build one failed CheckItem-compatible dict for the vendor cross-check."""
    return {
        "id": finding_id,
        "label": label,
        "passed": False,
        "severity": severity,
        "level": 2,
        "parent": None,
        "skipped": False,
        "matched_text": "",
        "hint": hint,
        "source": "vendor_cross_check",
    }


def _vendor_name_mentioned(name_lower: str, dsi_lower: str) -> bool:
    """Check whether a (non-blank, lower-cased) vendor name occurs in the DSI.

    Tries the full name, the name without spaces and the first word; the
    first-word match is a deliberately loose substring heuristic.
    """
    candidates = {
        name_lower,
        name_lower.replace(" ", ""),
        name_lower.split()[0],
    }
    return any(candidate in dsi_lower for candidate in candidates)


def _transfer_documented(name_lower: str, dsi_lower: str) -> bool:
    """Check whether the DSI documents a third-country transfer for a vendor.

    True if a generic transfer-mechanism keyword occurs anywhere, or if the
    vendor name is followed on the same line by "USA" / "Drittland".
    """
    if any(keyword in dsi_lower for keyword in _TRANSFER_MECHANISM_KEYWORDS):
        return True
    # The vendor name is escaped so it is matched literally; the original
    # substring test on 'name + ".*" + "usa"' could never succeed.
    pattern = re.escape(name_lower) + _TRANSFER_COUNTRY_PATTERN
    return re.search(pattern, dsi_lower) is not None


def cross_check_vendors_vs_dsi(
    vendors: list[dict],
    dsi_text: str,
) -> list[dict]:
    """Cross-check: are all TCF vendors documented in the DSI?

    Checks per vendor:
    1. Is the vendor mentioned by name? If not: HIGH finding.
    2. If the vendor is mentioned and flagged "drittland": is a third-country
       transfer documented? If not: MEDIUM finding.

    Storage duration is not checked here.

    Args:
        vendors: Vendor dicts; reads "name", "vendor_id", "zweck_kurz",
            "drittland" and "land". Entries that are not dicts or have no
            usable name are skipped. ``None`` is treated as empty.
        dsi_text: Privacy notice text; ``None`` is treated as empty.

    Returns:
        List of CheckItem-compatible dicts, in vendor order.
    """
    vendors = vendors or []
    dsi_lower = (dsi_text or "").lower()
    findings: list[dict] = []

    for v in vendors:
        if not isinstance(v, dict):
            continue
        raw_name = v.get("name")
        name = raw_name.strip() if isinstance(raw_name, str) else ""
        if not name:
            continue
        name_lower = name.lower()

        # Same id fallback for both finding types, so ids stay distinct even
        # when the vendor carries no vendor_id.
        vendor_id = v.get("vendor_id")
        id_suffix = name_lower[:20] if vendor_id is None else vendor_id

        if not _vendor_name_mentioned(name_lower, dsi_lower):
            findings.append(_vendor_finding(
                f"vendor-{id_suffix}",
                f"Verarbeiter '{name}' fehlt in DSI",
                "HIGH",
                (
                    f"Der Cookie-Banner listet '{name}' als Verarbeiter "
                    f"({v.get('zweck_kurz', 'unbekannt')}), aber die DSI "
                    "erwaehnt diesen Dienst nicht. Art. 13(1)(e) DSGVO "
                    "verlangt die Benennung aller Empfaenger."
                ),
            ))
            # An unmentioned vendor is already flagged; the transfer check
            # only applies to vendors the DSI actually names.
            continue

        if v.get("drittland") and not _transfer_documented(name_lower, dsi_lower):
            country = v.get("land", "Drittland")
            findings.append(_vendor_finding(
                f"vendor-transfer-{id_suffix}",
                f"Drittlandtransfer fuer '{name}' nicht dokumentiert",
                "MEDIUM",
                (
                    f"'{name}' verarbeitet Daten in {country} (ausserhalb EWR). "
                    "Die DSI muss den Transfermechanismus benennen: "
                    "SCC (Art. 46(2)(c)) oder DPF (Angemessenheitsbeschluss)."
                ),
            ))

    logger.info("Vendor cross-check: %d findings for %d vendors",
                len(findings), len(vendors))
    return findings
|