feat: Deep consent verification — DataLayer, Storage, GCM, TCF

5 verification layers added to the 3-phase banner test:

1. DataLayer/GTM Interception: Proxy on window.dataLayer captures
   all push() events. Distinguishes safe lifecycle events (gtm.js,
   gtm.dom) from tracking events (page_view, conversion, purchase).
   Flags tracking events before consent as violations.

2. localStorage/sessionStorage Monitoring: Intercepts setItem() to
   detect tracking keys (_ga, _fbp, amplitude, mixpanel, etc.)
   written before consent.

3. Google Consent Mode v2 Runtime Verification: Reads actual GCM
   state (analytics_storage, ad_storage) per phase. Verifies that
   the state is denied by default before consent, stays denied
   after reject, and switches to granted after accept.

4. TCF v2.2 State: Reads __tcfapi('getTCData') if available.
   Verifies consent purpose states match user choice.

5. Cookie Attribute Analysis: Checks domain (1st vs 3rd party),
   expiry (>13 months), and the Secure flag for tracking cookies.

10 new L2 checks with expert hints (EDPB, CNIL, §25 TDDDG).
All interceptor calls are wrapped in try/except, so a failure is
logged as a warning instead of aborting the phase.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-10 08:58:44 +02:00
parent 99ef9873ad
commit d2dc0c9fe4
4 changed files with 499 additions and 0 deletions
@@ -23,6 +23,12 @@ from services.script_analyzer import (
find_violations_before_consent, find_violations_after_reject, Violation,
)
from services.banner_text_checker import check_banner_text as _check_banner_text
from services.consent_interceptor import (
INIT_SCRIPT as _INTERCEPTOR_INIT,
collect_intercepted_data as _collect_intercepted,
get_consent_state as _get_consent_state,
analyze_phase_data as _analyze_phase,
)
logger = logging.getLogger(__name__)
@@ -57,6 +63,8 @@ class ConsentTestResult:
banner_text_violations: list[Violation] = field(default_factory=list)
banner_has_impressum_link: bool = False
banner_has_dse_link: bool = False
# Deep verification (per-phase intercepted data)
deep_verification: dict = field(default_factory=dict)
async def run_consent_test(
@@ -94,6 +102,7 @@ async def run_consent_test(
timezone_id="Europe/Berlin",
)
page_a = await ctx_a.new_page()
await page_a.add_init_script(_INTERCEPTOR_INIT)
if HAS_STEALTH:
await stealth_async(page_a)
scripts_a = []
@@ -102,6 +111,19 @@ async def run_consent_test(
await page_a.goto(url, wait_until="networkidle", timeout=30000)
await page_a.wait_for_timeout(wait_ms)
# Deep verification: Phase A
try:
intercepted_a = await _collect_intercepted(page_a)
consent_state_a = await _get_consent_state(page_a)
deep_violations_a = _analyze_phase("before_consent", intercepted_a, consent_state_a)
result.deep_verification["before_consent"] = {
"intercepted": intercepted_a,
"consent_state": consent_state_a,
"violations": deep_violations_a,
}
except Exception as exc:
logger.warning("Phase A deep verification failed: %s", exc)
result.before_scripts = _get_page_scripts(scripts_a)
result.before_cookies = _get_cookie_names(await ctx_a.cookies())
result.before_tracking = find_tracking_services(result.before_scripts)
@@ -135,6 +157,7 @@ async def run_consent_test(
timezone_id="Europe/Berlin",
)
page_b = await ctx_b.new_page()
await page_b.add_init_script(_INTERCEPTOR_INIT)
if HAS_STEALTH:
await stealth_async(page_b)
scripts_b = []
@@ -150,6 +173,19 @@ async def run_consent_test(
else:
logger.warning("Could not click reject button")
# Deep verification: Phase B
try:
intercepted_b = await _collect_intercepted(page_b)
consent_state_b = await _get_consent_state(page_b)
deep_violations_b = _analyze_phase("after_reject", intercepted_b, consent_state_b)
result.deep_verification["after_reject"] = {
"intercepted": intercepted_b,
"consent_state": consent_state_b,
"violations": deep_violations_b,
}
except Exception as exc:
logger.warning("Phase B deep verification failed: %s", exc)
result.reject_scripts = _get_page_scripts(scripts_b)
result.reject_cookies = _get_cookie_names(await ctx_b.cookies())
reject_tracking = find_tracking_services(result.reject_scripts)
@@ -169,6 +205,7 @@ async def run_consent_test(
timezone_id="Europe/Berlin",
)
page_c = await ctx_c.new_page()
await page_c.add_init_script(_INTERCEPTOR_INIT)
if HAS_STEALTH:
await stealth_async(page_c)
scripts_c = []
@@ -184,6 +221,19 @@ async def run_consent_test(
else:
logger.warning("Could not click accept button")
# Deep verification: Phase C
try:
intercepted_c = await _collect_intercepted(page_c)
consent_state_c = await _get_consent_state(page_c)
deep_violations_c = _analyze_phase("after_accept", intercepted_c, consent_state_c)
result.deep_verification["after_accept"] = {
"intercepted": intercepted_c,
"consent_state": consent_state_c,
"violations": deep_violations_c,
}
except Exception as exc:
logger.warning("Phase C deep verification failed: %s", exc)
result.accept_scripts = _get_page_scripts(scripts_c)
result.accept_cookies = _get_cookie_names(await ctx_c.cookies())
accept_tracking = find_tracking_services(result.accept_scripts)