feat: 4 banner check upgrades — 30 CMPs, stealth, Shadow DOM, categories
Build + Deploy / build-admin-compliance (push) Successful in 2m17s
Build + Deploy / build-backend-compliance (push) Successful in 3m17s
Build + Deploy / build-ai-sdk (push) Successful in 56s
Build + Deploy / build-developer-portal (push) Successful in 1m37s
Build + Deploy / build-tts (push) Successful in 1m33s
Build + Deploy / build-document-crawler (push) Successful in 42s
Build + Deploy / build-dsms-gateway (push) Successful in 33s
Build + Deploy / build-dsms-node (push) Successful in 16s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 25s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m33s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 1m18s
CI / test-python-backend (push) Successful in 53s
CI / test-python-document-crawler (push) Successful in 36s
CI / test-python-dsms-gateway (push) Successful in 33s
CI / validate-canonical-controls (push) Successful in 24s
Build + Deploy / trigger-orca (push) Successful in 3m19s

1. 30 CMP selectors (was 10): Added Sourcepoint, Iubenda, Complianz,
   CookieFirst, HubSpot, Osano, Piwik PRO, Cookie Consent (Insites),
   Axeptio, Termly, CookieScript, Civic UK, GDPR Cookie Compliance,
   CookieHub, Ketch, Admiral, Sibbo, Evidon, LiveRamp, Adsimple.
   Plus improved generic fallback: role=dialog, aria-label, data-* attrs.

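2. Playwright stealth mode: uses playwright-stealth to evade bot detection
   (optional; skipped when the package is not installed).
   Removes the WebDriver flag, simulates plugins, and sets a realistic
   viewport/locale/timezone (1920x1080, de-DE, Europe/Berlin).
   Launch args: --disable-blink-features=AutomationControlled,
   --window-size=1920,1080.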

3. Shadow DOM: Recursive JS-based search through shadowRoot elements
   for consent banners. Fallback click via page.evaluate() when
   normal Playwright selectors can't penetrate Shadow DOM.

4. Category selection UI: The user can choose which cookie categories to
   test (Notwendig, Statistik, Marketing, Funktional, Praeferenzen);
   an empty selection tests all detected categories.
   Pill-style checkboxes in BannerCheckTab, forwarded through the API chain.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-09 08:42:30 +02:00
parent 0371eecc03
commit 4bfb438c92
7 changed files with 510 additions and 48 deletions
+75 -12
View File
@@ -11,6 +11,12 @@ from dataclasses import dataclass, field
from playwright.async_api import async_playwright, Page, BrowserContext
try:
from playwright_stealth import stealth_async
HAS_STEALTH = True
except ImportError:
HAS_STEALTH = False
from services.banner_detector import detect_banner, click_button, BannerInfo
from services.script_analyzer import (
classify_scripts, find_tracking_services,
@@ -53,22 +59,43 @@ class ConsentTestResult:
banner_has_dse_link: bool = False
async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
"""Run 3-phase consent test on a URL."""
async def run_consent_test(
url: str, wait_secs: int = 10, categories: list[str] | None = None,
) -> ConsentTestResult:
"""Run 3-phase consent test on a URL.
Args:
url: Website URL to test.
wait_secs: Seconds to wait per phase.
categories: Optional list of category names to test (empty = test all).
"""
result = ConsentTestResult()
wait_ms = wait_secs * 1000
filter_cats = categories or []
async with async_playwright() as p:
browser = await p.chromium.launch(
headless=True,
args=["--no-sandbox", "--disable-dev-shm-usage"],
args=[
"--no-sandbox",
"--disable-dev-shm-usage",
"--disable-blink-features=AutomationControlled",
"--window-size=1920,1080",
],
)
try:
# ── Phase A: Before consent ──────────────────────────
logger.info("Phase A: First visit (no interaction)")
ctx_a = await browser.new_context(user_agent=USER_AGENT)
ctx_a = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
page_a = await ctx_a.new_page()
if HAS_STEALTH:
await stealth_async(page_a)
scripts_a = []
page_a.on("request", lambda req: _collect_script(req, scripts_a))
@@ -101,8 +128,15 @@ async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
# ── Phase B: After rejecting ─────────────────────────
logger.info("Phase B: Reject consent (%s)", banner.provider)
ctx_b = await browser.new_context(user_agent=USER_AGENT)
ctx_b = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
page_b = await ctx_b.new_page()
if HAS_STEALTH:
await stealth_async(page_b)
scripts_b = []
page_b.on("request", lambda req: _collect_script(req, scripts_b))
@@ -128,8 +162,15 @@ async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
# ── Phase C: After accepting ─────────────────────────
logger.info("Phase C: Accept consent (%s)", banner.provider)
ctx_c = await browser.new_context(user_agent=USER_AGENT)
ctx_c = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
page_c = await ctx_c.new_page()
if HAS_STEALTH:
await stealth_async(page_c)
scripts_c = []
page_c.on("request", lambda req: _collect_script(req, scripts_c))
@@ -154,18 +195,40 @@ async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
try:
from services.category_tester import detect_categories, test_single_category
ctx_cat = await browser.new_context(user_agent=USER_AGENT)
ctx_cat = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
page_cat = await ctx_cat.new_page()
if HAS_STEALTH:
await stealth_async(page_cat)
await page_cat.goto(url, wait_until="networkidle", timeout=20000)
await page_cat.wait_for_timeout(2000)
categories = await detect_categories(page_cat, banner)
detected_cats = await detect_categories(page_cat, banner)
await page_cat.close()
if categories:
logger.info("Testing %d categories individually", len(categories))
for cat in categories:
cat_ctx = await browser.new_context(user_agent=USER_AGENT)
# Filter to requested categories if specified
if filter_cats and detected_cats:
detected_cats = [
c for c in detected_cats if c.name in filter_cats
]
logger.info(
"Filtered to %d categories (requested: %s)",
len(detected_cats), filter_cats,
)
if detected_cats:
logger.info("Testing %d categories individually", len(detected_cats))
for cat in detected_cats:
cat_ctx = await browser.new_context(
user_agent=USER_AGENT,
viewport={"width": 1920, "height": 1080},
locale="de-DE",
timezone_id="Europe/Berlin",
)
cat_result = await test_single_category(cat_ctx, url, cat, banner, wait_ms)
result.category_tests.append(cat_result)
await cat_ctx.close()