f3e44cf59f
banner_detector.py, script_analyzer.py, category_tester.py, authenticated_scanner.py were only on the feature branch — needed for consent-tester to start. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
150 lines
4.9 KiB
Python
150 lines
4.9 KiB
Python
"""
Banner Detector — identifies Consent Management Platforms and their buttons.

Supports 10 CMPs with specific selectors, plus a generic text-based fallback.
"""
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from playwright.async_api import Page, Locator
|
|
|
|
|
|
@dataclass
class BannerInfo:
    """Result of a consent-banner detection pass.

    The three string fields default to "" so that a "nothing found" result
    can be written as ``BannerInfo(detected=False)``; passing all four
    values positionally or by keyword keeps working as before.
    """

    # True when a CMP-specific or generic banner match was found.
    detected: bool
    # Provider label: a CMP name, "Generic", or "" when nothing was detected.
    provider: str = ""
    # Selector for the "accept" button; "" when unknown.
    accept_selector: str = ""
    # Selector for the "reject" button; "" when unknown.
    reject_selector: str = ""
|
|
|
|
|
|
# CMP-specific selectors (ordered by market share).
#
# detect_banner() walks this list in order and returns the first entry whose
# "detect" selector matches at least one element, so earlier entries win.
# Each entry has four string keys:
#   name   - provider label reported in BannerInfo.provider
#   detect - selector whose presence identifies the CMP
#   accept - selector for the "accept all" button
#   reject - selector for the "reject" button
#
# NOTE(review): some "reject" values list a fallback after a comma (e.g.
# .cky-btn-customize, .onetrust-close-btn-handler). click_button() clicks
# `.first` of the combined match, which is presumably the first element in
# document order rather than the first alternative listed - verify that the
# primary reject button always wins on real banners.
CMP_SELECTORS = [
    {
        "name": "Didomi",
        "detect": "#didomi-host, [class*='didomi']",
        "accept": "#didomi-notice-agree-button",
        "reject": "#didomi-notice-disagree-button, .didomi-components-button--secondary",
    },
    {
        "name": "OneTrust",
        "detect": "#onetrust-banner-sdk, [class*='onetrust']",
        "accept": "#onetrust-accept-btn-handler",
        "reject": "#onetrust-reject-all-handler, .onetrust-close-btn-handler",
    },
    {
        "name": "Cookiebot",
        "detect": "#CybotCookiebotDialog, [class*='CybotCookiebot']",
        "accept": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
        "reject": "#CybotCookiebotDialogBodyButtonDecline",
    },
    {
        "name": "Usercentrics",
        "detect": "#usercentrics-root, [data-testid='uc-banner']",
        "accept": "[data-testid='uc-accept-all-button']",
        "reject": "[data-testid='uc-deny-all-button']",
    },
    {
        "name": "CookieYes",
        "detect": ".cky-consent-container, [class*='cky-']",
        "accept": ".cky-btn-accept",
        "reject": ".cky-btn-reject, .cky-btn-customize",
    },
    {
        "name": "Quantcast",
        "detect": ".qc-cmp2-container, [class*='qc-cmp']",
        "accept": "[class*='qc-cmp2-summary-buttons'] button:first-child",
        "reject": "[class*='qc-cmp2-summary-buttons'] button:last-child",
    },
    {
        "name": "Borlabs",
        "detect": "#BorlabsCookieBox, [class*='BorlabsCookie']",
        "accept": "#BorlabsCookieBox .cookie-accept, [data-cookie-accept]",
        "reject": "#BorlabsCookieBox .cookie-refuse, [data-cookie-refuse]",
    },
    {
        "name": "Consentmanager",
        "detect": "#cmpbox, [class*='cmpbox']",
        "accept": ".cmpboxbtn.cmpboxbtnyes",
        "reject": ".cmpboxbtn.cmpboxbtnno",
    },
    {
        "name": "Klaro",
        "detect": ".klaro, [class*='klaro']",
        "accept": ".klaro .cm-btn-accept",
        "reject": ".klaro .cm-btn-decline",
    },
    {
        "name": "TarteAuCitron",
        "detect": "#tarteaucitronRoot, [class*='tarteaucitron']",
        "accept": "#tarteaucitronPersonalize2",
        "reject": "#tarteaucitronAllDenied2",
    },
]
|
|
|
|
# Generic fallback patterns (text-based)
#
# Button labels that mean "accept". detect_banner() tries them in order with
# non-exact (substring) matching and stops at the first hit, so a longer
# phrase must come before any shorter phrase it contains - otherwise the
# shorter one always wins and the longer entry is never selected.
GENERIC_ACCEPT_TEXTS = [
    "Alle akzeptieren", "Alles akzeptieren", "Alle Cookies akzeptieren",
    "Accept All Cookies", "Accept all", "Akzeptieren", "Zustimmen",
    "Ja, einverstanden", "Einverstanden", "Ich stimme zu",
]
|
|
|
|
# Button labels that mean "reject" / "only necessary cookies".
# detect_banner() tries them in order with non-exact (substring) matching and
# stops at the first hit, so a longer phrase ("Reject all") is listed before
# the shorter phrase it contains ("Reject").
GENERIC_REJECT_TEXTS = [
    "Nur notwendige", "Nur essentielle", "Alle ablehnen", "Ablehnen",
    "Reject all", "Reject", "Nur erforderliche", "Nur technisch notwendige",
    "Decline", "Nein", "Nicht einverstanden",
]
|
|
|
|
|
|
async def _find_generic_button(page: Page, texts: list) -> str:
    """Return a selector for the first text in *texts* found on a button, or ""."""
    for text in texts:
        selector = f'button:has-text("{text}")'
        try:
            # Probe with the exact selector that will be returned, so a hit
            # guarantees the selector matches at least one element.
            if await page.locator(selector).count() > 0:
                return selector
        except Exception:
            # Best-effort: a failed probe for one text must not abort the
            # search or discard matches found for other texts.
            continue
    return ""


async def detect_banner(page: Page) -> BannerInfo:
    """Detect which CMP is used and return button selectors.

    CMP_SELECTORS is checked first, in list order; the first entry whose
    "detect" selector matches at least one element is returned with that
    entry's accept/reject selectors. If none match, buttons are searched by
    label using GENERIC_ACCEPT_TEXTS and GENERIC_REJECT_TEXTS; a generic
    banner is reported only when an accept button is found, and its
    reject selector is "" when no reject button is found.

    Args:
        page: Playwright page to inspect.

    Returns:
        A BannerInfo. When nothing is found, ``detected`` is False and all
        string fields are "".
    """
    # Try CMP-specific selectors first
    for entry in CMP_SELECTORS:
        try:
            present = await page.locator(entry["detect"]).count() > 0
        except Exception:
            # Best-effort: if this probe fails, move on to the next CMP.
            continue
        if present:
            return BannerInfo(
                detected=True,
                provider=entry["name"],
                accept_selector=entry["accept"],
                reject_selector=entry["reject"],
            )

    # Generic fallback — search for buttons by text
    accept = await _find_generic_button(page, GENERIC_ACCEPT_TEXTS)
    if accept:
        reject = await _find_generic_button(page, GENERIC_REJECT_TEXTS)
        return BannerInfo(
            detected=True,
            provider="Generic",
            accept_selector=accept,
            reject_selector=reject,
        )

    return BannerInfo(detected=False, provider="", accept_selector="", reject_selector="")
|
|
|
|
|
|
async def click_button(page: Page, selector: str, timeout: int = 5000) -> bool:
    """Try to click a consent button. Returns True if clicked successfully.

    Args:
        page: Playwright page that contains the button.
        selector: Selector of the button; an empty string means no button
            is known and nothing is attempted.
        timeout: Milliseconds allowed for each step: waiting for the button
            to become visible, and then performing the click.

    Returns:
        True if the click was performed; False if *selector* is empty or if
        waiting or clicking raised (including a timeout).
    """
    if not selector:
        return False
    try:
        # Only the first match is clicked when the selector matches several.
        locator = page.locator(selector).first
        await locator.wait_for(state="visible", timeout=timeout)
        # Bound the click as well; without an explicit timeout the click
        # would use Playwright's default action timeout instead of ours.
        await locator.click(timeout=timeout)
    except Exception:
        # Best-effort by contract: any failure is reported as "not clicked".
        return False
    return True
|