""" Banner Detector — identifies Consent Management Platforms and their buttons. Supports 30 CMPs with specific selectors + generic fallback + Shadow DOM. """ from dataclasses import dataclass from playwright.async_api import Page @dataclass class BannerInfo: detected: bool provider: str accept_selector: str reject_selector: str # CMP-specific selectors (ordered by market share) CMP_SELECTORS = [ { "name": "Didomi", "detect": "#didomi-host, [class*='didomi']", "accept": "#didomi-notice-agree-button", "reject": "#didomi-notice-disagree-button, .didomi-components-button--secondary", }, { "name": "OneTrust", "detect": "#onetrust-banner-sdk, [class*='onetrust']", "accept": "#onetrust-accept-btn-handler", "reject": "#onetrust-reject-all-handler, .onetrust-close-btn-handler", }, { "name": "Cookiebot", "detect": "#CybotCookiebotDialog, [class*='CybotCookiebot']", "accept": "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll", "reject": "#CybotCookiebotDialogBodyButtonDecline", }, { "name": "Usercentrics", "detect": "#usercentrics-root, [data-testid='uc-banner']", "accept": "[data-testid='uc-accept-all-button']", "reject": "[data-testid='uc-deny-all-button']", }, { "name": "CookieYes", "detect": ".cky-consent-container, [class*='cky-']", "accept": ".cky-btn-accept", "reject": ".cky-btn-reject, .cky-btn-customize", }, { "name": "Quantcast", "detect": ".qc-cmp2-container, [class*='qc-cmp']", "accept": "[class*='qc-cmp2-summary-buttons'] button:first-child", "reject": "[class*='qc-cmp2-summary-buttons'] button:last-child", }, { "name": "Borlabs", "detect": "#BorlabsCookieBox, [class*='BorlabsCookie']", "accept": "#BorlabsCookieBox .cookie-accept, [data-cookie-accept]", "reject": "#BorlabsCookieBox .cookie-refuse, [data-cookie-refuse]", }, { "name": "Consentmanager", "detect": "#cmpbox, [class*='cmpbox']", "accept": ".cmpboxbtn.cmpboxbtnyes", "reject": ".cmpboxbtn.cmpboxbtnno", }, { "name": "Klaro", "detect": ".klaro, [class*='klaro']", "accept": ".klaro .cm-btn-accept", "reject": ".klaro .cm-btn-decline", }, { "name": "TarteAuCitron", "detect": "#tarteaucitronRoot, [class*='tarteaucitron']", "accept": "#tarteaucitronPersonalize2", "reject": "#tarteaucitronAllDenied2", }, # --- 20 additional CMPs --- { "name": "Sourcepoint", "detect": "div[id^='sp_message']", "accept": ".sp_choice_type_11", "reject": ".sp_choice_type_13", }, { "name": "Axeptio", "detect": "#axeptio_widget", "accept": "[data-ax='accept']", "reject": "[data-ax='decline']", }, { "name": "Iubenda", "detect": "#iubenda-cs-banner", "accept": ".iubenda-cs-accept-btn", "reject": ".iubenda-cs-reject-btn", }, { "name": "Termly", "detect": "#termly-code-snippet-support", "accept": "[data-tid='banner-accept']", "reject": "[data-tid='banner-decline']", }, { "name": "CookieFirst", "detect": "#cookiefirst-root", "accept": "[data-cookiefirst-action='accept']", "reject": "[data-cookiefirst-action='reject']", }, { "name": "Complianz", "detect": "#cmplz-cookiebanner-container", "accept": ".cmplz-accept", "reject": ".cmplz-deny", }, { "name": "CookieScript", "detect": "#cookiescript_injected", "accept": "#cookiescript_accept", "reject": "#cookiescript_reject", }, { "name": "HubSpot", "detect": "#hs-eu-cookie-confirmation", "accept": "#hs-eu-confirmation-button", "reject": "#hs-eu-decline-button", }, { "name": "Civic UK", "detect": "#ccc, .ccc-content", "accept": "#ccc-recommended-settings", "reject": "#ccc-reject-settings", }, { "name": "GDPR Cookie Compliance", "detect": "#moove_gdpr_cookie_modal", "accept": ".moove-gdpr-modal-allow-all", "reject": ".moove-gdpr-modal-save-settings", }, { "name": "CookieHub", "detect": "#ch2-container", "accept": "#ch2-btn-accept", "reject": "#ch2-btn-decline", }, { "name": "Osano", "detect": ".osano-cm-dialog", "accept": ".osano-cm-accept-all", "reject": ".osano-cm-deny", }, { "name": "Ketch", "detect": "#ketch-consent", "accept": "[data-testid='accept-button']", "reject": "[data-testid='decline-button']", }, { "name": "Piwik PRO", "detect": "#ppms_cm_popup_overlay", "accept": "#ppms_cm_agree-to-all", "reject": "#ppms_cm_reject-all", }, { "name": "Cookie Consent (Insites)", "detect": ".cc-window", "accept": ".cc-btn.cc-allow", "reject": ".cc-btn.cc-deny", }, { "name": "Admiral", "detect": "[id^='admiral-']", "accept": "[class*='admiral-accept']", "reject": "[class*='admiral-reject']", }, { "name": "Sibbo", "detect": "#sibbo-cmp-layout", "accept": "#sibbo-cmp-accept-all", "reject": "#sibbo-cmp-reject-all", }, { "name": "Evidon", "detect": "#_evidon_banner", "accept": "#_evidon-accept-button", "reject": "#_evidon-decline-button", }, { "name": "LiveRamp", "detect": "#_lr-cookie-consent", "accept": "#_lr-accept-all", "reject": "#_lr-reject-all", }, { "name": "Adsimple", "detect": "#adconsent-usp-banner", "accept": ".adconsent-accept-all", "reject": ".adconsent-reject-all", }, ] # Generic fallback patterns (text-based) GENERIC_ACCEPT_TEXTS = [ "Alle akzeptieren", "Alles akzeptieren", "Alle Cookies akzeptieren", "Accept all", "Accept All Cookies", "Akzeptieren", "Zustimmen", "Einverstanden", "Ich stimme zu", "Ja, einverstanden", ] GENERIC_REJECT_TEXTS = [ "Nur notwendige", "Nur essentielle", "Ablehnen", "Alle ablehnen", "Reject", "Reject all", "Nur erforderliche", "Nur technisch notwendige", "Decline", "Nein", "Nicht einverstanden", ] # Attribute-based generic selectors for consent buttons _GENERIC_ATTR_ACCEPT = [ "[data-consent='accept']", "[data-cookie='accept']", "[data-gdpr='accept']", "[data-consent-accept]", "[data-cookie-accept]", ] _GENERIC_ATTR_REJECT = [ "[data-consent='reject']", "[data-cookie='reject']", "[data-gdpr='reject']", "[data-consent-reject]", "[data-cookie-reject]", ] # Dialog / aria selectors to find consent containers _DIALOG_SELECTORS = [ "[role='dialog']", "[aria-label*='cookie' i]", "[aria-label*='consent' i]", "[aria-label*='datenschutz' i]", "[aria-label*='Cookie' i]", ] # JavaScript for recursive Shadow DOM search _SHADOW_DETECT_JS = """ () => { const KEYWORDS = /cookie|consent|datenschutz|privacy/i; const results = []; function walk(root) { for (const el of root.querySelectorAll('*')) { if (el.shadowRoot) { const shadow = el.shadowRoot; const text = shadow.innerHTML || ''; if (KEYWORDS.test(text)) { const buttons = []; for (const btn of shadow.querySelectorAll( 'button, a[role="button"], [role="button"]' )) { const t = (btn.textContent || '').trim(); if (t.length > 0 && t.length < 80) { buttons.push(t); } } if (buttons.length > 0) { const tag = el.tagName.toLowerCase(); const id = el.id ? '#' + el.id : ''; results.push({ host: tag + id, buttons: buttons, preview: text.substring(0, 200) }); } } walk(shadow); } } } walk(document); return results.length > 0 ? results[0] : null; } """ _SHADOW_CLICK_JS = """ (textPattern) => { const regex = new RegExp(textPattern, 'i'); function walk(root) { for (const el of root.querySelectorAll('*')) { if (el.shadowRoot) { const btns = el.shadowRoot.querySelectorAll( 'button, a[role="button"], [role="button"]' ); for (const btn of btns) { if (regex.test(btn.textContent || '')) { btn.click(); return true; } } const found = walk(el.shadowRoot); if (found) return true; } } return false; } return walk(document); } """ async def _detect_in_shadow_dom(page: Page) -> BannerInfo | None: """Search Shadow DOM roots for consent banners as last-resort fallback.""" try: result = await page.evaluate(_SHADOW_DETECT_JS) if not result: return None buttons = result.get("buttons", []) host = result.get("host", "") accept_pat = "" reject_pat = "" accept_kw = ("accept", "akzeptieren", "zustimmen", "agree", "allow", "einverstanden", "alle") reject_kw = ("reject", "ablehnen", "deny", "decline", "refuse", "notwendig", "necessary", "essential") for text in buttons: low = text.lower() if not accept_pat and any(k in low for k in accept_kw): accept_pat = text elif not reject_pat and any(k in low for k in reject_kw): reject_pat = text if not accept_pat and not reject_pat: return None return BannerInfo( detected=True, provider=f"ShadowDOM({host})", accept_selector=f"shadow-click:{accept_pat}" if accept_pat else "", reject_selector=f"shadow-click:{reject_pat}" if reject_pat else "", ) except Exception: return None async def _click_in_shadow_dom(page: Page, text_pattern: str) -> bool: """Click a button inside a Shadow DOM root matching the text pattern.""" try: return await page.evaluate(_SHADOW_CLICK_JS, text_pattern) except Exception: return False async def _detect_generic_dialog(page: Page) -> BannerInfo | None: """Detect consent banners in dialog/aria containers.""" consent_kw = ("cookie", "consent", "datenschutz", "privacy") for sel in _DIALOG_SELECTORS: try: containers = page.locator(sel) count = await containers.count() if count == 0: continue container = containers.first text = (await container.inner_text(timeout=2000)).lower() if not any(kw in text for kw in consent_kw): continue # Found a consent dialog — look for accept/reject buttons accept = "" reject = "" for asel in _GENERIC_ATTR_ACCEPT: if await container.locator(asel).count() > 0: accept = f"{sel} {asel}" break for rsel in _GENERIC_ATTR_REJECT: if await container.locator(rsel).count() > 0: reject = f"{sel} {rsel}" break if not accept: for t in GENERIC_ACCEPT_TEXTS: if await container.get_by_text(t, exact=False).count() > 0: accept = f'{sel} button:has-text("{t}")' break if not reject: for t in GENERIC_REJECT_TEXTS: if await container.get_by_text(t, exact=False).count() > 0: reject = f'{sel} button:has-text("{t}")' break if accept or reject: return BannerInfo( detected=True, provider="Generic (dialog)", accept_selector=accept, reject_selector=reject, ) except Exception: continue return None async def _detect_generic_attr(page: Page) -> BannerInfo | None: """Detect consent buttons by data-consent/data-cookie/data-gdpr attributes.""" accept = "" reject = "" for sel in _GENERIC_ATTR_ACCEPT: try: if await page.locator(sel).count() > 0: accept = sel break except Exception: continue for sel in _GENERIC_ATTR_REJECT: try: if await page.locator(sel).count() > 0: reject = sel break except Exception: continue if accept or reject: return BannerInfo( detected=True, provider="Generic (attr)", accept_selector=accept, reject_selector=reject, ) return None async def detect_banner(page: Page) -> BannerInfo: """Detect which CMP is used and return button selectors.""" # 1. Try CMP-specific selectors for cmp in CMP_SELECTORS: try: if await page.locator(cmp["detect"]).count() > 0: return BannerInfo( detected=True, provider=cmp["name"], accept_selector=cmp["accept"], reject_selector=cmp["reject"], ) except Exception: continue # 2. Generic fallback — search buttons by text for text in GENERIC_ACCEPT_TEXTS: try: btn = page.get_by_text(text, exact=False) if await btn.count() > 0: accept = f'button:has-text("{text}")' reject = "" for rtext in GENERIC_REJECT_TEXTS: if await page.get_by_text(rtext, exact=False).count() > 0: reject = f'button:has-text("{rtext}")' break return BannerInfo( detected=True, provider="Generic", accept_selector=accept, reject_selector=reject, ) except Exception: continue # 3. Generic fallback — dialog/aria containers with consent keywords dialog_result = await _detect_generic_dialog(page) if dialog_result: return dialog_result # 4. Generic fallback — data-consent/data-cookie/data-gdpr attributes attr_result = await _detect_generic_attr(page) if attr_result: return attr_result # 5. Shadow DOM fallback — search inside shadow roots shadow_result = await _detect_in_shadow_dom(page) if shadow_result: return shadow_result return BannerInfo(detected=False, provider="", accept_selector="", reject_selector="") async def click_button(page: Page, selector: str, timeout: int = 5000) -> bool: """Try to click a consent button. Returns True if clicked successfully.""" if not selector: return False # Handle Shadow DOM selectors if selector.startswith("shadow-click:"): text_pattern = selector[len("shadow-click:"):] return await _click_in_shadow_dom(page, text_pattern) try: locator = page.locator(selector).first await locator.wait_for(state="visible", timeout=timeout) await locator.click() return True except Exception: # Fallback: try Shadow DOM click with selector text # Extract button text from selector like 'button:has-text("Accept all")' if ':has-text("' in selector: text = selector.split(':has-text("')[1].rstrip('")') return await _click_in_shadow_dom(page, text) return False