Files
breakpilot-compliance/consent-tester/services/banner_detector.py
T
Benjamin Admin f3e44cf59f fix: restore all missing consent-tester service modules
banner_detector.py, script_analyzer.py, category_tester.py, authenticated_scanner.py
were only on the feature branch — needed for consent-tester to start.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-05 00:14:26 +02:00

150 lines
4.9 KiB
Python

"""
Banner Detector — identifies Consent Management Platforms and their buttons.
Supports 10+ CMPs with specific selectors + generic fallback.
"""
from dataclasses import dataclass
from playwright.async_api import Page, Locator
@dataclass
class BannerInfo:
    """Outcome of a consent-banner detection run.

    The selector fields are Playwright selector strings; an empty string
    means that no selector is known for that button.
    """

    # True when a CMP-specific or generic banner match was found.
    detected: bool
    # CMP name taken from CMP_SELECTORS, "Generic" for the text-based
    # fallback, or "" when nothing was detected.
    provider: str
    # Selector of the button that accepts all cookies.
    accept_selector: str
    # Selector of the button that rejects non-essential cookies.
    reject_selector: str
# CMP-specific selectors (ordered by market share).
# Each row is (name, detect, accept, reject):
#   name   - provider label reported for a match
#   detect - selector whose presence identifies the CMP
#   accept - selector of the "accept all" button
#   reject - selector of the "reject" button
# NOTE(review): several reject selectors are comma lists with a fallback
# (e.g. ".cky-btn-customize", ".onetrust-close-btn-handler"). A selector list
# matches in DOM order, so confirm that a fallback element can never come
# before the real reject button.
_CMP_KEYS = ("name", "detect", "accept", "reject")
_CMP_ROWS = (
    (
        "Didomi",
        "#didomi-host, [class*='didomi']",
        "#didomi-notice-agree-button",
        "#didomi-notice-disagree-button, .didomi-components-button--secondary",
    ),
    (
        "OneTrust",
        "#onetrust-banner-sdk, [class*='onetrust']",
        "#onetrust-accept-btn-handler",
        "#onetrust-reject-all-handler, .onetrust-close-btn-handler",
    ),
    (
        "Cookiebot",
        "#CybotCookiebotDialog, [class*='CybotCookiebot']",
        "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
        "#CybotCookiebotDialogBodyButtonDecline",
    ),
    (
        "Usercentrics",
        "#usercentrics-root, [data-testid='uc-banner']",
        "[data-testid='uc-accept-all-button']",
        "[data-testid='uc-deny-all-button']",
    ),
    (
        "CookieYes",
        ".cky-consent-container, [class*='cky-']",
        ".cky-btn-accept",
        ".cky-btn-reject, .cky-btn-customize",
    ),
    (
        "Quantcast",
        ".qc-cmp2-container, [class*='qc-cmp']",
        "[class*='qc-cmp2-summary-buttons'] button:first-child",
        "[class*='qc-cmp2-summary-buttons'] button:last-child",
    ),
    (
        "Borlabs",
        "#BorlabsCookieBox, [class*='BorlabsCookie']",
        "#BorlabsCookieBox .cookie-accept, [data-cookie-accept]",
        "#BorlabsCookieBox .cookie-refuse, [data-cookie-refuse]",
    ),
    (
        "Consentmanager",
        "#cmpbox, [class*='cmpbox']",
        ".cmpboxbtn.cmpboxbtnyes",
        ".cmpboxbtn.cmpboxbtnno",
    ),
    (
        "Klaro",
        ".klaro, [class*='klaro']",
        ".klaro .cm-btn-accept",
        ".klaro .cm-btn-decline",
    ),
    (
        "TarteAuCitron",
        "#tarteaucitronRoot, [class*='tarteaucitron']",
        "#tarteaucitronPersonalize2",
        "#tarteaucitronAllDenied2",
    ),
)
CMP_SELECTORS = [dict(zip(_CMP_KEYS, row)) for row in _CMP_ROWS]
# Generic fallback patterns (text-based).
# Button labels (German and English) tried in list order when no known CMP
# matched, so earlier entries take precedence.
GENERIC_ACCEPT_TEXTS = [
    "Alle akzeptieren",
    "Alles akzeptieren",
    "Alle Cookies akzeptieren",
    "Accept all",
    "Accept All Cookies",
    "Akzeptieren",
    "Zustimmen",
    "Einverstanden",
    "Ich stimme zu",
    "Ja, einverstanden",
]
GENERIC_REJECT_TEXTS = [
    "Nur notwendige",
    "Nur essentielle",
    "Ablehnen",
    "Alle ablehnen",
    "Reject",
    "Reject all",
    "Nur erforderliche",
    "Nur technisch notwendige",
    "Decline",
    "Nein",
    "Nicht einverstanden",
]
def _button_selector(text: str) -> str:
    """Return the Playwright selector for a ``<button>`` containing *text*."""
    return f'button:has-text("{text}")'


async def _selector_matches(page: Page, selector: str) -> bool:
    """Return True if *selector* matches at least one element on *page*."""
    try:
        return await page.locator(selector).count() > 0
    except Exception:
        # Best-effort probing: a failing lookup counts as "no match" so the
        # remaining candidates are still tried.
        return False


async def _text_present(page: Page, text: str) -> bool:
    """Return True if *text* is found anywhere on *page* (non-exact match)."""
    try:
        return await page.get_by_text(text, exact=False).count() > 0
    except Exception:
        # Same best-effort policy as _selector_matches.
        return False


async def _find_generic_button(page: Page, texts: list) -> str:
    """Return a button selector for the first usable label in *texts*, or ``""``."""
    # Prefer a label that really sits inside a <button>: that is the element
    # the returned selector targets, so the selector is known to match.
    for text in texts:
        selector = _button_selector(text)
        if await _selector_matches(page, selector):
            return selector
    # Backward-compatible fallback: the label occurs somewhere on the page but
    # not inside a <button>. The banner is still reported; the selector may
    # match nothing, in which case clicking it simply fails.
    for text in texts:
        if await _text_present(page, text):
            return _button_selector(text)
    return ""


async def detect_banner(page: Page) -> BannerInfo:
    """Detect which CMP is used and return button selectors.

    Known CMPs from ``CMP_SELECTORS`` are probed first, in list order. If none
    matches, the page is searched for the generic accept/reject labels.

    Args:
        page: Playwright page that has already been navigated to the site.

    Returns:
        A ``BannerInfo``. ``detected`` is False (with empty strings) when
        neither a known CMP nor a generic accept label was found. For generic
        matches ``reject_selector`` is ``""`` if no reject label was found.
    """
    # Try CMP-specific selectors first.
    for cmp in CMP_SELECTORS:
        if await _selector_matches(page, cmp["detect"]):
            return BannerInfo(
                detected=True,
                provider=cmp["name"],
                accept_selector=cmp["accept"],
                reject_selector=cmp["reject"],
            )

    # Generic fallback: a banner is assumed only if an accept label exists.
    accept = await _find_generic_button(page, GENERIC_ACCEPT_TEXTS)
    if not accept:
        return BannerInfo(detected=False, provider="", accept_selector="", reject_selector="")

    # The reject lookup is independent, so a failure there can no longer
    # discard the accept match found above.
    reject = await _find_generic_button(page, GENERIC_REJECT_TEXTS)
    return BannerInfo(
        detected=True,
        provider="Generic",
        accept_selector=accept,
        reject_selector=reject,
    )
async def click_button(page: Page, selector: str, timeout: int = 5000) -> bool:
    """Try to click a consent button.

    Args:
        page: Playwright page to operate on.
        selector: Playwright selector of the button. An empty selector means
            "no button known" and nothing is clicked.
        timeout: Upper bound in milliseconds, applied to waiting for the
            button to become visible and to the click itself.

    Returns:
        True if the click was performed, False if the selector is empty or
        any step failed.
    """
    if not selector:
        return False
    try:
        # A selector list may match several elements; act on the first one.
        target = page.locator(selector).first
        await target.wait_for(state="visible", timeout=timeout)
        # Bound the click too. Without an explicit timeout Playwright uses its
        # default action timeout (30 s unless reconfigured), so a visible but
        # covered or animating button could stall far longer than `timeout`.
        await target.click(timeout=timeout)
    except Exception:
        # Deliberately best-effort: callers only need to know whether the
        # click happened, not why it failed.
        return False
    return True