4bfb438c92
Build + Deploy / build-admin-compliance (push) Successful in 2m17s
Build + Deploy / build-backend-compliance (push) Successful in 3m17s
Build + Deploy / build-ai-sdk (push) Successful in 56s
Build + Deploy / build-developer-portal (push) Successful in 1m37s
Build + Deploy / build-tts (push) Successful in 1m33s
Build + Deploy / build-document-crawler (push) Successful in 42s
Build + Deploy / build-dsms-gateway (push) Successful in 33s
Build + Deploy / build-dsms-node (push) Successful in 16s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 25s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 3m33s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 1m18s
CI / test-python-backend (push) Successful in 53s
CI / test-python-document-crawler (push) Successful in 36s
CI / test-python-dsms-gateway (push) Successful in 33s
CI / validate-canonical-controls (push) Successful in 24s
Build + Deploy / trigger-orca (push) Successful in 3m19s
1. 30 CMP selectors (was 10): Added Sourcepoint, Iubenda, Complianz, CookieFirst, HubSpot, Osano, Piwik PRO, Cookie Consent (Insites), Axeptio, Termly, CookieScript, Civic UK, GDPR Cookie Compliance, CookieHub, Ketch, Admiral, Sibbo, Evidon, LiveRamp, Adsimple. Plus improved generic fallback: role=dialog, aria-label, data-* attrs. 2. Playwright stealth mode: playwright-stealth against bot detection. Removes WebDriver flag, simulates plugins, realistic viewport/locale. Launch args: --disable-blink-features=AutomationControlled. 3. Shadow DOM: Recursive JS-based search through shadowRoot elements for consent banners. Fallback click via page.evaluate() when normal Playwright selectors can't penetrate Shadow DOM. 4. Category selection UI: User can choose which cookie categories to test (Notwendig, Statistik, Marketing, Funktional, Praeferenzen). Pill-style checkboxes in BannerCheckTab, forwarded through API chain. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
482 lines
16 KiB
Python
482 lines
16 KiB
Python
"""
|
|
Banner Detector — identifies Consent Management Platforms and their buttons.
|
|
|
|
Supports 30 CMPs with specific selectors + generic fallback + Shadow DOM.
|
|
"""
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from playwright.async_api import Page
|
|
|
|
|
|
@dataclass
class BannerInfo:
    """Result of a consent-banner detection pass.

    Attributes:
        detected: True when some detection stage found a banner.
        provider: Name of the matched CMP, or a label for the generic /
            Shadow DOM stage that matched; empty when nothing was detected.
        accept_selector: Selector for the "accept" button, or "" if unknown.
            May carry the ``shadow-click:`` prefix for Shadow DOM buttons.
        reject_selector: Selector for the "reject" button, or "" if unknown.
            May carry the ``shadow-click:`` prefix for Shadow DOM buttons.
    """

    detected: bool  # whether any banner was found
    provider: str  # CMP name or generic-stage label
    accept_selector: str  # how to reach the accept button ("" = none)
    reject_selector: str  # how to reach the reject button ("" = none)
|
|
|
|
|
|
# CMP-specific selectors (ordered by market share).
# Each row is (name, detect, accept, reject) and is expanded into a dict with
# exactly those keys, so consumers keep using cmp["name"], cmp["detect"], etc.
CMP_SELECTORS = [
    dict(zip(("name", "detect", "accept", "reject"), row))
    for row in (
        (
            "Didomi",
            "#didomi-host, [class*='didomi']",
            "#didomi-notice-agree-button",
            "#didomi-notice-disagree-button, .didomi-components-button--secondary",
        ),
        (
            "OneTrust",
            "#onetrust-banner-sdk, [class*='onetrust']",
            "#onetrust-accept-btn-handler",
            "#onetrust-reject-all-handler, .onetrust-close-btn-handler",
        ),
        (
            "Cookiebot",
            "#CybotCookiebotDialog, [class*='CybotCookiebot']",
            "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
            "#CybotCookiebotDialogBodyButtonDecline",
        ),
        (
            "Usercentrics",
            "#usercentrics-root, [data-testid='uc-banner']",
            "[data-testid='uc-accept-all-button']",
            "[data-testid='uc-deny-all-button']",
        ),
        (
            "CookieYes",
            ".cky-consent-container, [class*='cky-']",
            ".cky-btn-accept",
            ".cky-btn-reject, .cky-btn-customize",
        ),
        (
            "Quantcast",
            ".qc-cmp2-container, [class*='qc-cmp']",
            "[class*='qc-cmp2-summary-buttons'] button:first-child",
            "[class*='qc-cmp2-summary-buttons'] button:last-child",
        ),
        (
            "Borlabs",
            "#BorlabsCookieBox, [class*='BorlabsCookie']",
            "#BorlabsCookieBox .cookie-accept, [data-cookie-accept]",
            "#BorlabsCookieBox .cookie-refuse, [data-cookie-refuse]",
        ),
        (
            "Consentmanager",
            "#cmpbox, [class*='cmpbox']",
            ".cmpboxbtn.cmpboxbtnyes",
            ".cmpboxbtn.cmpboxbtnno",
        ),
        (
            "Klaro",
            ".klaro, [class*='klaro']",
            ".klaro .cm-btn-accept",
            ".klaro .cm-btn-decline",
        ),
        (
            "TarteAuCitron",
            "#tarteaucitronRoot, [class*='tarteaucitron']",
            "#tarteaucitronPersonalize2",
            "#tarteaucitronAllDenied2",
        ),
        # --- 20 additional CMPs ---
        ("Sourcepoint", "div[id^='sp_message']", ".sp_choice_type_11", ".sp_choice_type_13"),
        ("Axeptio", "#axeptio_widget", "[data-ax='accept']", "[data-ax='decline']"),
        ("Iubenda", "#iubenda-cs-banner", ".iubenda-cs-accept-btn", ".iubenda-cs-reject-btn"),
        (
            "Termly",
            "#termly-code-snippet-support",
            "[data-tid='banner-accept']",
            "[data-tid='banner-decline']",
        ),
        (
            "CookieFirst",
            "#cookiefirst-root",
            "[data-cookiefirst-action='accept']",
            "[data-cookiefirst-action='reject']",
        ),
        ("Complianz", "#cmplz-cookiebanner-container", ".cmplz-accept", ".cmplz-deny"),
        ("CookieScript", "#cookiescript_injected", "#cookiescript_accept", "#cookiescript_reject"),
        (
            "HubSpot",
            "#hs-eu-cookie-confirmation",
            "#hs-eu-confirmation-button",
            "#hs-eu-decline-button",
        ),
        ("Civic UK", "#ccc, .ccc-content", "#ccc-recommended-settings", "#ccc-reject-settings"),
        (
            "GDPR Cookie Compliance",
            "#moove_gdpr_cookie_modal",
            ".moove-gdpr-modal-allow-all",
            ".moove-gdpr-modal-save-settings",
        ),
        ("CookieHub", "#ch2-container", "#ch2-btn-accept", "#ch2-btn-decline"),
        ("Osano", ".osano-cm-dialog", ".osano-cm-accept-all", ".osano-cm-deny"),
        (
            "Ketch",
            "#ketch-consent",
            "[data-testid='accept-button']",
            "[data-testid='decline-button']",
        ),
        ("Piwik PRO", "#ppms_cm_popup_overlay", "#ppms_cm_agree-to-all", "#ppms_cm_reject-all"),
        ("Cookie Consent (Insites)", ".cc-window", ".cc-btn.cc-allow", ".cc-btn.cc-deny"),
        (
            "Admiral",
            "[id^='admiral-']",
            "[class*='admiral-accept']",
            "[class*='admiral-reject']",
        ),
        ("Sibbo", "#sibbo-cmp-layout", "#sibbo-cmp-accept-all", "#sibbo-cmp-reject-all"),
        ("Evidon", "#_evidon_banner", "#_evidon-accept-button", "#_evidon-decline-button"),
        ("LiveRamp", "#_lr-cookie-consent", "#_lr-accept-all", "#_lr-reject-all"),
        (
            "Adsimple",
            "#adconsent-usp-banner",
            ".adconsent-accept-all",
            ".adconsent-reject-all",
        ),
    )
]
|
|
|
|
# Generic fallback patterns (text-based).
# Button labels treated as "accept"; tried in list order, so order matters.
GENERIC_ACCEPT_TEXTS = [
    "Alle akzeptieren",
    "Alles akzeptieren",
    "Alle Cookies akzeptieren",
    "Accept all",
    "Accept All Cookies",
    "Akzeptieren",
    "Zustimmen",
    "Einverstanden",
    "Ich stimme zu",
    "Ja, einverstanden",
]
|
|
|
|
# Button labels treated as "reject"; tried in list order, so order matters.
GENERIC_REJECT_TEXTS = [
    "Nur notwendige",
    "Nur essentielle",
    "Ablehnen",
    "Alle ablehnen",
    "Reject",
    "Reject all",
    "Nur erforderliche",
    "Nur technisch notwendige",
    "Decline",
    "Nein",
    "Nicht einverstanden",
]
|
|
|
|
# Attribute-based generic selectors for consent buttons
|
|
_GENERIC_ATTR_ACCEPT = [
|
|
"[data-consent='accept']", "[data-cookie='accept']", "[data-gdpr='accept']",
|
|
"[data-consent-accept]", "[data-cookie-accept]",
|
|
]
|
|
_GENERIC_ATTR_REJECT = [
|
|
"[data-consent='reject']", "[data-cookie='reject']", "[data-gdpr='reject']",
|
|
"[data-consent-reject]", "[data-cookie-reject]",
|
|
]
|
|
|
|
# Dialog / aria selectors to find consent containers
|
|
_DIALOG_SELECTORS = [
|
|
"[role='dialog']",
|
|
"[aria-label*='cookie' i]", "[aria-label*='consent' i]",
|
|
"[aria-label*='datenschutz' i]", "[aria-label*='Cookie' i]",
|
|
]
|
|
|
|
# JavaScript for recursive Shadow DOM search.
# Evaluated in the page. Walks every element under `document`; for each
# element that exposes a `shadowRoot`, it tests the root's innerHTML against
# the cookie/consent/datenschutz/privacy keyword regex. On a match it collects
# the trimmed text of button-like elements (1-79 characters) and, if there is
# at least one, records {host: "<tag>[#id]", buttons: [...], preview: first
# 200 chars of innerHTML}. It recurses into every shadow root regardless of
# the keyword match and returns the first recorded entry, or null.
_SHADOW_DETECT_JS = """
() => {
    const KEYWORDS = /cookie|consent|datenschutz|privacy/i;
    const results = [];
    function walk(root) {
        for (const el of root.querySelectorAll('*')) {
            if (el.shadowRoot) {
                const shadow = el.shadowRoot;
                const text = shadow.innerHTML || '';
                if (KEYWORDS.test(text)) {
                    const buttons = [];
                    for (const btn of shadow.querySelectorAll(
                        'button, a[role="button"], [role="button"]'
                    )) {
                        const t = (btn.textContent || '').trim();
                        if (t.length > 0 && t.length < 80) {
                            buttons.push(t);
                        }
                    }
                    if (buttons.length > 0) {
                        const tag = el.tagName.toLowerCase();
                        const id = el.id ? '#' + el.id : '';
                        results.push({
                            host: tag + id,
                            buttons: buttons,
                            preview: text.substring(0, 200)
                        });
                    }
                }
                walk(shadow);
            }
        }
    }
    walk(document);
    return results.length > 0 ? results[0] : null;
}
"""
|
|
|
|
_SHADOW_CLICK_JS = """
|
|
(textPattern) => {
|
|
const regex = new RegExp(textPattern, 'i');
|
|
function walk(root) {
|
|
for (const el of root.querySelectorAll('*')) {
|
|
if (el.shadowRoot) {
|
|
const btns = el.shadowRoot.querySelectorAll(
|
|
'button, a[role="button"], [role="button"]'
|
|
);
|
|
for (const btn of btns) {
|
|
if (regex.test(btn.textContent || '')) {
|
|
btn.click();
|
|
return true;
|
|
}
|
|
}
|
|
const found = walk(el.shadowRoot);
|
|
if (found) return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
return walk(document);
|
|
}
|
|
"""
|
|
|
|
|
|
async def _detect_in_shadow_dom(page: Page) -> BannerInfo | None:
    """Search Shadow DOM roots for consent banners as last-resort fallback.

    Evaluates ``_SHADOW_DETECT_JS`` in the page and classifies the returned
    button labels into one accept and one reject label by keyword.

    Args:
        page: Playwright page to inspect.

    Returns:
        A ``BannerInfo`` whose selectors use the ``shadow-click:<label>``
        form (empty string for a side that was not found), or ``None`` when
        no shadow-root banner was found, no label could be classified, or the
        evaluation failed.
    """
    accept_kw = ("accept", "akzeptieren", "zustimmen", "agree", "allow",
                 "einverstanden", "alle")
    reject_kw = ("reject", "ablehnen", "deny", "decline", "refuse",
                 "notwendig", "necessary", "essential")
    try:
        result = await page.evaluate(_SHADOW_DETECT_JS)
        if not result:
            return None
        buttons = result.get("buttons", [])
        host = result.get("host", "")
        accept_pat = ""
        reject_pat = ""
        for text in buttons:
            low = text.lower()
            # Reject keywords win over accept keywords: labels such as
            # "Alle ablehnen" or "Nur notwendige akzeptieren" contain an
            # accept keyword ("alle", "akzeptieren") but are refusals, and
            # must never end up as the accept button.
            if any(k in low for k in reject_kw):
                if not reject_pat:
                    reject_pat = text
            elif not accept_pat and any(k in low for k in accept_kw):
                accept_pat = text
        if not accept_pat and not reject_pat:
            return None
        return BannerInfo(
            detected=True,
            provider=f"ShadowDOM({host})",
            accept_selector=f"shadow-click:{accept_pat}" if accept_pat else "",
            reject_selector=f"shadow-click:{reject_pat}" if reject_pat else "",
        )
    except Exception:
        # Best-effort fallback: any evaluation or shape error means
        # "nothing detected" rather than a failed scan.
        return None
|
|
|
|
|
|
async def _click_in_shadow_dom(page: Page, text_pattern: str) -> bool:
    """Click the first shadow-root button whose text matches *text_pattern*.

    The pattern is handed unchanged to ``_SHADOW_CLICK_JS``, which runs in
    the page. Any evaluation error is reported as "not clicked".

    Returns:
        The value returned by the in-page script (true when a button was
        clicked), or ``False`` if evaluation raised.
    """
    try:
        clicked = await page.evaluate(_SHADOW_CLICK_JS, text_pattern)
    except Exception:
        return False
    return clicked
|
|
|
|
|
|
async def _find_dialog_buttons(container, scope: str) -> tuple[str, str]:
    """Return ``(accept, reject)`` selectors for buttons inside *container*.

    Attribute-based selectors are preferred; the text lists are only
    consulted for a side that the attributes did not resolve. Returned
    selectors are prefixed with *scope* (the container selector); a side
    that was not found is the empty string.
    """
    accept = ""
    reject = ""
    for asel in _GENERIC_ATTR_ACCEPT:
        if await container.locator(asel).count() > 0:
            accept = f"{scope} {asel}"
            break
    for rsel in _GENERIC_ATTR_REJECT:
        if await container.locator(rsel).count() > 0:
            reject = f"{scope} {rsel}"
            break
    if not accept:
        for label in GENERIC_ACCEPT_TEXTS:
            if await container.get_by_text(label, exact=False).count() > 0:
                accept = f'{scope} button:has-text("{label}")'
                break
    if not reject:
        for label in GENERIC_REJECT_TEXTS:
            if await container.get_by_text(label, exact=False).count() > 0:
                reject = f'{scope} button:has-text("{label}")'
                break
    return accept, reject


async def _detect_generic_dialog(page: Page) -> BannerInfo | None:
    """Detect consent banners in dialog/aria containers.

    For each selector in ``_DIALOG_SELECTORS`` the first few matching
    elements are inspected (not just the first one), so an unrelated dialog
    that precedes the consent dialog in DOM order does not hide it. A
    container qualifies when its inner text mentions a consent keyword and
    at least one accept or reject button can be located inside it.

    Args:
        page: Playwright page to inspect.

    Returns:
        A ``BannerInfo`` with provider ``"Generic (dialog)"``, or ``None``
        when no qualifying container was found.
    """
    consent_kw = ("cookie", "consent", "datenschutz", "privacy")
    # Upper bound per selector; keeps the scan cheap on pages with many
    # dialog-like elements.
    max_containers = 5
    for sel in _DIALOG_SELECTORS:
        try:
            containers = page.locator(sel)
            count = await containers.count()
        except Exception:
            continue
        for index in range(min(count, max_containers)):
            try:
                container = containers.nth(index)
                text = (await container.inner_text(timeout=2000)).lower()
                if not any(kw in text for kw in consent_kw):
                    continue
                # Found a consent dialog — look for accept/reject buttons
                accept, reject = await _find_dialog_buttons(container, sel)
            except Exception:
                # Best-effort: a container that cannot be read is skipped.
                continue
            if accept or reject:
                return BannerInfo(
                    detected=True, provider="Generic (dialog)",
                    accept_selector=accept, reject_selector=reject,
                )
    return None
|
|
|
|
|
|
async def _detect_generic_attr(page: Page) -> BannerInfo | None:
    """Detect consent buttons by data-consent/data-cookie/data-gdpr attributes.

    Returns a ``BannerInfo`` with provider ``"Generic (attr)"`` when at least
    one accept or reject attribute selector matches an element on the page,
    otherwise ``None``.
    """

    async def first_present(candidates: list[str]) -> str:
        # First selector in `candidates` matching at least one element, or "".
        # A selector whose lookup raises is skipped.
        for candidate in candidates:
            try:
                present = await page.locator(candidate).count() > 0
            except Exception:
                continue
            if present:
                return candidate
        return ""

    accept_sel = await first_present(_GENERIC_ATTR_ACCEPT)
    reject_sel = await first_present(_GENERIC_ATTR_REJECT)
    if not (accept_sel or reject_sel):
        return None
    return BannerInfo(
        detected=True,
        provider="Generic (attr)",
        accept_selector=accept_sel,
        reject_selector=reject_sel,
    )
|
|
|
|
|
|
async def detect_banner(page: Page) -> BannerInfo:
    """Detect which CMP is used and return button selectors.

    Detection stages, first hit wins:
      1. CMP-specific ``detect`` selectors from ``CMP_SELECTORS``.
      2. Any element containing one of ``GENERIC_ACCEPT_TEXTS``.
      3. Dialog/aria containers mentioning consent keywords.
      4. ``data-consent`` / ``data-cookie`` / ``data-gdpr`` attributes.
      5. Shadow DOM roots.

    Always returns a ``BannerInfo``; ``detected`` is False and all strings
    are empty when no stage matched.
    """
    # Stage 1: known CMPs. A selector that raises is skipped.
    for entry in CMP_SELECTORS:
        try:
            hits = await page.locator(entry["detect"]).count()
            if hits > 0:
                return BannerInfo(
                    detected=True,
                    provider=entry["name"],
                    accept_selector=entry["accept"],
                    reject_selector=entry["reject"],
                )
        except Exception:
            continue

    # Stage 2: text-based fallback. An accept label must be present; the
    # reject selector is optional and stays "" when no reject label is found.
    for accept_text in GENERIC_ACCEPT_TEXTS:
        try:
            if await page.get_by_text(accept_text, exact=False).count() == 0:
                continue
            reject_sel = ""
            for reject_text in GENERIC_REJECT_TEXTS:
                if await page.get_by_text(reject_text, exact=False).count() > 0:
                    reject_sel = f'button:has-text("{reject_text}")'
                    break
            return BannerInfo(
                detected=True,
                provider="Generic",
                accept_selector=f'button:has-text("{accept_text}")',
                reject_selector=reject_sel,
            )
        except Exception:
            continue

    # Stages 3-5: remaining fallbacks, in priority order.
    for fallback in (_detect_generic_dialog, _detect_generic_attr, _detect_in_shadow_dom):
        found = await fallback(page)
        if found:
            return found

    return BannerInfo(detected=False, provider="", accept_selector="", reject_selector="")
|
|
|
|
|
|
def _escape_js_regex(text: str) -> str:
    """Escape *text* so a JavaScript ``RegExp`` matches it literally."""
    import re

    return re.sub(r"[.*+?^${}()|\[\]\\]", r"\\\g<0>", text)


def _has_text_label(selector: str) -> str:
    """Extract the label from a ``...:has-text("label")`` selector ("" if none)."""
    _, marker, tail = selector.partition(':has-text("')
    if not marker:
        return ""
    label, closer, _ = tail.partition('")')
    # Without the closing '")' fall back to everything after the marker.
    return label if closer else tail


async def click_button(page: Page, selector: str, timeout: int = 5000) -> bool:
    """Try to click a consent button. Returns True if clicked successfully.

    Args:
        page: Playwright page containing the banner.
        selector: A Playwright selector, or ``shadow-click:<label>`` to click
            a button inside a shadow root by its label. Empty means "no
            button" and yields ``False``.
        timeout: Milliseconds to wait for the element to become visible.

    Returns:
        True when a click was performed, either through the regular locator
        or through the Shadow DOM fallback; False otherwise.
    """
    if not selector:
        return False

    # Handle Shadow DOM selectors. The label is literal button text, but the
    # in-page script compiles its argument as a regular expression, so regex
    # metacharacters ("Accept (all)", "OK?") must be escaped first.
    if selector.startswith("shadow-click:"):
        label = selector[len("shadow-click:"):]
        return await _click_in_shadow_dom(page, _escape_js_regex(label))

    try:
        locator = page.locator(selector).first
        await locator.wait_for(state="visible", timeout=timeout)
        await locator.click()
        return True
    except Exception:
        # Fallback: retry inside shadow roots using the label taken from a
        # selector like 'button:has-text("Accept all")'. The label is cut at
        # the closing '")' rather than stripped by character set, so labels
        # that themselves end in ')' or '"' stay intact.
        label = _has_text_label(selector)

    if not label:
        # An empty pattern would match (and click) the first shadow button.
        return False
    return await _click_in_shadow_dom(page, _escape_js_regex(label))
|