Files
breakpilot-compliance/consent-tester/services/banner_detector.py
T
Benjamin Admin 64e3a47b8c
Build + Deploy / build-admin-compliance (push) Successful in 1m53s
Build + Deploy / build-backend-compliance (push) Successful in 10s
Build + Deploy / build-ai-sdk (push) Successful in 9s
Build + Deploy / build-developer-portal (push) Successful in 10s
Build + Deploy / build-tts (push) Successful in 12s
Build + Deploy / build-document-crawler (push) Successful in 10s
Build + Deploy / build-dsms-gateway (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 15s
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m40s
CI / dep-audit (push) Has been skipped
Build + Deploy / build-dsms-node (push) Successful in 13s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 44s
CI / test-python-backend (push) Successful in 35s
CI / test-python-document-crawler (push) Successful in 25s
CI / test-python-dsms-gateway (push) Successful in 22s
CI / validate-canonical-controls (push) Successful in 14s
Build + Deploy / trigger-orca (push) Successful in 2m29s
fix(iace): confirmation dialog for ungrouping + undo/regroup
- X button replaced with confirmation dialog: "Als eigenen Punkt fuehren" / "Abbrechen"
- Dialog explains the action and that it's reversible
- Ungrouped items show orange "Zurueck in Block" button
- Info bar shows count of ungrouped items + "alle zuruecksetzen" link
- No destructive action without user confirmation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-13 15:19:39 +02:00

503 lines
16 KiB
Python

"""
Banner Detector — identifies Consent Management Platforms and their buttons.
Supports 30 CMPs with specific selectors + generic fallback + Shadow DOM.
"""
from dataclasses import dataclass
from playwright.async_api import Page
@dataclass
class BannerInfo:
    """Outcome of a consent-banner detection pass.

    The selector fields hold either a Playwright selector or, for banners
    found inside shadow roots, a ``shadow-click:<button text>`` marker. An
    empty string means no matching button was identified.
    """

    # True when one of the detection strategies matched, False otherwise.
    detected: bool
    # CMP name, or a "Generic..." / "ShadowDOM(...)" label for fallback matches.
    provider: str
    # Selector for the accept button ("" when none was found).
    accept_selector: str
    # Selector for the reject button ("" when none was found).
    reject_selector: str
# CMP-specific selectors (ordered by market share).
# Each row below is (name, detect, accept, reject); the comprehension expands
# the rows into dicts with exactly those four keys.
CMP_SELECTORS = [
    {"name": name, "detect": detect, "accept": accept, "reject": reject}
    for name, detect, accept, reject in (
        (
            "Didomi",
            "#didomi-host, [class*='didomi']",
            "#didomi-notice-agree-button",
            "#didomi-notice-disagree-button, .didomi-components-button--secondary",
        ),
        (
            "OneTrust",
            "#onetrust-banner-sdk, [class*='onetrust']",
            "#onetrust-accept-btn-handler",
            "#onetrust-reject-all-handler, .onetrust-close-btn-handler",
        ),
        (
            "Cookiebot",
            "#CybotCookiebotDialog, [class*='CybotCookiebot']",
            "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
            "#CybotCookiebotDialogBodyButtonDecline",
        ),
        (
            "Usercentrics",
            "#usercentrics-root, [data-testid='uc-banner']",
            "[data-testid='uc-accept-all-button']",
            "[data-testid='uc-deny-all-button']",
        ),
        (
            "CookieYes",
            ".cky-consent-container, [class*='cky-']",
            ".cky-btn-accept",
            ".cky-btn-reject, .cky-btn-customize",
        ),
        (
            "Quantcast",
            ".qc-cmp2-container, [class*='qc-cmp']",
            "[class*='qc-cmp2-summary-buttons'] button:first-child",
            "[class*='qc-cmp2-summary-buttons'] button:last-child",
        ),
        (
            "Borlabs",
            "#BorlabsCookieBox, [class*='BorlabsCookie']",
            "#BorlabsCookieBox .cookie-accept, [data-cookie-accept]",
            "#BorlabsCookieBox .cookie-refuse, [data-cookie-refuse]",
        ),
        (
            "Consentmanager",
            "#cmpbox, [class*='cmpbox']",
            ".cmpboxbtn.cmpboxbtnyes",
            ".cmpboxbtn.cmpboxbtnno",
        ),
        (
            "Klaro",
            ".klaro, [class*='klaro']",
            ".klaro .cm-btn-accept",
            ".klaro .cm-btn-decline",
        ),
        (
            "TarteAuCitron",
            "#tarteaucitronRoot, [class*='tarteaucitron']",
            "#tarteaucitronPersonalize2",
            "#tarteaucitronAllDenied2",
        ),
        # --- 20 additional CMPs ---
        (
            "Sourcepoint",
            "div[id^='sp_message']",
            ".sp_choice_type_11",
            ".sp_choice_type_13",
        ),
        (
            "Axeptio",
            "#axeptio_widget",
            "[data-ax='accept']",
            "[data-ax='decline']",
        ),
        (
            "Iubenda",
            "#iubenda-cs-banner",
            ".iubenda-cs-accept-btn",
            ".iubenda-cs-reject-btn",
        ),
        (
            "Termly",
            "#termly-code-snippet-support",
            "[data-tid='banner-accept']",
            "[data-tid='banner-decline']",
        ),
        (
            "CookieFirst",
            "#cookiefirst-root",
            "[data-cookiefirst-action='accept']",
            "[data-cookiefirst-action='reject']",
        ),
        (
            "Complianz",
            "#cmplz-cookiebanner-container",
            ".cmplz-accept",
            ".cmplz-deny",
        ),
        (
            "CookieScript",
            "#cookiescript_injected",
            "#cookiescript_accept",
            "#cookiescript_reject",
        ),
        (
            "HubSpot",
            "#hs-eu-cookie-confirmation",
            "#hs-eu-confirmation-button",
            "#hs-eu-decline-button",
        ),
        (
            "Civic UK",
            "#ccc, .ccc-content",
            "#ccc-recommended-settings",
            "#ccc-reject-settings",
        ),
        (
            "GDPR Cookie Compliance",
            "#moove_gdpr_cookie_modal",
            ".moove-gdpr-modal-allow-all",
            ".moove-gdpr-modal-save-settings",
        ),
        (
            "CookieHub",
            "#ch2-container",
            "#ch2-btn-accept",
            "#ch2-btn-decline",
        ),
        (
            "Osano",
            ".osano-cm-dialog",
            ".osano-cm-accept-all",
            ".osano-cm-deny",
        ),
        (
            "Ketch",
            "#ketch-consent",
            "[data-testid='accept-button']",
            "[data-testid='decline-button']",
        ),
        (
            "Piwik PRO",
            "#ppms_cm_popup_overlay",
            "#ppms_cm_agree-to-all",
            "#ppms_cm_reject-all",
        ),
        (
            "Cookie Consent (Insites)",
            ".cc-window",
            ".cc-btn.cc-allow",
            ".cc-btn.cc-deny",
        ),
        (
            "Admiral",
            "[id^='admiral-']",
            "[class*='admiral-accept']",
            "[class*='admiral-reject']",
        ),
        (
            "Sibbo",
            "#sibbo-cmp-layout",
            "#sibbo-cmp-accept-all",
            "#sibbo-cmp-reject-all",
        ),
        (
            "Evidon",
            "#_evidon_banner",
            "#_evidon-accept-button",
            "#_evidon-decline-button",
        ),
        (
            "LiveRamp",
            "#_lr-cookie-consent",
            "#_lr-accept-all",
            "#_lr-reject-all",
        ),
        (
            "Adsimple",
            "#adconsent-usp-banner",
            ".adconsent-accept-all",
            ".adconsent-reject-all",
        ),
    )
]
# Generic fallback patterns (text-based)
GENERIC_ACCEPT_TEXTS = [
"Alle akzeptieren", "Alles akzeptieren", "Alle Cookies akzeptieren",
"Accept all", "Accept All Cookies", "Akzeptieren", "Zustimmen",
"Einverstanden", "Ich stimme zu", "Ja, einverstanden",
]
GENERIC_REJECT_TEXTS = [
"Nur notwendige", "Nur essentielle", "Ablehnen", "Alle ablehnen",
"Reject", "Reject all", "Nur erforderliche", "Nur technisch notwendige",
"Decline", "Nein", "Nicht einverstanden",
]
# Attribute-based generic selectors for consent buttons
_GENERIC_ATTR_ACCEPT = [
"[data-consent='accept']", "[data-cookie='accept']", "[data-gdpr='accept']",
"[data-consent-accept]", "[data-cookie-accept]",
]
_GENERIC_ATTR_REJECT = [
"[data-consent='reject']", "[data-cookie='reject']", "[data-gdpr='reject']",
"[data-consent-reject]", "[data-cookie-reject]",
]
# Dialog / aria selectors to find consent containers
_DIALOG_SELECTORS = [
"[role='dialog']",
"[aria-label*='cookie' i]", "[aria-label*='consent' i]",
"[aria-label*='datenschutz' i]", "[aria-label*='Cookie' i]",
]
# JavaScript for recursive Shadow DOM search.
# Walks every element under `document`, descending into each open shadow root
# it finds. A shadow root is reported when its innerHTML matches the
# cookie/consent/datenschutz/privacy keyword regex and it contains at least
# one button-like element whose trimmed text is 1-79 characters long.
# Evaluates to the FIRST such match as {host, buttons, preview} - host is the
# host element's tag name plus "#id" when it has an id, buttons the collected
# labels, preview the first 200 characters of the shadow markup - or null when
# nothing matched.
_SHADOW_DETECT_JS = """
() => {
const KEYWORDS = /cookie|consent|datenschutz|privacy/i;
const results = [];
function walk(root) {
for (const el of root.querySelectorAll('*')) {
if (el.shadowRoot) {
const shadow = el.shadowRoot;
const text = shadow.innerHTML || '';
if (KEYWORDS.test(text)) {
const buttons = [];
for (const btn of shadow.querySelectorAll(
'button, a[role="button"], [role="button"]'
)) {
const t = (btn.textContent || '').trim();
if (t.length > 0 && t.length < 80) {
buttons.push(t);
}
}
if (buttons.length > 0) {
const tag = el.tagName.toLowerCase();
const id = el.id ? '#' + el.id : '';
results.push({
host: tag + id,
buttons: buttons,
preview: text.substring(0, 200)
});
}
}
walk(shadow);
}
}
}
walk(document);
return results.length > 0 ? results[0] : null;
}
"""
_SHADOW_CLICK_JS = """
(textPattern) => {
const regex = new RegExp(textPattern, 'i');
function walk(root) {
for (const el of root.querySelectorAll('*')) {
if (el.shadowRoot) {
const btns = el.shadowRoot.querySelectorAll(
'button, a[role="button"], [role="button"]'
);
for (const btn of btns) {
if (regex.test(btn.textContent || '')) {
btn.click();
return true;
}
}
const found = walk(el.shadowRoot);
if (found) return true;
}
}
return false;
}
return walk(document);
}
"""
async def _detect_in_shadow_dom(page: Page) -> BannerInfo | None:
    """Search Shadow DOM roots for consent banners as last-resort fallback.

    Runs ``_SHADOW_DETECT_JS`` in the page and classifies the button labels it
    reports into an accept and a reject candidate.

    Args:
        page: Playwright page to inspect.

    Returns:
        A ``BannerInfo`` whose selectors use the ``shadow-click:<label>``
        form, or ``None`` when the script fails, reports nothing, or none of
        the reported labels looks like an accept/reject button.
    """
    accept_kw = ("accept", "akzeptieren", "zustimmen", "agree", "allow",
                 "einverstanden", "alle")
    reject_kw = ("reject", "ablehnen", "deny", "decline", "refuse",
                 "notwendig", "necessary", "essential")
    # Negations turn an accept word into a refusal ("Disagree",
    # "Nicht einverstanden", "Do not allow").
    negation_kw = ("nicht", "disagree", "do not", "don't")

    try:
        result = await page.evaluate(_SHADOW_DETECT_JS)
    except Exception:
        # Best-effort fallback: an evaluation failure just means "not found".
        return None
    if not isinstance(result, dict):
        return None

    host = result.get("host", "")
    accept_pat = ""
    reject_pat = ""
    for text in result.get("buttons") or []:
        if not isinstance(text, str):
            continue
        low = text.lower()
        # Reject wins over accept: labels such as "Alle ablehnen" or
        # "Accept necessary only" contain accept keywords ("alle", "accept")
        # but are refusals, and must never become the accept button.
        is_reject = any(k in low for k in reject_kw) or any(
            k in low for k in negation_kw
        )
        if is_reject:
            if not reject_pat:
                reject_pat = text
        elif not accept_pat and any(k in low for k in accept_kw):
            accept_pat = text
        if accept_pat and reject_pat:
            break

    if not accept_pat and not reject_pat:
        return None
    return BannerInfo(
        detected=True,
        provider=f"ShadowDOM({host})",
        accept_selector=f"shadow-click:{accept_pat}" if accept_pat else "",
        reject_selector=f"shadow-click:{reject_pat}" if reject_pat else "",
    )
async def _click_in_shadow_dom(page: Page, text_pattern: str) -> bool:
    """Run the shadow-root click script for *text_pattern*.

    Evaluates ``_SHADOW_CLICK_JS`` in the page with the pattern as its
    argument and hands the script's result back to the caller. Any exception
    raised by the evaluation is swallowed and reported as ``False``.
    """
    try:
        clicked = await page.evaluate(_SHADOW_CLICK_JS, text_pattern)
    except Exception:
        return False
    return clicked
async def _find_dialog_button(container, sel: str, attr_selectors, texts) -> str:
    """Return a page-level selector for a button inside *container*, or ""."""
    # Attribute hooks are the most specific signal, so they are tried first.
    for attr_sel in attr_selectors:
        if await container.locator(attr_sel).count() > 0:
            return f"{sel} {attr_sel}"
    for label in texts:
        if await container.get_by_text(label, exact=False).count() > 0:
            return f'{sel} button:has-text("{label}")'
    return ""


async def _detect_generic_dialog(
    page: Page, *, max_containers: int = 5
) -> BannerInfo | None:
    """Detect consent banners in dialog/aria containers.

    For every selector in ``_DIALOG_SELECTORS`` the matching elements are
    inspected in document order. A container qualifies when its visible text
    mentions a consent keyword and an accept or reject button can be located
    inside it.

    Args:
        page: Playwright page to inspect.
        max_containers: Upper bound on how many matches per selector are
            inspected. Pages often carry several unrelated dialogs, so the
            consent dialog is not necessarily the first match; the bound keeps
            the scan cheap.

    Returns:
        A ``BannerInfo`` with provider ``"Generic (dialog)"`` for the first
        qualifying container, or ``None`` when there is none.
    """
    consent_kw = ("cookie", "consent", "datenschutz", "privacy")
    for sel in _DIALOG_SELECTORS:
        try:
            containers = page.locator(sel)
            total = await containers.count()
        except Exception:
            continue
        for index in range(min(total, max_containers)):
            container = containers.nth(index)
            try:
                text = (await container.inner_text(timeout=2000)).lower()
                if not any(kw in text for kw in consent_kw):
                    continue
                # Found a consent dialog - look for accept/reject buttons.
                accept = await _find_dialog_button(
                    container, sel, _GENERIC_ATTR_ACCEPT, GENERIC_ACCEPT_TEXTS
                )
                reject = await _find_dialog_button(
                    container, sel, _GENERIC_ATTR_REJECT, GENERIC_REJECT_TEXTS
                )
            except Exception:
                # Best effort: a detached or unreadable container must not
                # stop the remaining candidates from being checked.
                continue
            if accept or reject:
                return BannerInfo(
                    detected=True, provider="Generic (dialog)",
                    accept_selector=accept, reject_selector=reject,
                )
    return None
async def _detect_generic_attr(page: Page) -> BannerInfo | None:
    """Find consent buttons via data-consent / data-cookie / data-gdpr hooks.

    Each candidate list is probed in order and the first selector with at
    least one match on the page is kept for that role. A selector whose probe
    raises is skipped. Returns ``None`` when neither role produced a match.
    """
    found = {"accept": "", "reject": ""}
    candidate_lists = (
        ("accept", _GENERIC_ATTR_ACCEPT),
        ("reject", _GENERIC_ATTR_REJECT),
    )
    for role, candidates in candidate_lists:
        for candidate in candidates:
            try:
                present = await page.locator(candidate).count() > 0
            except Exception:
                continue
            if present:
                found[role] = candidate
                break

    if not (found["accept"] or found["reject"]):
        return None
    return BannerInfo(
        detected=True, provider="Generic (attr)",
        accept_selector=found["accept"], reject_selector=found["reject"],
    )
async def _probe_generic_text(page: Page, label: str, *, buttons_only: bool) -> bool:
    """Return True when *label* occurs on the page; probe errors count as absent."""
    try:
        if buttons_only:
            target = page.locator(f'button:has-text("{label}")')
        else:
            target = page.get_by_text(label, exact=False)
        return await target.count() > 0
    except Exception:
        return False


async def _detect_generic_text(page: Page, *, buttons_only: bool) -> BannerInfo | None:
    """Match the generic accept/reject labels, optionally on <button> elements only."""
    for text in GENERIC_ACCEPT_TEXTS:
        if not await _probe_generic_text(page, text, buttons_only=buttons_only):
            continue
        reject = ""
        for rtext in GENERIC_REJECT_TEXTS:
            if await _probe_generic_text(page, rtext, buttons_only=buttons_only):
                reject = f'button:has-text("{rtext}")'
                break
        return BannerInfo(
            detected=True, provider="Generic",
            accept_selector=f'button:has-text("{text}")',
            reject_selector=reject,
        )
    return None


async def detect_banner(page: Page) -> BannerInfo:
    """Detect which CMP is used and return button selectors.

    Strategies are tried from most to least precise:

    1. CMP-specific selectors from ``CMP_SELECTORS``.
    2. Generic labels found on real ``<button>`` elements, so the returned
       ``button:has-text(...)`` selectors are known to match something.
    3. Dialog/aria containers mentioning consent keywords.
    4. ``data-consent`` / ``data-cookie`` / ``data-gdpr`` attributes.
    5. Buttons inside shadow roots.
    6. Generic labels anywhere in the page text. This loose match also fires
       on ordinary body text, so it runs last: it keeps ``detected`` true for
       every page it used to flag without pre-empting the better strategies.

    Args:
        page: Playwright page to inspect.

    Returns:
        A ``BannerInfo``; ``detected`` is False with empty fields when no
        strategy matched.
    """
    # 1. Try CMP-specific selectors
    for cmp in CMP_SELECTORS:
        try:
            if await page.locator(cmp["detect"]).count() > 0:
                return BannerInfo(
                    detected=True, provider=cmp["name"],
                    accept_selector=cmp["accept"],
                    reject_selector=cmp["reject"],
                )
        except Exception:
            continue

    # 2. Generic fallback - labels that sit on actual buttons
    button_result = await _detect_generic_text(page, buttons_only=True)
    if button_result:
        return button_result

    # 3. Generic fallback - dialog/aria containers with consent keywords
    dialog_result = await _detect_generic_dialog(page)
    if dialog_result:
        return dialog_result

    # 4. Generic fallback - data-consent/data-cookie/data-gdpr attributes
    attr_result = await _detect_generic_attr(page)
    if attr_result:
        return attr_result

    # 5. Shadow DOM fallback - search inside shadow roots
    shadow_result = await _detect_in_shadow_dom(page)
    if shadow_result:
        return shadow_result

    # 6. Last resort - the labels appear somewhere in the page text
    loose_result = await _detect_generic_text(page, buttons_only=False)
    if loose_result:
        return loose_result

    return BannerInfo(detected=False, provider="", accept_selector="", reject_selector="")
async def click_button(page: Page, selector: str, timeout: int = 5000) -> bool:
    """Try to click a consent button. Returns True if clicked successfully.

    Args:
        page: Playwright page holding the banner.
        selector: Playwright selector, or a ``shadow-click:<label>`` marker
            for buttons that live inside a shadow root.
        timeout: Milliseconds to wait for the button to become visible in the
            main document, and for the click inside an iframe.

    Returns:
        True when a click was issued; False when the selector is empty or no
        strategy could find and click the button.
    """
    if not selector:
        return False

    # Handle Shadow DOM selectors
    shadow_prefix = "shadow-click:"
    if selector.startswith(shadow_prefix):
        return await _click_in_shadow_dom(page, selector[len(shadow_prefix):])

    # 1. Try main document
    try:
        locator = page.locator(selector).first
        await locator.wait_for(state="visible", timeout=timeout)
        await locator.click()
        return True
    except Exception:
        # Deliberate best effort: fall through to the iframe/shadow fallbacks.
        pass

    # 2. Fallback: try inside iframes (Sourcepoint, Quantcast, etc.)
    iframe_selectors = (
        "iframe[id^='sp_message']",  # Sourcepoint
        "iframe[id*='consent']",
        "iframe[title*='SP Consent']",
        "iframe[title*='consent']",
    )
    for iframe_sel in iframe_selectors:
        try:
            btn = page.frame_locator(iframe_sel).locator(selector).first
            if await btn.count() > 0:
                await btn.click(timeout=timeout)
                return True
        except Exception:
            continue

    # 3. Fallback: Shadow DOM, using the label of the first :has-text("...")
    marker = ':has-text("'
    if marker in selector:
        tail = selector.split(marker, 1)[1]
        # Cut at the closing '")' rather than rstrip('")'): rstrip removes a
        # character set, which truncates labels such as 'Accept (all)' and
        # leaves any selector text that follows the label in place.
        label = tail.partition('")')[0]
        if label:
            return await _click_in_shadow_dom(page, label)
    return False