feat: 3 new banner legal checks (11 total) + extract banner_text_checker
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 46s
CI / validate-canonical-controls (push) Successful in 14s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 18s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m32s
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
CI / test-python-dsms-gateway (push) Successful in 21s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 46s
CI / validate-canonical-controls (push) Successful in 14s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 18s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m32s
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
CI / test-python-dsms-gateway (push) Successful in 21s
New checks (from EUIPO reference case):
- Check 9: Third-party DSE link — detects when the consent dialog links to an external domain's privacy policy instead of the site's own DSE (Art. 13 DSGVO)
- Check 10: Dark-pattern language — detects "muessen/erforderlich" wording for non-essential cookies that suggests a false technical necessity (EDPB Rn. 70)
- Check 11: Non-modal dismiss = consent — detects when clicking outside the dialog closes it (possibly treating the click as consent, a Planet49 violation)

Refactor: extracted _check_banner_text (375 LOC) from consent_scanner.py into services/banner_text_checker.py to keep both files under 500 LOC.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,399 @@
|
||||
"""
|
||||
Banner text legal checks — extracted from consent_scanner.py.
|
||||
|
||||
11 checks for cookie banner legal compliance:
|
||||
1. Impressum link accessible (§5 TMG)
|
||||
2. DSE link in banner (Art. 13 DSGVO)
|
||||
3. Wrong DSE consent wording (Art. 13 DSGVO)
|
||||
4. Reject button visible (§25 TDDDG)
|
||||
5. Pre-ticked checkboxes (Planet49)
|
||||
6. Dark pattern button size (EDPB 05/2020)
|
||||
7. Cookie wall (Phase B check)
|
||||
8. Re-access to settings (Art. 7(3) DSGVO)
|
||||
9. Third-party DSE link (Art. 13 DSGVO)
|
||||
10. Dark-pattern language (EDPB 05/2020)
|
||||
11. Non-modal dismiss = consent (Planet49)
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from services.script_analyzer import Violation
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Banner containers probed in priority order: well-known CMP roots first,
# generic class/role patterns last.
_BANNER_SELECTORS = (
    "#CybotCookiebotDialog", "#onetrust-banner-sdk", "#didomi-host",
    "#usercentrics-root", ".cky-consent-container", "#cmpbox",
    '[class*="cookie-banner"]', '[class*="consent-banner"]',
    '[class*="cookie-notice"]', '[role="dialog"]',
)

# The phrase lists below are written with ASCII digraphs ("ae", "ue", ...).
# Real banner text uses umlauts, so it is folded before matching.
_UMLAUT_FOLD = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})

_WRONG_DSE_CONSENT_PATTERNS = (
    "stimme der datenschutz",
    "stimme den datenschutz",
    "akzeptiere die datenschutz",
    "akzeptiere die privacy",
    "agree to the privacy policy",
    "accept the privacy",
    "datenschutzerklaerung zustimmen",
    "datenschutzrichtlinie akzeptieren",
    "datenschutzrichtlinie zustimmen",
    "i agree to the privacy",
    "i accept the privacy",
)

_REJECT_TEXTS = ("ablehnen", "reject", "nur notwendige", "alle ablehnen", "decline")

# Button classification for the prominence comparison (Check 6).
_ACCEPT_KEYWORDS = ("akzeptieren", "accept", "zustimmen", "agree", "einverstanden")
_ACCEPT_WORDS = frozenset({"ok", "okay"})  # matched as whole words only
_REJECT_KEYWORDS = ("ablehnen", "reject", "notwendige", "decline", "nein")

_SETTINGS_SELECTORS = (
    '[class*="cookie-settings"]', '[class*="privacy-settings"]',
    'a[href*="cookie"]', 'a[href*="datenschutz-einstellungen"]',
    '[class*="consent-settings"]', '#ot-sdk-btn',
    '.cky-btn-revisit', '#CybotCookiebotDialogBodyButtonDetails',
    '[data-testid="uc-footer-link"]',
)
_FOOTER_SETTINGS_KEYWORDS = (
    "cookie-einstellungen", "cookie settings",
    "datenschutz-einstellungen", "privacy settings",
)

_DSE_LINK_KEYWORDS = ("datenschutz", "privacy", "dsgvo", "data protection")

_DARK_PATTERN_PHRASES = (
    "muessen heruntergeladen werden",
    "muessen akzeptiert werden",
    "muessen gesetzt werden",
    "cookies sind erforderlich",
    "cookies are required",
    "must be downloaded",
    "must be accepted",
    "sind zwingend notwendig",
    "unbedingt erforderlich",
)
_ESSENTIAL_CONTEXT_KEYWORDS = (
    "technisch notwendig", "essential", "strictly necessary",
    "unbedingt erforderlich fuer den betrieb",
)

# All page scripts receive the selector that matched in Python, so every check
# inspects the same element.
_PRE_TICKED_JS = """
(selector) => {
    const banner = document.querySelector(selector);
    if (!banner) return [];
    const checked = banner.querySelectorAll(
        'input[type="checkbox"]:checked:not([disabled])'
    );
    return [...checked]
        .filter(cb => {
            const label = cb.closest('label')?.textContent || cb.getAttribute('aria-label') || '';
            const isNecessary = /notwendig|necessary|essential|erforderlich/i.test(label);
            return !isNecessary;
        })
        .map(cb => cb.closest('label')?.textContent?.trim() || cb.id || 'unknown');
}
"""

_BUTTON_METRICS_JS = """
(selector) => {
    const banner = document.querySelector(selector);
    if (!banner) return null;
    const buttons = [...banner.querySelectorAll('button, a[role="button"], [class*="btn"]')];
    return buttons.slice(0, 6).map(b => {
        const style = window.getComputedStyle(b);
        const rect = b.getBoundingClientRect();
        return {
            text: b.textContent?.trim()?.substring(0, 40) || '',
            width: rect.width,
            height: rect.height,
            area: rect.width * rect.height,
            bgColor: style.backgroundColor,
            fontSize: parseFloat(style.fontSize),
            visible: rect.width > 0 && rect.height > 0,
        };
    });
}
"""

_DISMISS_JS = """
(selector) => {
    const dialog = document.querySelector(selector);
    if (!dialog) return { hasOverlay: false, overlayHasClick: false, isModal: false, dialogRole: null };
    // Check for overlay/backdrop elements
    const overlays = document.querySelectorAll(
        '.overlay, .backdrop, .modal-backdrop, '
        + '[class*="overlay"], [class*="backdrop"], '
        + '[class*="dimmer"], .cdk-overlay-backdrop'
    );
    // getEventListeners() only exists in the DevTools console. Reading the bare
    // identifier in page context throws a ReferenceError, hence the typeof guard.
    const canInspect = typeof getEventListeners === 'function';
    let overlayHasClick = false;
    for (const ov of overlays) {
        const listeners = canInspect ? getEventListeners(ov) : {};
        if (listeners.click && listeners.click.length > 0) {
            overlayHasClick = true;
        }
    }
    // Alternative: check if dialog is non-modal (no inert on background)
    const isModal = dialog.getAttribute('aria-modal') === 'true' ||
                    dialog.hasAttribute('open');
    return {
        hasOverlay: overlays.length > 0,
        overlayHasClick: overlayHasClick,
        isModal: isModal,
        dialogRole: dialog.getAttribute('role'),
    };
}
"""


def _fold(text: str) -> str:
    """Lower-case *text* and replace German umlauts/ß by ASCII digraphs."""
    return text.lower().translate(_UMLAUT_FOLD)


def _banner_violation(severity: str, text: str, legal_ref: str):
    """Build a ``Violation`` attributed to the cookie banner."""
    return Violation(
        service="Cookie-Banner",
        severity=severity,
        text=text,
        legal_ref=legal_ref,
    )


def _site_host(url: str) -> str:
    """Return the lower-cased host of *url* without a leading ``www.``."""
    from urllib.parse import urlsplit

    try:
        host = (urlsplit(url).hostname or "").lower()
    except ValueError:
        return ""
    return host[4:] if host.startswith("www.") else host


def _same_site(host_a: str, host_b: str) -> bool:
    """Tell whether two hosts are equal or one is a sub-domain of the other."""
    return (
        host_a == host_b
        or host_a.endswith("." + host_b)
        or host_b.endswith("." + host_a)
    )


def _pick_accept_reject(buttons: list) -> tuple:
    """Classify button metrics into ``(accept, reject)``; either may be None.

    Hidden buttons are skipped because their zero area would distort the
    size ratio. Reject keywords win over accept keywords, and "ok" must be a
    whole word, so that "Nur notwendige Cookies" is not taken for an accept
    button. As before, the last matching button of each kind is used.
    """
    accept_btn = None
    reject_btn = None
    for btn in buttons:
        if not btn.get("visible"):
            continue
        label = _fold(btn.get("text") or "")
        words = "".join(ch if ch.isalnum() else " " for ch in label).split()
        if any(k in label for k in _REJECT_KEYWORDS):
            reject_btn = btn
        elif any(k in label for k in _ACCEPT_KEYWORDS) or _ACCEPT_WORDS.intersection(words):
            accept_btn = btn
    return accept_btn, reject_btn


async def _find_banner(page) -> tuple:
    """Return ``(selector, text, links)`` of the first banner that has text.

    ``links`` holds one ``{"href", "text"}`` dict per anchor inside that
    banner (href lower-cased, text folded). Links of candidates without text
    are not carried over. Returns ``(None, "", [])`` when nothing matches.
    """
    for selector in _BANNER_SELECTORS:
        try:
            el = page.locator(selector).first
            if await el.count() == 0:
                continue
            text = (await el.text_content() or "").strip()
            if not text:
                continue
        except Exception as exc:
            logger.debug("Banner probe %s failed: %s", selector, exc)
            continue

        links = []
        try:
            for link in await el.locator("a[href]").all():
                href = await link.get_attribute("href") or ""
                label = (await link.text_content() or "").strip()
                links.append({"href": href.lower(), "text": _fold(label)})
        except Exception as exc:
            # Keep the banner text and whatever links were read so far.
            logger.debug("Reading banner links of %s failed: %s", selector, exc)
        return selector, text, links
    return None, "", []


async def _has_impressum(page, banner_links: list) -> bool:
    """Check 1: Impressum linked in the banner or visible on the page."""
    if any(
        "impressum" in l["href"] or "impressum" in l["text"]
        or "imprint" in l["href"] or "legal notice" in l["text"]
        for l in banner_links
    ):
        return True
    try:
        # locator()/.first are synchronous; only count()/is_visible() are awaited.
        imp_link = page.locator('a[href*="impressum"], a[href*="imprint"]').first
        return await imp_link.count() > 0 and await imp_link.is_visible()
    except Exception as exc:
        logger.debug("Impressum fallback lookup failed: %s", exc)
        return False


async def _check_pre_ticked(page, selector: str) -> list:
    """Check 5: enabled, pre-ticked checkboxes for non-essential purposes."""
    try:
        pre_checked = await page.evaluate(_PRE_TICKED_JS, selector)
    except Exception as exc:
        logger.debug("Pre-ticked checkbox check skipped: %s", exc)
        return []
    if not pre_checked:
        return []
    return [_banner_violation(
        "HIGH",
        f"Vorausgewaehlte Checkboxen im Banner: {', '.join(pre_checked[:3])}. "
        "Einwilligung muss durch aktive Handlung erfolgen — vorausgefuellte "
        "Checkboxen sind ungueltig.",
        "Art. 4(11) DSGVO, EuGH C-673/17 (Planet49)",
    )]


async def _check_button_prominence(page, selector: str) -> list:
    """Check 6: accept button clearly larger than the reject button."""
    found = []
    try:
        button_info = await page.evaluate(_BUTTON_METRICS_JS, selector)
        if not button_info or len(button_info) < 2:
            return found
        accept_btn, reject_btn = _pick_accept_reject(button_info)
        if not (accept_btn and reject_btn):
            return found

        area_ratio = accept_btn["area"] / max(reject_btn["area"], 1)
        if area_ratio > 2.5:
            found.append(_banner_violation(
                "MEDIUM",
                f"Dark Pattern: 'Akzeptieren'-Button ist {area_ratio:.1f}x groesser als "
                "'Ablehnen'-Button. Beide Optionen muessen gleichwertig dargestellt werden.",
                "EDPB Guidelines 05/2020, §25 TDDDG, DSK Orientierungshilfe Telemedien",
            ))
        size_ratio = accept_btn["fontSize"] / max(reject_btn["fontSize"], 1)
        if size_ratio > 1.5:
            found.append(_banner_violation(
                "MEDIUM",
                f"Dark Pattern: Schriftgroesse 'Akzeptieren' ({accept_btn['fontSize']:.0f}px) "
                f"vs. 'Ablehnen' ({reject_btn['fontSize']:.0f}px). "
                "Unterschiedliche Schriftgroessen sind ein Dark Pattern.",
                "EDPB Guidelines 05/2020 (gleichwertige Darstellung)",
            ))
    except Exception as exc:
        logger.debug("Button prominence check skipped: %s", exc)
    return found


async def _check_settings_reaccess(page) -> list:
    """Check 8: a way back to the cookie settings exists on the page."""
    try:
        for sel in _SETTINGS_SELECTORS:
            try:
                if await page.locator(sel).count() > 0:
                    return []
            except Exception:
                continue

        # Also check footer for cookie settings link
        footer_text = ""
        try:
            footer = page.locator("footer").first
            if await footer.count() > 0:
                footer_text = (await footer.text_content() or "").lower()
        except Exception as exc:
            logger.debug("Footer lookup failed: %s", exc)
        if any(kw in footer_text for kw in _FOOTER_SETTINGS_KEYWORDS):
            return []
    except Exception as exc:
        logger.debug("Settings re-access check skipped: %s", exc)
        return []

    return [_banner_violation(
        "MEDIUM",
        "Kein erneuter Zugang zu Cookie-Einstellungen gefunden. "
        "Der Widerruf der Einwilligung muss ebenso einfach sein wie "
        "die Erteilung (Art. 7 Abs. 3 DSGVO).",
        "Art. 7 Abs. 3 DSGVO (Widerruf so einfach wie Einwilligung)",
    )]


def _check_third_party_dse(page_url: str, banner_links: list) -> list:
    """Check 9: a privacy-policy link in the banner points to a foreign site."""
    page_domain = _site_host(page_url)
    if not page_domain:
        return []
    for link in banner_links:
        href = link["href"]
        if not href.startswith("http"):
            continue  # relative links stay on the scanned site
        link_domain = _site_host(href)
        if not link_domain:
            continue
        is_dse_link = any(kw in link["text"] for kw in _DSE_LINK_KEYWORDS)
        if is_dse_link and not _same_site(link_domain, page_domain):
            return [_banner_violation(
                "HIGH",
                f"Consent verweist auf Datenschutzerklaerung von {link_domain} "
                "statt auf eigene DSE. Der Verantwortliche muss eine eigene "
                "Datenschutzerklaerung bereitstellen (Art. 13 DSGVO). "
                "Ein Verweis auf die DSE eines Drittanbieters/Auftragsverarbeiters "
                "reicht nicht aus.",
                "Art. 13 DSGVO (Informationspflichten), Art. 26 DSGVO (gemeinsame Verantwortlichkeit)",
            )]
    return []


def _check_dark_pattern_language(banner_folded: str) -> list:
    """Check 10: wording that presents optional cookies as mandatory."""
    phrase = next((p for p in _DARK_PATTERN_PHRASES if p in banner_folded), None)
    if phrase is None:
        return []
    # Only the first matching phrase is evaluated. The wording is tolerated
    # when the banner also talks about technically necessary cookies.
    if any(kw in banner_folded for kw in _ESSENTIAL_CONTEXT_KEYWORDS):
        return []
    return [_banner_violation(
        "MEDIUM",
        f"Dark-Pattern-Sprache: '{phrase}' suggeriert technische "
        "Notwendigkeit fuer nicht-essentielle Cookies. Nutzer koennten "
        "den Eindruck gewinnen, eine Zustimmung sei alternativlos.",
        "EDPB Guidelines 05/2020 Rn. 70, Art. 7(4) DSGVO (freiwillige Einwilligung)",
    )]


async def _check_non_modal_dismiss(page, selector: str) -> list:
    """Check 11: an overlay exists but the dialog is not marked modal."""
    try:
        state = await page.evaluate(_DISMISS_JS, selector)
    except Exception as exc:
        logger.debug("Modal dismiss check skipped: %s", exc)
        return []
    if not (state and state.get("hasOverlay") and not state.get("isModal")):
        return []
    return [_banner_violation(
        "HIGH",
        "Consent-Dialog ist nicht modal — Klick auf den Hintergrund kann "
        "das Fenster schliessen und als Einwilligung gewertet werden. "
        "Ein versehentlicher Klick ist keine aktive Einwilligung. "
        "Der Dialog muss modal sein (nur explizite Buttons als Optionen).",
        "EuGH C-673/17 Planet49 (aktive Handlung), Art. 7(1) DSGVO (Nachweispflicht), "
        "EDPB Guidelines 05/2020 Rn. 77 (silence/inactivity ≠ consent)",
    )]


async def check_banner_text(page) -> dict:
    """Run the banner legal checks against an already loaded page.

    The first container from a fixed selector list that has text is treated
    as the banner. All element-level checks then look at that same element.

    Checks, in the order their violations are reported:
      1. Impressum reachable from the banner or visible on the page
      2. Privacy-policy (DSE) link inside the banner
      3. Wording that asks the user to "agree" to the privacy policy
      4. A reject option mentioned in the banner text
      5. Pre-ticked checkboxes for non-essential purposes
      6. Accept button clearly larger than the reject button
      7. Cookie wall: not evaluated here (handled in Phase B of the scanner)
      8. A way back to the cookie settings
      9. Privacy-policy link pointing to a foreign domain
     10. Wording that presents optional cookies as mandatory
     11. Overlay present while the dialog is not marked modal

    Args:
        page: Playwright async ``Page`` showing the site with its banner.

    Returns:
        Dict with ``violations`` (list of ``Violation``), ``has_impressum``
        and ``has_dse`` (bools). If no banner text is found, the list is
        empty and both flags are False.

    Errors never propagate. A failing check is skipped and logged at debug
    level; an unexpected failure aborts the remaining checks with a warning
    and returns whatever was collected so far.
    """
    violations = []
    has_impressum = False
    has_dse = False

    try:
        selector, banner_text, banner_links = await _find_banner(page)
        if not banner_text:
            return {"violations": violations, "has_impressum": False, "has_dse": False}

        banner_lower = _fold(banner_text)

        # Check 1: Impressum link in or accessible through banner
        has_impressum = await _has_impressum(page, banner_links)
        if not has_impressum:
            violations.append(_banner_violation(
                "HIGH",
                "Impressum nicht aus dem Cookie-Banner erreichbar. "
                "Bei ueberlagerndem Banner muss ein Impressum-Link im Banner vorhanden sein (§5 TMG).",
                "§5 TMG, LG Rostock Az. 3 O 22/19",
            ))

        # Check 2: DSE link in banner
        has_dse = any(
            "datenschutz" in l["href"] or "datenschutz" in l["text"]
            or "privacy" in l["href"] or "privacy" in l["text"]
            or "dsgvo" in l["href"]
            for l in banner_links
        )
        if not has_dse:
            violations.append(_banner_violation(
                "MEDIUM",
                "Kein Link zur Datenschutzerklaerung im Cookie-Banner. "
                "Nutzer sollten vor der Einwilligung die DSE einsehen koennen.",
                "Art. 13 DSGVO, ErwGr. 42 DSGVO (informierte Einwilligung)",
            ))

        # Check 3: Wrong wording — "Zustimmung zur Datenschutzerklärung"
        if any(pattern in banner_lower for pattern in _WRONG_DSE_CONSENT_PATTERNS):
            violations.append(_banner_violation(
                "HIGH",
                "Falsche Formulierung im Banner: 'Zustimmung zur Datenschutzerklaerung'. "
                "Die DSE ist eine Informationspflicht (Art. 13 DSGVO) — man kann sie "
                "nur zur Kenntnis nehmen, nicht 'zustimmen'. "
                "Korrekt: 'Ich habe die Datenschutzinformationen zur Kenntnis genommen'.",
                "Art. 13 DSGVO, ErwGr. 42 (informierte Einwilligung ≠ Zustimmung zur DSE)",
            ))

        # Check 4: Reject button visible (no hidden reject)
        if not any(t in banner_lower for t in _REJECT_TEXTS):
            violations.append(_banner_violation(
                "HIGH",
                "Kein sichtbarer 'Ablehnen'-Button im Banner erkannt. "
                "Die Ablehnung muss ebenso einfach sein wie die Zustimmung.",
                "§25 Abs. 1 TDDDG, EDPB Guidelines 05/2020 (Consent)",
            ))

        # Check 5: Pre-ticked checkboxes (EuGH Planet49)
        violations.extend(await _check_pre_ticked(page, selector))

        # Check 6: Dark Pattern — button size/prominence comparison
        violations.extend(await _check_button_prominence(page, selector))

        # Check 7: Cookie Wall — does rejecting block the site?
        # (This is checked in Phase B — if after reject the page is not navigable)

        # Check 8: Re-access to settings (Art. 7(3) — revocation as easy as consent)
        violations.extend(await _check_settings_reaccess(page))

        # Check 9: Third-party DSE link — consent links to external domain DSE
        violations.extend(_check_third_party_dse(page.url, banner_links))

        # Check 10: Dark-Pattern language — "muessen/erforderlich" for non-essential
        violations.extend(_check_dark_pattern_language(banner_lower))

        # Check 11: Modal dismiss = consent (click outside closes + sets consent)
        violations.extend(await _check_non_modal_dismiss(page, selector))

    except Exception as e:
        logger.warning("Banner text check failed: %s", e)

    return {"violations": violations, "has_impressum": has_impressum, "has_dse": has_dse}
|
||||
@@ -0,0 +1,213 @@
|
||||
"""
|
||||
Consent Scanner — Playwright-based 3-phase cookie consent test.
|
||||
|
||||
Phase A: Before consent (first visit)
|
||||
Phase B: After rejecting consent
|
||||
Phase C: After accepting consent
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from playwright.async_api import async_playwright, Page, BrowserContext
|
||||
|
||||
from services.banner_detector import detect_banner, click_button, BannerInfo
|
||||
from services.script_analyzer import (
|
||||
classify_scripts, find_tracking_services,
|
||||
find_violations_before_consent, find_violations_after_reject, Violation,
|
||||
)
|
||||
from services.banner_text_checker import check_banner_text as _check_banner_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
USER_AGENT = (
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class ConsentTestResult:
    """Aggregated outcome of one consent scan of a single URL.

    Every field has a default, so a result created at the start of a scan is
    valid even if the scan stops early. Field order is part of the generated
    ``__init__`` signature and must not be changed.
    """

    # Banner detection outcome (copied from the detector's result).
    banner_detected: bool = False
    banner_provider: str = ""
    # Phase A: Before consent
    before_scripts: list[str] = field(default_factory=list)
    before_cookies: list[str] = field(default_factory=list)
    before_tracking: list[str] = field(default_factory=list)
    before_violations: list[Violation] = field(default_factory=list)
    # Phase B: After reject
    reject_scripts: list[str] = field(default_factory=list)
    reject_cookies: list[str] = field(default_factory=list)
    # Tracking services seen after reject that were not seen in Phase A.
    reject_new_tracking: list[str] = field(default_factory=list)
    reject_violations: list[Violation] = field(default_factory=list)
    # Phase C: After accept
    accept_scripts: list[str] = field(default_factory=list)
    accept_cookies: list[str] = field(default_factory=list)
    # Tracking services seen after accept that were not seen in Phase A.
    accept_new_tracking: list[str] = field(default_factory=list)
    # NOTE(review): not assigned anywhere in the visible scanner code.
    accept_undocumented: list[str] = field(default_factory=list)
    # Phase D-F: Per-category tests
    category_tests: list = field(default_factory=list)  # list[CategoryTestResult]
    # Banner text checks
    banner_text_violations: list[Violation] = field(default_factory=list)
    banner_has_impressum_link: bool = False
    banner_has_dse_link: bool = False
|
||||
|
||||
|
||||
async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
    """Scan *url* in fresh browser contexts and record consent behaviour.

    Phase A loads the page without interaction, Phase B clicks the reject
    button, Phase C clicks the accept button. After that, each detected
    consent category is tested individually. Every phase uses a new browser
    context of one headless Chromium instance.

    Args:
        url: Page to scan.
        wait_secs: Seconds to wait after the first load (Phase A) and after
            each reject/accept click before collecting requests and cookies.

    Returns:
        The populated ``ConsentTestResult``. Errors are logged rather than
        raised, so a failed scan returns whatever was collected up to that
        point. If no banner is detected, only the Phase A fields are filled.
    """
    result = ConsentTestResult()
    wait_ms = wait_secs * 1000

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )

        try:
            # ── Phase A: Before consent ──────────────────────────
            logger.info("Phase A: First visit (no interaction)")
            ctx_a = await browser.new_context(user_agent=USER_AGENT)
            page_a = await ctx_a.new_page()
            scripts_a = []
            # The listener must be attached before goto() so that requests
            # issued during the initial load are recorded as well.
            page_a.on("request", lambda req: _collect_script(req, scripts_a))

            await page_a.goto(url, wait_until="networkidle", timeout=30000)
            await page_a.wait_for_timeout(wait_ms)

            result.before_scripts = _get_page_scripts(scripts_a)
            result.before_cookies = _get_cookie_names(await ctx_a.cookies())
            result.before_tracking = find_tracking_services(result.before_scripts)
            result.before_violations = find_violations_before_consent(result.before_scripts)

            # Detect banner
            banner = await detect_banner(page_a)
            result.banner_detected = banner.detected
            result.banner_provider = banner.provider

            # Check banner text for legal issues
            if banner.detected:
                banner_violations = await _check_banner_text(page_a)
                result.banner_text_violations = banner_violations["violations"]
                result.banner_has_impressum_link = banner_violations["has_impressum"]
                result.banner_has_dse_link = banner_violations["has_dse"]

            await ctx_a.close()

            if not banner.detected:
                logger.info("No consent banner detected — skipping Phase B/C")
                # NOTE(review): the finally clause below closes the browser
                # again on this path; the explicit close looks redundant.
                await browser.close()
                return result

            # ── Phase B: After rejecting ─────────────────────────
            logger.info("Phase B: Reject consent (%s)", banner.provider)
            ctx_b = await browser.new_context(user_agent=USER_AGENT)
            page_b = await ctx_b.new_page()
            scripts_b = []
            page_b.on("request", lambda req: _collect_script(req, scripts_b))

            await page_b.goto(url, wait_until="networkidle", timeout=30000)
            await page_b.wait_for_timeout(3000)

            clicked = await click_button(page_b, banner.reject_selector)
            if clicked:
                logger.info("Reject button clicked, waiting %ds", wait_secs)
                await page_b.wait_for_timeout(wait_ms)
            else:
                # Results are still collected below, but then describe a page
                # on which no rejection took place.
                logger.warning("Could not click reject button")

            result.reject_scripts = _get_page_scripts(scripts_b)
            result.reject_cookies = _get_cookie_names(await ctx_b.cookies())
            reject_tracking = find_tracking_services(result.reject_scripts)
            result.reject_new_tracking = [t for t in reject_tracking if t not in result.before_tracking]
            result.reject_violations = find_violations_after_reject(
                result.before_scripts, result.reject_scripts,
            )

            await ctx_b.close()

            # ── Phase C: After accepting ─────────────────────────
            logger.info("Phase C: Accept consent (%s)", banner.provider)
            ctx_c = await browser.new_context(user_agent=USER_AGENT)
            page_c = await ctx_c.new_page()
            scripts_c = []
            page_c.on("request", lambda req: _collect_script(req, scripts_c))

            await page_c.goto(url, wait_until="networkidle", timeout=30000)
            await page_c.wait_for_timeout(3000)

            clicked = await click_button(page_c, banner.accept_selector)
            if clicked:
                logger.info("Accept button clicked, waiting %ds", wait_secs)
                await page_c.wait_for_timeout(wait_ms)
            else:
                logger.warning("Could not click accept button")

            result.accept_scripts = _get_page_scripts(scripts_c)
            result.accept_cookies = _get_cookie_names(await ctx_c.cookies())
            accept_tracking = find_tracking_services(result.accept_scripts)
            result.accept_new_tracking = [t for t in accept_tracking if t not in result.before_tracking]

            await ctx_c.close()

            # ── Phase D-F: Per-category tests ────────────────────────
            # Best effort: any failure in here is logged and leaves the
            # Phase A-C results untouched.
            try:
                # Imported here rather than at module level.
                # NOTE(review): presumably to keep the dependency optional or
                # to avoid an import cycle — confirm.
                from services.category_tester import detect_categories, test_single_category

                ctx_cat = await browser.new_context(user_agent=USER_AGENT)
                page_cat = await ctx_cat.new_page()
                await page_cat.goto(url, wait_until="networkidle", timeout=20000)
                await page_cat.wait_for_timeout(2000)

                categories = await detect_categories(page_cat, banner)
                await page_cat.close()

                if categories:
                    logger.info("Testing %d categories individually", len(categories))
                    for cat in categories:
                        # One fresh context per category.
                        cat_ctx = await browser.new_context(user_agent=USER_AGENT)
                        cat_result = await test_single_category(cat_ctx, url, cat, banner, wait_ms)
                        result.category_tests.append(cat_result)
                        await cat_ctx.close()
                else:
                    logger.info("No categories detected — skipping per-category tests")

                await ctx_cat.close()
            except Exception as cat_err:
                logger.warning("Category tests failed (non-blocking): %s", cat_err)

        except Exception as e:
            # Any failure ends the scan; the partial result is still returned.
            logger.error("Consent test failed: %s", e)
        finally:
            await browser.close()

    logger.info(
        "Consent test complete: banner=%s, violations_before=%d, violations_reject=%d, categories=%d",
        result.banner_provider, len(result.before_violations), len(result.reject_violations),
        len(result.category_tests),
    )
    return result
|
||||
|
||||
|
||||
def _collect_script(request, scripts: list[str]):
    """Append the request URL to *scripts* if its resource type is tracked.

    Tracked types are script, image, xhr and fetch; all other requests are
    ignored. The list is modified in place and nothing is returned.
    """
    tracked_types = ("script", "image", "xhr", "fetch")
    if request.resource_type not in tracked_types:
        return
    scripts.append(request.url)
|
||||
|
||||
|
||||
def _get_page_scripts(collected: list[str]) -> list[str]:
    """Keep the first URL seen per host, in order, capped at 50 entries.

    The host is the third "/"-separated segment of the URL. A string with
    fewer than three segments is used as its own de-duplication key.
    """
    known_hosts = set()
    first_per_host = []
    for candidate in collected:
        segments = candidate.split("/")
        # More than two segments already implies the URL contains a "/".
        host = segments[2] if len(segments) > 2 else candidate
        if host in known_hosts:
            continue
        known_hosts.add(host)
        first_per_host.append(candidate)
    return first_per_host[:50]  # Cap at 50
|
||||
|
||||
|
||||
def _get_cookie_names(cookies: list[dict]) -> list[str]:
    """Return the distinct, non-empty cookie names in sorted order."""
    names = {cookie["name"] for cookie in cookies if cookie.get("name")}
    return sorted(names)
|
||||
|
||||
Reference in New Issue
Block a user