feat: 3 new banner legal checks (11 total) + extract banner_text_checker
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 46s
CI / validate-canonical-controls (push) Successful in 14s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 18s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m32s
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
CI / test-python-dsms-gateway (push) Successful in 21s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Successful in 46s
CI / validate-canonical-controls (push) Successful in 14s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 18s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m32s
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
CI / test-python-dsms-gateway (push) Successful in 21s
New checks (from EUIPO reference case): - Check 9: Third-party DSE link — detects when consent dialog links to external domain's privacy policy instead of own DSE (Art. 13 DSGVO) - Check 10: Dark-pattern language — detects "muessen/erforderlich" for non-essential cookies suggesting false technical necessity (EDPB Rn. 70) - Check 11: Non-modal dismiss = consent — detects when clicking outside dialog closes it (possibly treating as consent, Planet49 violation) Refactor: extracted _check_banner_text (375 LOC) from consent_scanner.py into services/banner_text_checker.py to keep both files under 500 LOC. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,399 @@
|
|||||||
|
"""
|
||||||
|
Banner text legal checks — extracted from consent_scanner.py.
|
||||||
|
|
||||||
|
11 checks for cookie banner legal compliance:
|
||||||
|
1. Impressum link accessible (§5 TMG)
|
||||||
|
2. DSE link in banner (Art. 13 DSGVO)
|
||||||
|
3. Wrong DSE consent wording (Art. 13 DSGVO)
|
||||||
|
4. Reject button visible (§25 TDDDG)
|
||||||
|
5. Pre-ticked checkboxes (Planet49)
|
||||||
|
6. Dark pattern button size (EDPB 05/2020)
|
||||||
|
7. Cookie wall (Phase B check)
|
||||||
|
8. Re-access to settings (Art. 7(3) DSGVO)
|
||||||
|
9. Third-party DSE link (Art. 13 DSGVO)
|
||||||
|
10. Dark-pattern language (EDPB 05/2020)
|
||||||
|
11. Non-modal dismiss = consent (Planet49)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from services.script_analyzer import Violation
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Candidate banner containers: known CMP roots first, generic fallbacks last.
# The first selector whose element yields non-empty text is used as the banner.
_BANNER_SELECTORS = (
    "#CybotCookiebotDialog", "#onetrust-banner-sdk", "#didomi-host",
    "#usercentrics-root", ".cky-consent-container", "#cmpbox",
    '[class*="cookie-banner"]', '[class*="consent-banner"]',
    '[class*="cookie-notice"]', '[role="dialog"]',
)

# The phrase lists in this module are written in ASCII transliteration
# ("muessen", "datenschutzerklaerung"); banner copy is folded the same way
# before matching so that text with real umlauts is recognised.
_UMLAUT_FOLD = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})

_ACCEPT_KEYWORDS = ("akzeptieren", "accept", "zustimmen", "agree", "einverstanden")
_REJECT_KEYWORDS = ("ablehnen", "reject", "notwendige", "decline", "nein")

# Each script receives the selector that matched in Python, so the DOM checks
# inspect the same element the text checks were run against.
_PRECHECKED_JS = """
(selector) => {
    const banner = document.querySelector(selector);
    if (!banner) return [];
    const checked = banner.querySelectorAll(
        'input[type="checkbox"]:checked:not([disabled])'
    );
    return [...checked]
        .filter(cb => {
            const label = cb.closest('label')?.textContent || cb.getAttribute('aria-label') || '';
            const isNecessary = /notwendig|necessary|essential|erforderlich/i.test(label);
            return !isNecessary;
        })
        .map(cb => cb.closest('label')?.textContent?.trim() || cb.id || 'unknown');
}
"""

_BUTTONS_JS = """
(selector) => {
    const banner = document.querySelector(selector);
    if (!banner) return null;
    const buttons = [...banner.querySelectorAll('button, a[role="button"], [class*="btn"]')];
    return buttons.slice(0, 6).map(b => {
        const style = window.getComputedStyle(b);
        const rect = b.getBoundingClientRect();
        return {
            text: b.textContent?.trim()?.substring(0, 40) || '',
            width: rect.width,
            height: rect.height,
            area: rect.width * rect.height,
            bgColor: style.backgroundColor,
            fontSize: parseFloat(style.fontSize),
            visible: rect.width > 0 && rect.height > 0,
        };
    });
}
"""

_DISMISS_JS = """
(selector) => {
    const dialog = document.querySelector(selector);
    if (!dialog) {
        return { hasOverlay: false, overlayHasClick: false, isModal: false, dialogRole: null };
    }
    // Check for overlay/backdrop elements
    const overlays = document.querySelectorAll(
        '.overlay, .backdrop, .modal-backdrop, '
        + '[class*="overlay"], [class*="backdrop"], '
        + '[class*="dimmer"], .cdk-overlay-backdrop'
    );
    // getEventListeners is a DevTools console utility and is normally not
    // defined in page context; referencing it without typeof would throw.
    const inspect = typeof getEventListeners === 'function' ? getEventListeners : null;
    let overlayHasClick = false;
    for (const ov of overlays) {
        const listeners = inspect ? inspect(ov) : {};
        if (listeners.click && listeners.click.length > 0) {
            overlayHasClick = true;
        }
    }
    // Alternative: check if dialog is non-modal (no inert on background)
    const isModal = dialog.getAttribute('aria-modal') === 'true' ||
                    dialog.hasAttribute('open');
    return {
        hasOverlay: overlays.length > 0,
        overlayHasClick: overlayHasClick,
        isModal: isModal,
        dialogRole: dialog.getAttribute('role'),
    };
}
"""


def _normalize_text(text: str) -> str:
    """Lower-case *text*, transliterate German umlauts and collapse whitespace."""
    return " ".join(text.lower().translate(_UMLAUT_FOLD).split())


def _bare_host(url: str) -> str:
    """Return the lower-cased host of *url* without port or a leading ``www.``."""
    from urllib.parse import urlsplit

    host = urlsplit(url).hostname or ""
    return host[4:] if host.startswith("www.") else host


def _same_site(host_a: str, host_b: str) -> bool:
    """Return True when both hosts are equal or one is a subdomain of the other."""
    return (
        host_a == host_b
        or host_a.endswith("." + host_b)
        or host_b.endswith("." + host_a)
    )


def _pick_accept_reject(buttons: list) -> tuple:
    """Return the (accept, reject) button dicts found in *buttons*, or None each.

    Reject keywords are tested first so that labels such as
    "Nur notwendige Cookies akzeptieren" count as the reject option, and "ok"
    must be a whole word so that it does not match inside "cookie(s)".
    As before, a later matching button replaces an earlier one.
    """
    import re

    accept = None
    reject = None
    for btn in buttons:
        label = (btn.get("text") or "").lower()
        if any(k in label for k in _REJECT_KEYWORDS):
            reject = btn
        elif any(k in label for k in _ACCEPT_KEYWORDS) or re.search(r"\bok\b", label):
            accept = btn
    return accept, reject


def _banner_violation(severity: str, text: str, legal_ref: str):
    """Build a ``Violation`` attributed to the cookie banner."""
    return Violation(
        service="Cookie-Banner",
        severity=severity,
        text=text,
        legal_ref=legal_ref,
    )


async def _read_banner(page) -> tuple:
    """Locate the banner and return ``(selector, text, links)``.

    ``links`` is a list of ``{"href", "text"}`` dicts (href lower-cased, text
    normalised). Returns ``(None, "", [])`` when no candidate has any text.
    """
    for selector in _BANNER_SELECTORS:
        try:
            el = page.locator(selector).first
            if await el.count() == 0:
                continue
            text = (await el.text_content() or "").strip()
        except Exception as exc:
            logger.debug("Banner selector %s failed: %s", selector, exc)
            continue
        if not text:
            continue

        links = []
        try:
            for link in await el.locator("a[href]").all():
                href = await link.get_attribute("href") or ""
                label = await link.text_content() or ""
                links.append({"href": href.lower(), "text": _normalize_text(label)})
        except Exception as exc:
            # Keep the banner text even if its links could not be enumerated.
            logger.debug("Reading banner links failed for %s: %s", selector, exc)
        return selector, text, links
    return None, "", []


async def check_banner_text(page) -> dict:
    """Run the banner legal checks against the cookie banner on *page*.

    Checks performed here (check 7, cookie wall, is handled in Phase B):
      1. Impressum link reachable from, or visible next to, the banner (§5 TMG)
      2. Link to the privacy policy (DSE) inside the banner
      3. Wording that asks users to "agree to" the privacy policy — the DSE is
         an information obligation (Art. 13 DSGVO), not something to consent to
      4. Reject option present in the banner text
      5. Pre-ticked, non-essential checkboxes
      6. Accept button far larger / larger font than the reject button
      8. Some way to re-open the cookie settings
      9. Privacy link pointing to a different site
     10. Wording that suggests cookies are mandatory
     11. Overlay present while the dialog is not marked modal

    Args:
        page: Playwright page with the banner currently displayed.

    Returns:
        Dict with ``violations`` (list of ``Violation``), ``has_impressum``
        and ``has_dse`` (bools). If no banner text is found, the list is empty
        and both flags are False. Errors are logged, never raised.
    """
    violations = []
    has_impressum = False
    has_dse = False

    try:
        banner_selector, banner_text, banner_links = await _read_banner(page)
        if not banner_text:
            return {"violations": violations, "has_impressum": False, "has_dse": False}

        banner_lower = _normalize_text(banner_text)

        # Check 1: Impressum link in or accessible through banner
        has_impressum = any(
            "impressum" in link["href"] or "impressum" in link["text"]
            or "imprint" in link["href"] or "legal notice" in link["text"]
            for link in banner_links
        )
        if not has_impressum:
            # Fall back to an Impressum link that is visible elsewhere on the page.
            try:
                # ``.first`` is a plain property returning a Locator; it must not be awaited.
                imp_link = page.locator('a[href*="impressum"], a[href*="imprint"]').first
                if await imp_link.count() > 0 and await imp_link.is_visible():
                    has_impressum = True
            except Exception as exc:
                logger.debug("Impressum visibility probe failed: %s", exc)

        if not has_impressum:
            violations.append(_banner_violation(
                "HIGH",
                "Impressum nicht aus dem Cookie-Banner erreichbar. "
                "Bei ueberlagerndem Banner muss ein Impressum-Link im Banner vorhanden sein (§5 TMG).",
                "§5 TMG, LG Rostock Az. 3 O 22/19",
            ))

        # Check 2: DSE link in banner
        has_dse = any(
            "datenschutz" in link["href"] or "datenschutz" in link["text"]
            or "privacy" in link["href"] or "privacy" in link["text"]
            or "dsgvo" in link["href"]
            for link in banner_links
        )
        if not has_dse:
            violations.append(_banner_violation(
                "MEDIUM",
                "Kein Link zur Datenschutzerklaerung im Cookie-Banner. "
                "Nutzer sollten vor der Einwilligung die DSE einsehen koennen.",
                "Art. 13 DSGVO, ErwGr. 42 DSGVO (informierte Einwilligung)",
            ))

        # Check 3: Wrong wording — "consent to the privacy policy"
        wrong_dse_consent_patterns = (
            "stimme der datenschutz",
            "stimme den datenschutz",
            "akzeptiere die datenschutz",
            "akzeptiere die privacy",
            "agree to the privacy policy",
            "accept the privacy",
            "datenschutzerklaerung zustimmen",
            "datenschutzrichtlinie akzeptieren",
            "datenschutzrichtlinie zustimmen",
            "i agree to the privacy",
            "i accept the privacy",
        )
        if any(pattern in banner_lower for pattern in wrong_dse_consent_patterns):
            violations.append(_banner_violation(
                "HIGH",
                "Falsche Formulierung im Banner: 'Zustimmung zur Datenschutzerklaerung'. "
                "Die DSE ist eine Informationspflicht (Art. 13 DSGVO) — man kann sie "
                "nur zur Kenntnis nehmen, nicht 'zustimmen'. "
                "Korrekt: 'Ich habe die Datenschutzinformationen zur Kenntnis genommen'.",
                "Art. 13 DSGVO, ErwGr. 42 (informierte Einwilligung ≠ Zustimmung zur DSE)",
            ))

        # Check 4: Reject option mentioned in the banner text
        reject_texts = ("ablehnen", "reject", "nur notwendige", "alle ablehnen", "decline")
        if not any(t in banner_lower for t in reject_texts):
            violations.append(_banner_violation(
                "HIGH",
                "Kein sichtbarer 'Ablehnen'-Button im Banner erkannt. "
                "Die Ablehnung muss ebenso einfach sein wie die Zustimmung.",
                "§25 Abs. 1 TDDDG, EDPB Guidelines 05/2020 (Consent)",
            ))

        # Check 5: Pre-ticked checkboxes (EuGH Planet49)
        try:
            pre_checked = await page.evaluate(_PRECHECKED_JS, banner_selector)
            if pre_checked:
                violations.append(_banner_violation(
                    "HIGH",
                    f"Vorausgewaehlte Checkboxen im Banner: {', '.join(pre_checked[:3])}. "
                    f"Einwilligung muss durch aktive Handlung erfolgen — vorausgefuellte "
                    f"Checkboxen sind ungueltig.",
                    "Art. 4(11) DSGVO, EuGH C-673/17 (Planet49)",
                ))
        except Exception as exc:
            logger.debug("Pre-ticked checkbox check failed: %s", exc)

        # Check 6: Dark pattern — button size/prominence comparison
        try:
            button_info = await page.evaluate(_BUTTONS_JS, banner_selector)
            if button_info and len(button_info) >= 2:
                accept_btn, reject_btn = _pick_accept_reject(button_info)
                if accept_btn and reject_btn:
                    # max(..., 1) guards against division by zero for hidden buttons.
                    area_ratio = accept_btn["area"] / max(reject_btn["area"], 1)
                    if area_ratio > 2.5:
                        violations.append(_banner_violation(
                            "MEDIUM",
                            f"Dark Pattern: 'Akzeptieren'-Button ist {area_ratio:.1f}x groesser als "
                            f"'Ablehnen'-Button. Beide Optionen muessen gleichwertig dargestellt werden.",
                            "EDPB Guidelines 05/2020, §25 TDDDG, DSK Orientierungshilfe Telemedien",
                        ))
                    size_ratio = accept_btn["fontSize"] / max(reject_btn["fontSize"], 1)
                    if size_ratio > 1.5:
                        violations.append(_banner_violation(
                            "MEDIUM",
                            f"Dark Pattern: Schriftgroesse 'Akzeptieren' ({accept_btn['fontSize']:.0f}px) "
                            f"vs. 'Ablehnen' ({reject_btn['fontSize']:.0f}px). "
                            f"Unterschiedliche Schriftgroessen sind ein Dark Pattern.",
                            "EDPB Guidelines 05/2020 (gleichwertige Darstellung)",
                        ))
        except Exception as exc:
            logger.debug("Button prominence check failed: %s", exc)

        # Check 7: Cookie wall — does rejecting block the site?
        # (This is checked in Phase B — if after reject the page is not navigable)

        # Check 8: Re-access to settings (Art. 7(3) — revocation as easy as consent)
        try:
            settings_accessible = False
            settings_selectors = (
                '[class*="cookie-settings"]', '[class*="privacy-settings"]',
                'a[href*="cookie"]', 'a[href*="datenschutz-einstellungen"]',
                '[class*="consent-settings"]', '#ot-sdk-btn',
                '.cky-btn-revisit', '#CybotCookiebotDialogBodyButtonDetails',
                '[data-testid="uc-footer-link"]',
            )
            for sel in settings_selectors:
                try:
                    if await page.locator(sel).count() > 0:
                        settings_accessible = True
                        break
                except Exception as exc:
                    logger.debug("Settings selector %s failed: %s", sel, exc)

            # Also check footer for cookie settings link
            if not settings_accessible:
                footer_text = ""
                try:
                    footer = page.locator("footer").first
                    if await footer.count() > 0:
                        footer_text = (await footer.text_content() or "").lower()
                except Exception as exc:
                    logger.debug("Footer lookup failed: %s", exc)
                footer_keywords = (
                    "cookie-einstellungen", "cookie settings",
                    "datenschutz-einstellungen", "privacy settings",
                )
                if any(kw in footer_text for kw in footer_keywords):
                    settings_accessible = True

            if not settings_accessible:
                violations.append(_banner_violation(
                    "MEDIUM",
                    "Kein erneuter Zugang zu Cookie-Einstellungen gefunden. "
                    "Der Widerruf der Einwilligung muss ebenso einfach sein wie "
                    "die Erteilung (Art. 7 Abs. 3 DSGVO).",
                    "Art. 7 Abs. 3 DSGVO (Widerruf so einfach wie Einwilligung)",
                ))
        except Exception as exc:
            logger.debug("Settings re-access check failed: %s", exc)

        # Check 9: Third-party DSE link — consent links to external domain DSE
        try:
            page_domain = _bare_host(page.url)
            dse_keywords = ("datenschutz", "privacy", "dsgvo", "data protection")
            # Without a page host there is nothing to compare against.
            for link in (banner_links if page_domain else ()):
                href = link["href"]
                if not href.startswith("http"):
                    continue  # relative links stay on the same site
                link_domain = _bare_host(href)
                if not link_domain:
                    continue
                is_dse_link = any(kw in link["text"] for kw in dse_keywords)
                if is_dse_link and not _same_site(link_domain, page_domain):
                    violations.append(_banner_violation(
                        "HIGH",
                        f"Consent verweist auf Datenschutzerklaerung von {link_domain} "
                        f"statt auf eigene DSE. Der Verantwortliche muss eine eigene "
                        f"Datenschutzerklaerung bereitstellen (Art. 13 DSGVO). "
                        f"Ein Verweis auf die DSE eines Drittanbieters/Auftragsverarbeiters "
                        f"reicht nicht aus.",
                        "Art. 13 DSGVO (Informationspflichten), Art. 26 DSGVO (gemeinsame Verantwortlichkeit)",
                    ))
                    break
        except Exception as exc:
            logger.debug("Third-party DSE link check failed: %s", exc)

        # Check 10: Dark-pattern language — "must/required" for non-essential cookies
        dark_pattern_phrases = (
            "muessen heruntergeladen werden",
            "muessen akzeptiert werden",
            "muessen gesetzt werden",
            "cookies sind erforderlich",
            "cookies are required",
            "must be downloaded",
            "must be accepted",
            "sind zwingend notwendig",
            "unbedingt erforderlich",
        )
        essential_context = (
            "technisch notwendig", "essential", "strictly necessary",
            "unbedingt erforderlich fuer den betrieb",
        )
        phrase = next((p for p in dark_pattern_phrases if p in banner_lower), None)
        # Only the first matching phrase is reported, and only when the banner
        # does not also talk about essential cookies.
        if phrase and not any(kw in banner_lower for kw in essential_context):
            violations.append(_banner_violation(
                "MEDIUM",
                f"Dark-Pattern-Sprache: '{phrase}' suggeriert technische "
                f"Notwendigkeit fuer nicht-essentielle Cookies. Nutzer koennten "
                f"den Eindruck gewinnen, eine Zustimmung sei alternativlos.",
                "EDPB Guidelines 05/2020 Rn. 70, Art. 7(4) DSGVO (freiwillige Einwilligung)",
            ))

        # Check 11: Modal dismiss = consent (click outside closes + sets consent)
        # NOTE(review): this is a heuristic — any overlay-like element on the page
        # plus a dialog without aria-modal/open triggers it; no click is simulated.
        try:
            dismiss_info = await page.evaluate(_DISMISS_JS, banner_selector)
            if dismiss_info and dismiss_info.get("hasOverlay") and not dismiss_info.get("isModal"):
                violations.append(_banner_violation(
                    "HIGH",
                    "Consent-Dialog ist nicht modal — Klick auf den Hintergrund kann "
                    "das Fenster schliessen und als Einwilligung gewertet werden. "
                    "Ein versehentlicher Klick ist keine aktive Einwilligung. "
                    "Der Dialog muss modal sein (nur explizite Buttons als Optionen).",
                    "EuGH C-673/17 Planet49 (aktive Handlung), Art. 7(1) DSGVO (Nachweispflicht), "
                    "EDPB Guidelines 05/2020 Rn. 77 (silence/inactivity ≠ consent)",
                ))
        except Exception as exc:
            logger.debug("Modal dismiss check failed: %s", exc)

    except Exception as e:
        logger.warning("Banner text check failed: %s", e)

    return {"violations": violations, "has_impressum": has_impressum, "has_dse": has_dse}
|
||||||
@@ -0,0 +1,213 @@
|
|||||||
|
"""
|
||||||
|
Consent Scanner — Playwright-based cookie consent test (phases A–C, followed by best-effort per-category phases D–F).
|
||||||
|
|
||||||
|
Phase A: Before consent (first visit)
|
||||||
|
Phase B: After rejecting consent
|
||||||
|
Phase C: After accepting consent
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from playwright.async_api import async_playwright, Page, BrowserContext
|
||||||
|
|
||||||
|
from services.banner_detector import detect_banner, click_button, BannerInfo
|
||||||
|
from services.script_analyzer import (
|
||||||
|
classify_scripts, find_tracking_services,
|
||||||
|
find_violations_before_consent, find_violations_after_reject, Violation,
|
||||||
|
)
|
||||||
|
from services.banner_text_checker import check_banner_text as _check_banner_text
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
USER_AGENT = (
|
||||||
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ConsentTestResult:
    """Findings collected by one consent test run.

    Every field has a default, so a freshly constructed instance describes
    a run in which nothing was detected.
    """

    # Banner detection
    banner_detected: bool = False
    banner_provider: str = ""

    # Phase A — first visit, before any consent interaction
    before_scripts: list[str] = field(default_factory=list)
    before_cookies: list[str] = field(default_factory=list)
    before_tracking: list[str] = field(default_factory=list)
    before_violations: list[Violation] = field(default_factory=list)

    # Phase B — after clicking reject
    reject_scripts: list[str] = field(default_factory=list)
    reject_cookies: list[str] = field(default_factory=list)
    reject_new_tracking: list[str] = field(default_factory=list)
    reject_violations: list[Violation] = field(default_factory=list)

    # Phase C — after clicking accept
    accept_scripts: list[str] = field(default_factory=list)
    accept_cookies: list[str] = field(default_factory=list)
    accept_new_tracking: list[str] = field(default_factory=list)
    accept_undocumented: list[str] = field(default_factory=list)

    # Phase D-F — one entry per individually tested category
    category_tests: list = field(default_factory=list)  # list[CategoryTestResult]

    # Legal checks on the banner text
    banner_text_violations: list[Violation] = field(default_factory=list)
    banner_has_impressum_link: bool = False
    banner_has_dse_link: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
async def run_consent_test(url: str, wait_secs: int = 10) -> ConsentTestResult:
    """Run the consent test on *url* and return the collected findings.

    Phase A loads the page without interaction and runs the banner checks.
    If a banner is detected, phase B clicks reject and phase C clicks accept,
    each in a fresh browser context. Phases D-F then test each detected
    consent category individually; a failure there is logged and does not
    discard the earlier results.

    Args:
        url: Page to test.
        wait_secs: Seconds to wait after the initial load (phase A) and after
            each button click before collecting scripts and cookies.

    Returns:
        The populated ``ConsentTestResult``. Errors are logged, not raised;
        fields of phases that did not run keep their defaults.
    """
    result = ConsentTestResult()
    wait_ms = wait_secs * 1000

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )

        try:
            # ── Phase A: Before consent ──────────────────────────
            logger.info("Phase A: First visit (no interaction)")
            ctx_a = await browser.new_context(user_agent=USER_AGENT)
            try:
                page_a = await ctx_a.new_page()
                scripts_a: list[str] = []
                page_a.on("request", lambda req: _collect_script(req, scripts_a))

                await page_a.goto(url, wait_until="networkidle", timeout=30000)
                await page_a.wait_for_timeout(wait_ms)

                result.before_scripts = _get_page_scripts(scripts_a)
                result.before_cookies = _get_cookie_names(await ctx_a.cookies())
                result.before_tracking = find_tracking_services(result.before_scripts)
                result.before_violations = find_violations_before_consent(result.before_scripts)

                # Detect banner
                banner = await detect_banner(page_a)
                result.banner_detected = banner.detected
                result.banner_provider = banner.provider

                # Check banner text for legal issues
                if banner.detected:
                    banner_check = await _check_banner_text(page_a)
                    result.banner_text_violations = banner_check["violations"]
                    result.banner_has_impressum_link = banner_check["has_impressum"]
                    result.banner_has_dse_link = banner_check["has_dse"]
            finally:
                await ctx_a.close()

            if not banner.detected:
                logger.info("No consent banner detected — skipping Phase B/C")
                # The outer ``finally`` closes the browser; no explicit close here.
                return result

            # ── Phase B: After rejecting ─────────────────────────
            logger.info("Phase B: Reject consent (%s)", banner.provider)
            result.reject_scripts, result.reject_cookies = await _run_click_phase(
                browser, url, banner.reject_selector, "reject", wait_secs,
            )
            reject_tracking = find_tracking_services(result.reject_scripts)
            result.reject_new_tracking = [
                t for t in reject_tracking if t not in result.before_tracking
            ]
            result.reject_violations = find_violations_after_reject(
                result.before_scripts, result.reject_scripts,
            )

            # ── Phase C: After accepting ─────────────────────────
            logger.info("Phase C: Accept consent (%s)", banner.provider)
            result.accept_scripts, result.accept_cookies = await _run_click_phase(
                browser, url, banner.accept_selector, "accept", wait_secs,
            )
            accept_tracking = find_tracking_services(result.accept_scripts)
            result.accept_new_tracking = [
                t for t in accept_tracking if t not in result.before_tracking
            ]

            # ── Phase D-F: Per-category tests ────────────────────
            try:
                from services.category_tester import detect_categories, test_single_category

                ctx_cat = await browser.new_context(user_agent=USER_AGENT)
                try:
                    page_cat = await ctx_cat.new_page()
                    await page_cat.goto(url, wait_until="networkidle", timeout=20000)
                    await page_cat.wait_for_timeout(2000)
                    categories = await detect_categories(page_cat, banner)
                finally:
                    # Closing the context also closes its page.
                    await ctx_cat.close()

                if categories:
                    logger.info("Testing %d categories individually", len(categories))
                    for cat in categories:
                        cat_ctx = await browser.new_context(user_agent=USER_AGENT)
                        try:
                            cat_result = await test_single_category(
                                cat_ctx, url, cat, banner, wait_ms,
                            )
                            result.category_tests.append(cat_result)
                        finally:
                            await cat_ctx.close()
                else:
                    logger.info("No categories detected — skipping per-category tests")
            except Exception as cat_err:
                logger.warning("Category tests failed (non-blocking): %s", cat_err)

        except Exception as e:
            logger.error("Consent test failed: %s", e)
        finally:
            await browser.close()

    logger.info(
        "Consent test complete: banner=%s, violations_before=%d, violations_reject=%d, categories=%d",
        result.banner_provider, len(result.before_violations), len(result.reject_violations),
        len(result.category_tests),
    )
    return result


async def _run_click_phase(browser, url: str, selector, action: str, wait_secs: int) -> tuple:
    """Load *url* in a fresh context, click *selector* and collect the traffic.

    Args:
        browser: Playwright browser to create the context in.
        url: Page to load.
        selector: Banner button selector passed to ``click_button``.
        action: Button name used in log messages ("reject" or "accept").
        wait_secs: Seconds to wait after a successful click.

    Returns:
        ``(request_urls, cookie_names)`` observed in this context.
    """
    ctx = await browser.new_context(user_agent=USER_AGENT)
    try:
        page = await ctx.new_page()
        requests: list[str] = []
        page.on("request", lambda req: _collect_script(req, requests))

        await page.goto(url, wait_until="networkidle", timeout=30000)
        await page.wait_for_timeout(3000)

        if await click_button(page, selector):
            logger.info("%s button clicked, waiting %ds", action.capitalize(), wait_secs)
            await page.wait_for_timeout(wait_secs * 1000)
        else:
            logger.warning("Could not click %s button", action)

        return _get_page_scripts(requests), _get_cookie_names(await ctx.cookies())
    finally:
        await ctx.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_script(request, scripts: list[str]):
|
||||||
|
"""Collect script request URLs."""
|
||||||
|
if request.resource_type in ("script", "image", "xhr", "fetch"):
|
||||||
|
scripts.append(request.url)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_page_scripts(collected: list[str]) -> list[str]:
|
||||||
|
"""Deduplicate and filter script URLs."""
|
||||||
|
seen = set()
|
||||||
|
result = []
|
||||||
|
for url in collected:
|
||||||
|
domain = url.split("/")[2] if "/" in url and len(url.split("/")) > 2 else url
|
||||||
|
if domain not in seen:
|
||||||
|
seen.add(domain)
|
||||||
|
result.append(url)
|
||||||
|
return result[:50] # Cap at 50
|
||||||
|
|
||||||
|
|
||||||
|
def _get_cookie_names(cookies: list[dict]) -> list[str]:
|
||||||
|
"""Extract cookie names from Playwright cookie list."""
|
||||||
|
return sorted(set(c.get("name", "") for c in cookies if c.get("name")))
|
||||||
|
|
||||||
Reference in New Issue
Block a user