7e426c31f1
cmp_extractor.py refactored to thin coordinator (123 LOC, was 223). Discovers all CMP modules via cmp_library/_registry.py:load_all() at import time. Restart consent-tester to pick up new modules. New cmp_library/ folder: - _registry.py: auto-discovers all modules with MATCHER + reconstruct() - epaas.py: BMW Group ePaaS (extracted from cmp_extractor) - onetrust.py: cdn.cookielaw.org Groups/Cookies schema - cookiebot.py: consent.cookiebot.com Categories schema - usercentrics.py: api.usercentrics.eu services schema - didomi.py: sdk.privacy-center.org notice + vendors + purposes - trustarc.py: consent.trustarc.com categories + vendors Each module: - MATCHER: re.Pattern matching the CMP JSON endpoint URL - reconstruct(d: dict) -> str: builds German Markdown cookie-policy text Phase E (self-improving) will write auto_*.py files into the same folder; _registry already picks those up via pkgutil.iter_modules.
47 lines
1.7 KiB
Python
47 lines
1.7 KiB
Python
"""Cookiebot (by Usercentrics A/S — separate product from Usercentrics CMP).
|
|
|
|
URLs (multiple shapes observed):
|
|
- consent.cookiebot.com/<id>/cc.js (JSONP-wrapped)
|
|
- consent.cookiebot.com/uc.js?... (JSONP)
|
|
- consent.cookiebot.com/<id>/cd.js (cookie declaration)
|
|
MATCHER accepts any consent.cookiebot.com URL ending in .json or .js (optionally followed by a query string).
|
|
The capture pipeline JSON-decodes; if it's JSONP we'd need to strip the
|
|
callback wrapper. For now we match only direct JSON responses.
|
|
|
|
Schema (cookiedeclaration JSON):
|
|
Categories: list with name + cookies (each with name, vendor, expires)
|
|
"""
|
|
|
|
import re
|
|
|
|
# Case-insensitive pattern for Cookiebot endpoints: anything on
# consent.cookiebot.com whose URL ends in ".json" or ".js", either at the very
# end of the string or directly before a "?" query string.
MATCHER = re.compile(
    r"consent\.cookiebot\.com/.*\.(?:json|js)(?:\?|$)",
    flags=re.IGNORECASE,
)
|
|
|
|
|
|
# Upper bound on cookie bullet points emitted per category.
_MAX_COOKIES_PER_CATEGORY = 50
# Purpose texts are cut to this many characters to keep bullets short.
_MAX_PURPOSE_CHARS = 120


def _first_value(record: dict, *keys: str) -> object:
    """Return the first truthy value stored under any of *keys*, else ""."""
    for key in keys:
        value = record.get(key)
        if value:
            return value
    return ""


def _as_list(value: object) -> list:
    """Return *value* as a list if it is a list/tuple, otherwise an empty list."""
    return list(value) if isinstance(value, (list, tuple)) else []


def reconstruct(d: dict) -> str:
    """Build German Markdown cookie-policy text from a decoded Cookiebot payload.

    Reads ``Categories`` (or ``categories``) from *d*. Every category becomes a
    ``## <name>`` section, optionally followed by its description and one
    bullet per cookie (name, vendor, truncated purpose, storage duration).
    Keys are looked up in PascalCase first, then lowercase.

    The payload comes from a captured network response, so its shape is not
    trusted: a non-dict payload, non-list containers and non-dict entries are
    skipped instead of raising, and scalar values are stringified.

    Args:
        d: JSON-decoded response body.

    Returns:
        Markdown text; only the title line when no usable category is found.
    """
    parts: list[str] = ["# Cookie-Richtlinie (Cookiebot)"]
    if not isinstance(d, dict):
        return parts[0]

    for cat in _as_list(_first_value(d, "Categories", "categories")):
        if not isinstance(cat, dict):
            continue  # e.g. a null entry in the list

        name = _first_value(cat, "Name", "name")
        desc = _first_value(cat, "Description", "description")
        parts.append("")  # blank line before each section heading
        parts.append(f"## {name}")
        if desc:
            # str() so a non-string description cannot break the final join.
            parts.append(str(desc))

        cookies = _as_list(_first_value(cat, "Cookies", "cookies"))
        # The cap applies to the raw list, before malformed entries are dropped.
        for cookie in cookies[:_MAX_COOKIES_PER_CATEGORY]:
            if not isinstance(cookie, dict):
                continue

            vendor = _first_value(cookie, "Vendor", "vendor")
            expires = _first_value(cookie, "Expires", "expires")
            purpose = _first_value(cookie, "Purpose", "purpose")

            line = f"- {_first_value(cookie, 'Name', 'name')}"
            if vendor:
                line += f" ({vendor})"
            if purpose:
                # Stringify first: slicing a number would raise TypeError.
                line += f" — {str(purpose)[:_MAX_PURPOSE_CHARS]}"
            if expires:
                line += f" — Speicherdauer: {expires}"
            parts.append(line)

    return "\n".join(parts)
|