"""OneTrust Cookie Consent. URL: cdn.cookielaw.org/consent//.json OR cdn.cookielaw.org/consent//.json Schema: Groups[] with GroupName, GroupDescription, Cookies[] """ import re MATCHER = re.compile(r"cdn\.cookielaw\.org/consent/[^/]+/[^/]+\.json", re.I) _TAG_RE = re.compile(r"<[^>]+>") _WS_RE = re.compile(r"\s+") def _clean(text: str) -> str: no_tags = _TAG_RE.sub(" ", text) no_tags = no_tags.replace(" ", " ").replace("&", "&") return _WS_RE.sub(" ", no_tags).strip() def reconstruct(d: dict) -> str: parts: list[str] = ["# Cookie-Richtlinie (OneTrust)"] # Optional preamble fields for key in ("Description", "PolicyText", "PolicyDescription"): val = d.get(key) if val: parts.append("") parts.append(_clean(str(val))) groups = d.get("Groups") or d.get("groups") or [] for g in groups: name = g.get("GroupName") or g.get("name") or "" desc = g.get("GroupDescription") or g.get("description") or "" parts.append("") parts.append(f"## {name}") if desc: parts.append(_clean(str(desc))) cookies = g.get("Cookies") or g.get("cookies") or [] for c in cookies[:50]: cn = c.get("Name") or c.get("name") or "" cp = c.get("Provider") or c.get("provider") or "" cd = c.get("description") or c.get("Description") or "" ce = c.get("Length") or c.get("expires") or "" line = f"- {cn}" if cp: line += f" ({cp})" if cd: line += f" — {cd[:120]}" if ce: line += f" — Speicherdauer: {ce}" parts.append(line) return "\n".join(parts)