feat(consent-tester): Phase B — named CMP library + plugin architecture
cmp_extractor.py refactored to a thin coordinator (123 LOC, was 223). It discovers all CMP modules via cmp_library/_registry.py:load_all() at import time, so restart consent-tester to pick up new modules.

New cmp_library/ folder:
- _registry.py: auto-discovers all modules with MATCHER + reconstruct()
- epaas.py: BMW Group ePaaS (extracted from cmp_extractor)
- onetrust.py: cdn.cookielaw.org Groups/Cookies schema
- cookiebot.py: consent.cookiebot.com Categories schema
- usercentrics.py: api.usercentrics.eu services schema
- didomi.py: sdk.privacy-center.org notice + vendors + purposes
- trustarc.py: consent.trustarc.com categories + vendors

Each module provides:
- MATCHER: re.Pattern matching the CMP JSON endpoint URL
- reconstruct(d: dict) -> str: builds German Markdown cookie-policy text

Phase E (self-improving) will write auto_*.py files into the same folder; _registry already picks those up via pkgutil.iter_modules.
This commit is contained in:
@@ -0,0 +1,69 @@
|
||||
"""BMW Group ePaaS (Enterprise Privacy as a Service).
|
||||
|
||||
URL: /epaas/prod/policypage/<tenant>/<config>/<locale>.epaas.json
|
||||
Schema: policyPageMetadata + categories + providers
|
||||
"""
|
||||
|
||||
import html
import re
|
||||
|
||||
# Case-insensitive pattern for the ePaaS policy-page JSON endpoint: the URL
# must contain "/epaas/prod/policypage/", and ".epaas.json" must be followed
# either by a "?" (query string) or by the end of the string.
MATCHER = re.compile(r"/epaas/prod/policypage/.+\.epaas\.json(\?|$)", re.I)
|
||||
|
||||
_TAG_RE = re.compile(r"<[^>]+>")
|
||||
_WS_RE = re.compile(r"\s+")
|
||||
|
||||
|
||||
def _clean(text: str) -> str:
|
||||
no_tags = _TAG_RE.sub(" ", text)
|
||||
no_tags = (no_tags
|
||||
.replace(" ", " ").replace("&", "&")
|
||||
.replace("<", "<").replace(">", ">")
|
||||
.replace(""", '"').replace("'", "'"))
|
||||
return _WS_RE.sub(" ", no_tags).strip()
|
||||
|
||||
|
||||
def reconstruct(d: dict) -> str:
    """Build the German Markdown cookie-policy text from an ePaaS payload.

    The output is assembled line by line and joined with newlines:

    * a fixed ``# Cookie-Richtlinie`` title;
    * the non-empty ``policyPageMetadata`` texts ``heading``,
      ``subHeading``, ``prologue``, ``dataController`` and ``epilogue``
      (in that order), each cleaned and preceded by a blank line;
    * a ``## Cookie-Kategorien`` section with one ``###`` heading per entry
      of ``categories`` followed by its cleaned description;
    * a ``## Anbieter (<count>)`` bullet list with one line per entry of
      ``providers``;
    * a trailing ``Speicherdauer`` paragraph taken from the metadata.

    Args:
        d: Decoded JSON payload. Missing or falsy ``policyPageMetadata``,
            ``categories`` and ``providers`` values are treated as empty.

    Returns:
        The Markdown document as a single string.
    """
    metadata = d.get("policyPageMetadata", {}) or {}
    lines: list[str] = ["# Cookie-Richtlinie"]

    # Free-text metadata blocks, emitted in this fixed order.
    for key in ("heading", "subHeading", "prologue", "dataController", "epilogue"):
        value = metadata.get(key)
        if not value:
            continue
        lines.append("")
        lines.append(_clean(str(value)))

    categories = d.get("categories", []) or []
    if categories:
        lines.append("")
        lines.append("## Cookie-Kategorien")
        for category in categories:
            # Fall back from the display name to the id, and from the plain
            # description to the HTML variant.
            title = category.get("name") or category.get("id") or ""
            description = (
                category.get("description")
                or category.get("descriptionHtml")
                or ""
            )
            lines.append("")
            lines.append(f"### {title}")
            lines.append(_clean(str(description)))

    providers = d.get("providers", []) or []
    if providers:
        lines.append("")
        lines.append(f"## Anbieter ({len(providers)})")
        for provider in providers:
            label = provider.get("name") or provider.get("id") or ""
            purpose = (provider.get("purpose") or "").strip()
            country = (provider.get("country") or "").strip()
            persistence = (
                provider.get("persistencePurposeDescription") or ""
            ).strip()

            # Only non-empty details are appended to the bullet line.
            bullet = f"- {label}"
            if purpose:
                bullet += f" — Zweck: {purpose}"
            if country:
                bullet += f" — Sitz: {country}"
            if persistence:
                # The retention text is capped at 120 characters.
                bullet += f" — Speicherdauer: {persistence[:120]}"
            lines.append(bullet)

    if metadata.get("expiresAfter"):
        lines.append("")
        lines.append(f"Speicherdauer: {metadata['expiresAfter']}")
        # NOTE(review): this check is assumed to be nested under the
        # expiresAfter branch, since it adds no blank separator line of its
        # own - confirm against the original file's indentation.
        if metadata.get("persistencePurposeText"):
            lines.append(_clean(str(metadata["persistencePurposeText"])))

    return "\n".join(lines)
|
||||
Reference in New Issue
Block a user