fix(vvt): correct ePaaS schema mapping + category-aware scoring
The first BMW VVT table rendered all 24 providers at 20% score because
the ePaaS extractor was reading the wrong field names. Actual schema is
nested: providers[].processings[].persistences[], NOT providers[] alone.
Correct ePaaS schema (verified against bmw.com/epaas/.../de_DE.epaas.json):
  Provider:    {id, name, description, processings[]}
  Processing:  {id, name, description, categoryId, optOutLink,
                privacyPolicyLink, persistences[]}
  Persistence: {id, name, domain, type, expiry, description}
Two structural changes:
  1. One row per processing (not per provider). BMW has 26 providers but
     ~91 processings spread across them (Adobe alone has ACMProcessing,
     AdobeAnalytics, AdobeCampaign, AdobeTargetAnalytics, AdobeTargetPers.).
     The cookie widget displays each processing separately — the VVT now
     mirrors that. Display name format: 'Provider Name — Processing Name'.
  2. Read optOutLink/privacyPolicyLink from the PROCESSING (where they
     live), not from the provider. Persistences are flattened into
     cookies[] with name + expiry + description.
Plus category mapping (ePaaS categoryId -> canonical VVT category):
  advertising       -> marketing
  strictlyNecessary -> necessary
  statistics        -> statistics
  functional        -> functional
Category-aware scoring (cookie_link_validator.score_vendors):
  - 'necessary' (technisch erforderliche Cookies, i.e. technically
    necessary cookies; §25 Abs. 2 TDDDG): no opt-out required, no country
    required. Score weight shifts to purpose + cookie disclosure
    (essential cookies must list names + expiry).
  - All other categories: the opt-out URL is still mandatory; a missing
    opt-out is flagged 'no_opt_out_url' and earns zero points for that block.
Expected BMW result after this fix:
  - ~91 rows (Adobe Analytics, Adform Retargeting, Akamai Infrastructure,
    AWS, ..., plus ~60 strictlyNecessary processings)
  - Marketing rows with an opt-out link present → ~75-90%
  - Necessary rows with cookie names + expiry → ~85-95%
  - Rows with missing fields → still flagged
This commit is contained in:
@@ -173,8 +173,16 @@ async def validate_vendor_urls(vendors: list[dict]) -> list[dict]:
|
|||||||
|
|
||||||
|
|
||||||
def score_vendors(vendors: list[dict]) -> list[dict]:
|
def score_vendors(vendors: list[dict]) -> list[dict]:
|
||||||
"""Compute per-vendor compliance score (0-100) and flags. Mutates."""
|
"""Compute per-vendor compliance score (0-100) and flags. Mutates.
|
||||||
|
|
||||||
|
Category-aware: 'necessary' (technisch erforderliche Cookies) do NOT
|
||||||
|
require an opt-out — §25 Abs. 2 TDDDG. Penalising them for that would
|
||||||
|
be wrong; instead we require precise purpose + cookie disclosure.
|
||||||
|
"""
|
||||||
for v in vendors:
|
for v in vendors:
|
||||||
|
is_necessary = (v.get("category") or "").lower() in (
|
||||||
|
"necessary", "strictlynecessary",
|
||||||
|
)
|
||||||
score = 0
|
score = 0
|
||||||
max_score = 0
|
max_score = 0
|
||||||
flags: list[str] = []
|
flags: list[str] = []
|
||||||
@@ -186,50 +194,56 @@ def score_vendors(vendors: list[dict]) -> list[dict]:
|
|||||||
else:
|
else:
|
||||||
flags.append("no_name")
|
flags.append("no_name")
|
||||||
|
|
||||||
# Purpose — 15
|
# Purpose — 20
|
||||||
max_score += 15
|
max_score += 20
|
||||||
if v.get("purpose"):
|
if v.get("purpose"):
|
||||||
score += 15
|
score += 20
|
||||||
else:
|
else:
|
||||||
flags.append("no_purpose")
|
flags.append("no_purpose")
|
||||||
|
|
||||||
# Country (3rd-country transfer relevance) — 10
|
# Country (3rd-country transfer relevance) — only relevant for
|
||||||
max_score += 10
|
# consent-based categories (otherwise irrelevant flag noise)
|
||||||
if v.get("country"):
|
if not is_necessary:
|
||||||
score += 10
|
max_score += 10
|
||||||
else:
|
if v.get("country"):
|
||||||
flags.append("no_country")
|
score += 10
|
||||||
|
else:
|
||||||
|
flags.append("no_country")
|
||||||
|
|
||||||
# Opt-Out URL present + reachable — 25
|
# Opt-Out URL — only for consent-based categories (§25 TDDDG)
|
||||||
max_score += 25
|
if not is_necessary:
|
||||||
if not v.get("opt_out_url"):
|
max_score += 25
|
||||||
flags.append("no_opt_out_url")
|
if not v.get("opt_out_url"):
|
||||||
elif v.get("opt_out_ok") is False:
|
flags.append("no_opt_out_url")
|
||||||
flags.append("broken_opt_out")
|
elif v.get("opt_out_ok") is False:
|
||||||
score += 5 # at least they tried
|
flags.append("broken_opt_out")
|
||||||
else:
|
score += 5
|
||||||
score += 25
|
else:
|
||||||
|
score += 25
|
||||||
|
|
||||||
# Privacy policy URL present + reachable — 15
|
# Privacy policy URL — relevant for all, but weight lower for necessary
|
||||||
max_score += 15
|
weight = 10 if is_necessary else 15
|
||||||
|
max_score += weight
|
||||||
if not v.get("privacy_policy_url"):
|
if not v.get("privacy_policy_url"):
|
||||||
flags.append("no_privacy_url")
|
flags.append("no_privacy_url")
|
||||||
elif v.get("privacy_ok") is False:
|
elif v.get("privacy_ok") is False:
|
||||||
flags.append("broken_privacy_url")
|
flags.append("broken_privacy_url")
|
||||||
score += 5
|
score += weight // 3
|
||||||
else:
|
else:
|
||||||
score += 15
|
score += weight
|
||||||
|
|
||||||
# Cookies disclosed (names + expiry) — 15
|
# Cookies disclosed (names + expiry) — higher weight for necessary
|
||||||
max_score += 15
|
# (since that's mostly what they offer in lieu of opt-out)
|
||||||
|
weight = 50 if is_necessary else 15
|
||||||
|
max_score += weight
|
||||||
cookies = v.get("cookies") or []
|
cookies = v.get("cookies") or []
|
||||||
if cookies:
|
if cookies:
|
||||||
named = sum(1 for c in cookies if c.get("name"))
|
named = sum(1 for c in cookies if c.get("name"))
|
||||||
with_expiry = sum(1 for c in cookies if c.get("expiry"))
|
with_expiry = sum(1 for c in cookies if c.get("expiry"))
|
||||||
if named >= 1 and with_expiry >= 1:
|
if named >= 1 and with_expiry >= 1:
|
||||||
score += 15
|
score += weight
|
||||||
elif named >= 1:
|
elif named >= 1:
|
||||||
score += 8
|
score += weight // 2
|
||||||
flags.append("cookies_no_expiry")
|
flags.append("cookies_no_expiry")
|
||||||
else:
|
else:
|
||||||
flags.append("cookies_no_names")
|
flags.append("cookies_no_names")
|
||||||
|
|||||||
@@ -90,41 +90,67 @@ def extract_vendors_from_payloads(payloads: list[dict]) -> list[dict]:
|
|||||||
|
|
||||||
# ── ePaaS (BMW Group) ───────────────────────────────────────────────
|
# ── ePaaS (BMW Group) ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Maps ePaaS categoryId -> canonical category used by the VVT scorer.
|
||||||
|
_EPAAS_CATEGORY_MAP = {
|
||||||
|
"advertising": "marketing",
|
||||||
|
"marketing": "marketing",
|
||||||
|
"strictlyNecessary": "necessary",
|
||||||
|
"necessary": "necessary",
|
||||||
|
"statistics": "statistics",
|
||||||
|
"functional": "functional",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def _extract_epaas(d: dict) -> list[dict]:
|
def _extract_epaas(d: dict) -> list[dict]:
|
||||||
|
"""Convert ePaaS payload into one row per *processing* (not provider).
|
||||||
|
|
||||||
|
ePaaS schema (BMW):
|
||||||
|
providers[].processings[].persistences[]
|
||||||
|
provider: {id, name, description}
|
||||||
|
processing: {id, name, description, categoryId, optOutLink,
|
||||||
|
privacyPolicyLink, persistences}
|
||||||
|
persistence: {id, name, domain, type, expiry, description}
|
||||||
|
|
||||||
|
Each processing is a separate displayable unit in the cookie widget
|
||||||
|
(Adobe Analytics, Adobe Campaign, Adobe Target Personalisation, …) —
|
||||||
|
matching the website layout one-to-one in the VVT table. Provider name
|
||||||
|
becomes the prefix so the data-controller entity is visible.
|
||||||
|
"""
|
||||||
out: list[dict] = []
|
out: list[dict] = []
|
||||||
providers = d.get("providers", []) or []
|
for provider in d.get("providers", []) or []:
|
||||||
cookies_by_provider: dict[str, list[dict]] = {}
|
provider_name = provider.get("name") or provider.get("id") or ""
|
||||||
|
provider_desc = _clean(provider.get("description"))
|
||||||
|
for processing in provider.get("processings", []) or []:
|
||||||
|
name = (processing.get("name") or processing.get("id")
|
||||||
|
or provider_name)
|
||||||
|
purpose = _clean(processing.get("description")
|
||||||
|
or processing.get("name") or provider_desc)
|
||||||
|
cat_raw = processing.get("categoryId", "")
|
||||||
|
category = _EPAAS_CATEGORY_MAP.get(cat_raw, cat_raw or "")
|
||||||
|
|
||||||
for c in d.get("cookies", []) or []:
|
cookies: list[dict] = []
|
||||||
pid = str(c.get("providerId") or c.get("provider") or c.get("vendor") or "")
|
for c in processing.get("persistences", []) or []:
|
||||||
if pid:
|
cookies.append({
|
||||||
cookies_by_provider.setdefault(pid, []).append({
|
"name": c.get("name") or c.get("id") or "",
|
||||||
"name": c.get("name") or c.get("id") or "",
|
"purpose": _clean(c.get("description")),
|
||||||
"purpose": _clean(c.get("purpose") or c.get("description")),
|
"expiry": _clean(c.get("expiry")),
|
||||||
"expiry": _clean(c.get("expiry") or c.get("retention") or c.get("persistence")),
|
"is_third_party": True,
|
||||||
"is_third_party": bool(c.get("isThirdParty") or c.get("third_party")),
|
})
|
||||||
|
|
||||||
|
display_name = (f"{provider_name} — {name}"
|
||||||
|
if name and name != provider_name
|
||||||
|
else (provider_name or name))
|
||||||
|
out.append({
|
||||||
|
"name": display_name,
|
||||||
|
"country": "", # ePaaS doesn't surface vendor country
|
||||||
|
"purpose": purpose,
|
||||||
|
"category": category,
|
||||||
|
"opt_out_url": (processing.get("optOutLink") or "").strip(),
|
||||||
|
"privacy_policy_url": (processing.get("privacyPolicyLink")
|
||||||
|
or "").strip(),
|
||||||
|
"persistence": "",
|
||||||
|
"cookies": cookies,
|
||||||
})
|
})
|
||||||
|
|
||||||
for p in providers:
|
|
||||||
pid = str(p.get("id") or p.get("vendorId") or p.get("name") or "")
|
|
||||||
cookies = cookies_by_provider.get(pid, []) or [{
|
|
||||||
"name": c.get("name", ""),
|
|
||||||
"purpose": _clean(c.get("purpose")),
|
|
||||||
"expiry": _clean(c.get("expiry") or c.get("persistence")),
|
|
||||||
"is_third_party": True,
|
|
||||||
} for c in (p.get("cookies", []) or [])]
|
|
||||||
out.append({
|
|
||||||
"name": p.get("name") or pid or "",
|
|
||||||
"country": (p.get("country") or "").strip(),
|
|
||||||
"purpose": _clean(p.get("purpose")),
|
|
||||||
"category": (p.get("category") or "").strip(),
|
|
||||||
"opt_out_url": (p.get("optOutUrl") or p.get("optoutUrl")
|
|
||||||
or p.get("opt_out_url") or "").strip(),
|
|
||||||
"privacy_policy_url": (p.get("policyUrl") or p.get("policy_url")
|
|
||||||
or p.get("privacyPolicyUrl") or "").strip(),
|
|
||||||
"persistence": _clean(p.get("persistencePurposeDescription")),
|
|
||||||
"cookies": cookies,
|
|
||||||
})
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user