From 938f9a6c51dc63eed6ceb8f5284c937a6cf9a1db Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Sat, 16 May 2026 20:58:48 +0200 Subject: [PATCH] fix(cmp): tolerate variable URL segments in ePaaS policy pattern BMW ePaaS URLs use 3 segments between /policypage/ and .epaas.json: /epaas/prod/policypage/{seg1}/{seg2}/{seg3}.epaas.json The old pattern only matched 2 segments. Switch to a tolerant pattern that matches any path before .epaas.json (anchored so that .epaas.json must end the URL or be followed by a query string). --- consent-tester/services/cmp_extractor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/consent-tester/services/cmp_extractor.py b/consent-tester/services/cmp_extractor.py index 8b5a19e9..8bc4c2e4 100644 --- a/consent-tester/services/cmp_extractor.py +++ b/consent-tester/services/cmp_extractor.py @@ -32,7 +32,9 @@ logger = logging.getLogger(__name__) # URL patterns that identify a CMP policy JSON. Order matters — first match wins. _MATCHERS: list[tuple[str, re.Pattern[str]]] = [ - ("epaas", re.compile(r"/epaas/prod/policypage/[^/]+/[^/]+\.epaas\.json", re.I)), + # BMW ePaaS: /epaas/prod/policypage/{seg1}/{seg2}/{seg3}.epaas.json + # Use a tolerant pattern: any number of segments before .epaas.json + ("epaas", re.compile(r"/epaas/prod/policypage/.+\.epaas\.json(\?|$)", re.I)), ("onetrust", re.compile(r"cdn\.cookielaw\.org/consent/[^/]+/[^/]+\.json", re.I)), ]