diff --git a/consent-tester/services/cmp_extractor.py b/consent-tester/services/cmp_extractor.py index d2ca2bcc..7f8ef0ad 100644 --- a/consent-tester/services/cmp_extractor.py +++ b/consent-tester/services/cmp_extractor.py @@ -71,18 +71,29 @@ class CMPCapture: "/api/config", "/beacon", "/track", "/analytics", "/fonts/", "/log/", "/heartbeat", "/.well-known/", "/intake/", "/collect", "/ping", "/metrics", + "/login", "/auth", "/user", "/session", "/cart", "/checkout", + "/search", "/recommendation", "/flyout", "/menu", "/nav", + "/translation", "/i18n", "/locale", "/feature-flag", )): return data = await _parse_json_response(response) if data is None: return + # Skip tiny payloads — real CMP cookie policies are ≥5KB. + # A 4KB JSON of cookie-shaped data is almost never the policy. + try: + size_kb = len(json.dumps(data)) // 1024 + except Exception: + size_kb = 0 + if size_kb < 5: + return from services.cmp_heuristic import looks_like_cookie_policy if looks_like_cookie_policy(data): self.payloads.append(("_heuristic", data)) logger.info( "CMP captured: _heuristic (%s, ~%dKB)", - url[:120], len(json.dumps(data)) // 1024, + url[:120], size_kb, ) except Exception as e: logger.debug("CMP listener error: %s", e) diff --git a/consent-tester/services/dsi_discovery.py b/consent-tester/services/dsi_discovery.py index b06db3fb..668c33e1 100644 --- a/consent-tester/services/dsi_discovery.py +++ b/consent-tester/services/dsi_discovery.py @@ -349,7 +349,13 @@ async def discover_dsi_documents( if cmp_capture.payloads: cmp_text = cmp_capture.reconstruct_cookie_policy() cmp_wc = len(cmp_text.split()) if cmp_text else 0 - if cmp_wc > 0 and ( + # Replace DOM with CMP only when CMP is *strictly larger* + # AND meets at least one of: DOM was very thin, CMP is + # substantial, or CMP is significantly longer than DOM. + # The strict-larger guard prevents a tiny heuristic match + # (e.g. an unrelated /api/data JSON) from clobbering a + # bigger DOM extraction. + if cmp_wc > self_wc and ( self_wc < 300 or cmp_wc >= 1000 or cmp_wc > self_wc * 1.5