diag(cmp): log skipped CMP candidates with top-keys for Phase 0
VW und andere unbekannte CMPs loesen den 603-Wort-Bug aus: Kein Named-Matcher greift, und entweder lehnt die generische Heuristik ab oder es gilt size_kb < 5 → cmp_cookie_text bleibt leer → das Backend faellt auf die 603-Wort-DOM-Navigation zurueck. Neuer INFO-Log fuer jede JSON-Response >= 3 KB, die als CMP-Kandidat ueberlebt hat, aber an der Heuristik ODER an der Size-Schwelle (5 KB) scheitert. Geloggt werden Top-Keys, URL und Size — so ist beim naechsten VW-Run sofort sichtbar, welcher Endpoint ein Named-Pattern braucht.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -80,21 +80,33 @@ class CMPCapture:
|
|||||||
data = await _parse_json_response(response)
|
data = await _parse_json_response(response)
|
||||||
if data is None:
|
if data is None:
|
||||||
return
|
return
|
||||||
# Skip tiny payloads — real CMP cookie policies are ≥5KB.
|
|
||||||
# A 4KB JSON of cookie-shaped data is almost never the policy.
|
|
||||||
try:
|
try:
|
||||||
size_kb = len(json.dumps(data)) // 1024
|
size_kb = len(json.dumps(data)) // 1024
|
||||||
except Exception:
|
except Exception:
|
||||||
size_kb = 0
|
size_kb = 0
|
||||||
if size_kb < 5:
|
|
||||||
return
|
|
||||||
from services.cmp_heuristic import looks_like_cookie_policy
|
from services.cmp_heuristic import looks_like_cookie_policy
|
||||||
if looks_like_cookie_policy(data):
|
matched = looks_like_cookie_policy(data)
|
||||||
|
if matched and size_kb >= 5:
|
||||||
self.payloads.append(("_heuristic", data))
|
self.payloads.append(("_heuristic", data))
|
||||||
logger.info(
|
logger.info(
|
||||||
"CMP captured: _heuristic (%s, ~%dKB)",
|
"CMP captured: _heuristic (%s, ~%dKB)",
|
||||||
url[:120], size_kb,
|
url[:120], size_kb,
|
||||||
)
|
)
|
||||||
|
elif size_kb >= 3:
|
||||||
|
# Phase-0-Diagnose-Log: JSON-Response die als CMP-Kandidat
|
||||||
|
# ueberlebt hat, aber heuristic OR size-threshold abgelehnt
|
||||||
|
# wurde. Zeigt beim naechsten VW/BMW/... Run welche Endpoints
|
||||||
|
# uebersehen werden — schneller Pattern-Add ohne raten.
|
||||||
|
top_keys = []
|
||||||
|
if isinstance(data, dict):
|
||||||
|
top_keys = list(data.keys())[:8]
|
||||||
|
elif isinstance(data, list) and data and isinstance(data[0], dict):
|
||||||
|
top_keys = list(data[0].keys())[:8]
|
||||||
|
logger.info(
|
||||||
|
"CMP candidate skipped: url=%s size=%dKB heuristic=%s "
|
||||||
|
"top_keys=%s",
|
||||||
|
url[:120], size_kb, matched, top_keys,
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("CMP listener error: %s", e)
|
logger.debug("CMP listener error: %s", e)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user