feat(audit): Cookie-Library-Fallback fuer VW-Pattern (kein bekanntes CMP)
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / detect-changes (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 17s
CI / loc-budget (push) Failing after 17s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 41s
CI / test-python-document-crawler (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / detect-changes (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 17s
CI / loc-budget (push) Failing after 17s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-backend (push) Successful in 41s
Wenn nach Standard-Extract + Phase-G + LLM-Cascade weiterhin < 3 cmp_vendors aber >= 5 Cookies im after_accept stehen (typisch: Custom-CMP wie VW 'cookiemgmt'), matcht der Fallback die Cookie-Namen gegen die compliance.cookie_library und rekonstruiert Vendor-Records aus den Library-Eintraegen. Hintergrund: VW Run de2a029e zeigt 4 Vendors trotz 28 after_accept-Cookies. cmp_payloads ist 0 (kein bekanntes IAB-Tool erkannt) und die hinterlegte Cookie-URL liefert 404. Die DSE ist mit 34k zwar substanziell, listet aber keine Vendor-Tabelle. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -727,6 +727,35 @@ async def _run_compliance_check(check_id: str, req: ComplianceCheckRequest):
|
||||
logger.info("P57: added %d new vendors from Phase G (total: %d)",
|
||||
added, len(cmp_vendors))
|
||||
|
||||
# Cookie-Library-Fallback (P52 Lite): wenn weiterhin wenige
|
||||
# Vendors aber viele after_accept-Cookies, aus Library auflösen.
|
||||
if banner_result and len(cmp_vendors) < 3:
|
||||
try:
|
||||
from compliance.services.cookie_to_vendor_fallback import (
|
||||
fallback_vendors_for_run,
|
||||
)
|
||||
from database import SessionLocal as _SLfb
|
||||
_fb_db = _SLfb()
|
||||
try:
|
||||
extra = fallback_vendors_for_run(
|
||||
_fb_db, banner_result, len(cmp_vendors),
|
||||
)
|
||||
if extra:
|
||||
existing_names = {(v.get("name") or "").strip().lower()
|
||||
for v in cmp_vendors}
|
||||
for v in extra:
|
||||
if v["name"].lower() in existing_names:
|
||||
continue
|
||||
cmp_vendors.append(v)
|
||||
logger.info(
|
||||
"Cookie-Library-Fallback: cmp_vendors %d -> %d",
|
||||
len(cmp_vendors) - len(extra), len(cmp_vendors),
|
||||
)
|
||||
finally:
|
||||
_fb_db.close()
|
||||
except Exception as e:
|
||||
logger.warning("Cookie-Library-Fallback skipped: %s", e)
|
||||
|
||||
# P50: enrich vendors with per-vendor detail-modal-extracts
|
||||
# (description, opt-out URL, privacy URL, cookies). Detail
|
||||
# comes from Phase G Info-button-click-through in /scan.
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
Cookie-to-vendor fallback (P52 Lite).

When neither cmp_payloads nor vendor_llm_extract yielded enough vendors,
the cookie names recorded in the scan phases (typically after_accept) are
matched against compliance.cookie_library, and vendor records are built
from the matching library entries (cookie_name → vendor_name,
actual_category).

Typical scenario: VW uses a custom CMP (cookiemgmt wrapper) rather than a
known IAB tool. cmp_payloads is empty, but after_accept.cookies has 28
entries. Those 28 cookies correspond to roughly 15-20 vendors in the
library.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Iterable
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _collect_cookie_names(banner_result: dict | None) -> set[str]:
|
||||
names: set[str] = set()
|
||||
if not isinstance(banner_result, dict):
|
||||
return names
|
||||
for ph in (banner_result.get("phases") or {}).values():
|
||||
if not isinstance(ph, dict):
|
||||
continue
|
||||
for ck in (ph.get("cookies") or []):
|
||||
if isinstance(ck, str):
|
||||
names.add(ck.strip())
|
||||
elif isinstance(ck, dict):
|
||||
n = (ck.get("name") or "").strip()
|
||||
if n:
|
||||
names.add(n)
|
||||
return {n for n in names if n and len(n) <= 120}
|
||||
|
||||
|
||||
def lookup_vendors_from_library(
    db: Session,
    cookie_names: Iterable[str],
) -> list[dict]:
    """Resolve cookie names to vendor records via ``compliance.cookie_library``.

    Cookie names are matched case-insensitively (exact match on the
    lower-cased name). Rows are grouped by ``vendor_name``; rows without a
    vendor are ignored. Each vendor record lists every matched cookie once,
    even if the library contains several rows for that cookie.

    Args:
        db: Open SQLAlchemy session used for the lookup query.
        cookie_names: Cookie names to resolve. Empty and non-string items
            are ignored.

    Returns:
        One dict per vendor with the keys ``name``, ``country``,
        ``purpose``, ``category``, ``opt_out_url``, ``privacy_policy_url``,
        ``persistence``, ``cookies`` and ``source`` (always
        ``"library_fallback"``). Only ``name``, ``category`` and
        ``cookies`` carry data from the library; the remaining fields are
        empty strings. Returns an empty list, without touching the
        database, when there is nothing to look up.
    """
    # dict.fromkeys de-duplicates case variants while keeping input order,
    # so the query does not receive redundant bind values.
    lowered = list(dict.fromkeys(
        n.lower() for n in cookie_names if isinstance(n, str) and n
    ))
    if not lowered:
        return []
    rows = db.execute(text(
        """
        SELECT cookie_name, actual_category, vendor_name
        FROM compliance.cookie_library
        WHERE LOWER(cookie_name) = ANY(:lc)
        """
    ), {"lc": lowered}).fetchall()
    by_vendor: dict[str, dict] = {}
    seen: set[tuple[str, str]] = set()
    for cname, cat, vendor in rows:
        if not vendor:
            continue
        entry = by_vendor.setdefault(vendor, {
            "name": vendor,
            "country": "",
            "purpose": "",
            "category": cat or "",
            "opt_out_url": "",
            "privacy_policy_url": "",
            "persistence": "",
            "cookies": [],
            "source": "library_fallback",
        })
        # Row order is unspecified (no ORDER BY): keep the first non-empty
        # category instead of whatever the first row happened to carry.
        if cat and not entry["category"]:
            entry["category"] = cat
        # The library may hold several rows for one cookie; list it once.
        key = (vendor, (cname or "").lower())
        if key in seen:
            continue
        seen.add(key)
        entry["cookies"].append({
            "name": cname, "purpose": "", "expiry": "",
            # NOTE(review): every library match is flagged as third-party;
            # the query does not return anything that would confirm this.
            "is_third_party": True,
        })
    return list(by_vendor.values())
|
||||
|
||||
|
||||
def fallback_vendors_for_run(
    db: Session,
    banner_result: dict | None,
    existing_vendor_count: int,
    *,
    vendor_threshold: int = 3,
    min_cookies: int = 5,
) -> list[dict]:
    """Return extra vendor records to merge with the run's cmp_vendors.

    The fallback only fires when the run has suspiciously few vendors AND
    enough cookies were observed to make a library lookup worthwhile;
    otherwise it is skipped and an empty list is returned.

    Args:
        db: Open SQLAlchemy session, passed on to the library lookup.
        banner_result: Scan result dict whose phases carry the observed
            cookies, or ``None``.
        existing_vendor_count: Number of vendors already known for the run.
        vendor_threshold: The fallback is skipped once
            ``existing_vendor_count`` reaches this value (default 3).
        min_cookies: Minimum number of distinct cookie names required to
            run the lookup (default 5).

    Returns:
        Vendor records built from the cookie library; may be empty. The
        caller is responsible for de-duplicating against existing vendors.
    """
    if existing_vendor_count >= vendor_threshold:
        return []
    names = _collect_cookie_names(banner_result)
    if len(names) < min_cookies:
        return []
    vendors = lookup_vendors_from_library(db, names)
    if vendors:
        logger.info(
            "Cookie-Library-Fallback: %d Vendors aus %d Cookies (vorher %d)",
            len(vendors), len(names), existing_vendor_count,
        )
    return vendors
|
||||
Reference in New Issue
Block a user