9dfdaae8e4
load_big_library matchte nur EXAKT → nur ~27% der BMW-Cookies trafen die Open-Cookie-DB, weil Per-Instanz-Suffixe abweichen (_ga_GTM-XYZ, AMCVS_###@AdobeOrg, _pk_id.5.7d8). Jetzt: Library einmal laden, Namen entwildcarden, über _candidate_keys (exact + Präfix an Trennzeichen, Mindestlänge 3 gegen Über-Match) matchen. Reuse der bewährten _strip_wildcards-Logik. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
237 lines
9.4 KiB
Python
237 lines
9.4 KiB
Python
"""Pro-Cookie-Library-Abgleich: deklariert vs. cookie_knowledge_db."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from compliance.services.cookie_library_check import (
|
|
_candidate_keys,
|
|
_match_lib,
|
|
analyze_cookies,
|
|
)
|
|
|
|
|
|
def test_candidate_keys_strips_runtime_suffix():
    """Suffixed cookie names must still yield their base name as a candidate key."""
    expectations = (
        ("_ga_GTM-ABC123", "_ga"),
        ("AMCVS_1234@AdobeOrg", "amcvs"),
        ("_pk_id.5.7d8f", "_pk_id"),
    )
    for raw_name, base_key in expectations:
        assert base_key in _candidate_keys(raw_name)
|
|
|
|
|
|
def test_match_lib_prefix_and_exact():
    """_match_lib resolves suffixed and exact names and returns None otherwise."""
    library = {}
    library["_ga"] = {"actual_category": "statistics"}
    library["phpsessid"] = {"actual_category": "essential"}

    prefix_hit = _match_lib("_ga_GTM-XYZ", library)
    assert prefix_hit["actual_category"] == "statistics"

    exact_hit = _match_lib("PHPSESSID".lower(), library)
    assert exact_hit["actual_category"] == "essential"

    assert _match_lib("totally_unknown_xyz", library) is None

    # A short, generic base name must NOT over-match.
    tiny_library = {"id": {"x": 1}}
    assert _match_lib("id_charger", tiny_library) is None
|
|
|
|
|
|
def test_tracker_declared_necessary_is_high_finding():
    """A library-known tracker declared as 'necessary' is reported as a HIGH finding."""
    # Per the library, _ga has technical_necessity=none and reid=high.
    tracker_cookie = {"name": "_ga", "purpose": "Funktionsverbesserung"}
    vendor = {
        "name": "Salesforce", "category": "necessary",
        "cookies": [tracker_cookie],
    }

    result = analyze_cookies([vendor])

    assert result["summary"]["in_library"] == 1
    first = result["findings"][0]
    assert first["type"] == "tracker_as_necessary"
    assert first["severity"] == "HIGH"
    assert "§ 25" in first["remediation"]
    # exact_purpose taken from the library
    assert first["library_purpose"]
|
|
|
|
|
|
def test_missing_purpose_when_library_knows_it():
    """An empty declared purpose on a library-known cookie is a MEDIUM finding."""
    cookie = {"name": "_ga", "purpose": ""}
    vendor = {
        "name": "X", "category": "marketing",
        "cookies": [cookie],
    }

    result = analyze_cookies([vendor])

    first = result["findings"][0]
    assert first["type"] == "missing_purpose"
    assert first["severity"] == "MEDIUM"
    assert first["library_purpose"]
|
|
|
|
|
|
def test_unknown_cookie_no_finding():
    """A cookie without a library match is counted as checked but yields no finding."""
    unknown_cookie = {"name": "completely_unknown_xyz_123", "purpose": ""}
    vendor = {
        "name": "Y", "category": "necessary",
        "cookies": [unknown_cookie],
    }

    result = analyze_cookies([vendor])

    summary = result["summary"]
    assert summary["checked"] == 1
    assert summary["in_library"] == 0
    assert result["findings"] == []
|
|
|
|
|
|
def _types(out):
|
|
return {f["type"] for f in out["findings"]}
|
|
|
|
|
|
def test_third_country_and_eu_alternative_for_us_tracker():
    """A _ga cookie triggers both the third_country and eu_alternative entries."""
    # _ga: US vendor + EU alternative Matomo in the library.
    vendor = {
        "name": "Google", "category": "marketing",
        "cookies": [{"name": "_ga", "purpose": "Statistik", "expiry": "2 Jahre"}],
    }

    found_types = _types(analyze_cookies([vendor]))

    for expected in ("third_country", "eu_alternative"):
        assert expected in found_types
|
|
|
|
|
|
def test_session_cookie_unknown_country_no_third_country():
    """PHPSESSID must not produce a third_country entry."""
    # PHPSESSID: rich-DB vendor_country 'N/A' → NO third country (used to be a
    # false positive because 'N/A' is not in the EEA set). First-party session cookie.
    session_cookie = {"name": "PHPSESSID", "purpose": "Session", "expiry": "Session"}
    vendor = {
        "name": "BMW AG — Infrastructure Basic", "category": "necessary",
        "cookies": [session_cookie],
    }

    result = analyze_cookies([vendor])

    third_country_hits = [
        entry for entry in result["findings"] if entry["type"] == "third_country"
    ]
    assert not third_country_hits
|
|
|
|
|
|
def test_missing_opt_out_for_marketing_vendor():
    """A marketing vendor with an empty opt_out_url gets exactly one missing_opt_out finding."""
    vendor = {
        "name": "AdVendor", "category": "marketing", "opt_out_url": "",
        "cookies": [{"name": "track1", "purpose": "ads", "expiry": "1 Jahr"}],
    }

    result = analyze_cookies([vendor])

    opt_out_hits = [
        entry for entry in result["findings"] if entry["type"] == "missing_opt_out"
    ]
    assert len(opt_out_hits) == 1
    hit = opt_out_hits[0]
    assert hit["kind"] == "finding"
    assert any(term in hit["remediation"] for term in ("Widerspruch", "Opt-Out"))
|
|
|
|
|
|
def test_no_missing_opt_out_when_url_present_or_necessary():
    """No missing_opt_out when a URL is given or the vendor category is 'necessary'."""
    # With an opt-out URL → no finding; necessary category → none either.
    marketing_with_url = {
        "name": "A", "category": "marketing", "opt_out_url": "https://x/opt",
        "cookies": [{"name": "t", "purpose": "ads", "expiry": "1 Jahr"}],
    }
    necessary_without_url = {
        "name": "B", "category": "necessary", "opt_out_url": "",
        "cookies": [{"name": "sess", "purpose": "x", "expiry": "Session"}],
    }

    result = analyze_cookies([marketing_with_url, necessary_without_url])

    opt_out_hits = [
        entry for entry in result["findings"] if entry["type"] == "missing_opt_out"
    ]
    assert not opt_out_hits
|
|
|
|
|
|
def test_kind_splits_findings_from_hinweise():
    """Each entry's ``kind`` separates advisory 'hinweis' items from real findings."""
    # third_country/eu_alternative = Hinweis (advisory); everything else = finding.
    vendor = {
        "name": "Google", "category": "necessary",
        "cookies": [{"name": "_ga", "purpose": "", "expiry": "2 Jahre"}],
    }

    findings = analyze_cookies([vendor])["findings"]

    kind_by_type = {}
    for entry in findings:
        kind_by_type[entry["type"]] = entry["kind"]

    expected_kinds = (
        ("third_country", "hinweis"),
        ("eu_alternative", "hinweis"),
        ("tracker_as_necessary", "finding"),
    )
    for finding_type, expected_kind in expected_kinds:
        assert kind_by_type.get(finding_type) == expected_kind

    # Third-country wording: neutral, per processor, no "name it in the
    # privacy policy" style commands.
    third_country = next(
        entry for entry in findings if entry["type"] == "third_country"
    )
    wording = third_country["remediation"]
    assert "pro Verarbeiter" in wording
    assert "benennen" not in wording
|
|
|
|
|
|
def test_third_country_deduped_per_vendor():
    """Two cookies of one vendor produce only a single third_country entry."""
    vendor = {
        "name": "Google", "category": "marketing",
        "cookies": [
            {"name": "_ga", "purpose": "x", "expiry": "2 Jahre"},
            {"name": "_gid", "purpose": "x", "expiry": "1 Tag"},
        ],
    }

    result = analyze_cookies([vendor])

    third_country_hits = [
        entry for entry in result["findings"] if entry["type"] == "third_country"
    ]
    assert len(third_country_hits) == 1
|
|
|
|
|
|
def test_excessive_lifetime():
    """A declared lifetime far above the typical one yields an excessive_lifetime finding."""
    # _gid: typical lifetime 24 hours; declared 2 years.
    vendor = {
        "name": "Google", "category": "marketing",
        "cookies": [{"name": "_gid", "purpose": "x", "expiry": "2 Jahre"}],
    }

    result = analyze_cookies([vendor])

    lifetime_hits = [
        entry for entry in result["findings"] if entry["type"] == "excessive_lifetime"
    ]
    assert lifetime_hits
    assert "Art. 5" in lifetime_hits[0]["remediation"]
|
|
|
|
|
|
def test_findings_carry_control_and_legal_basis():
    """Every entry carries a ``control`` block; vague_duration maps to AUTH-2051-A03."""
    # A: every finding carries control_id + legal basis (audit-proof).
    cookie = {
        "name": "_ga", "purpose": "x",
        "expiry": "Wird solange gespeichert, bis es deaktiviert wird.",
    }
    vendor = {
        "name": "Google", "category": "necessary",
        "cookies": [cookie],
    }

    result = analyze_cookies([vendor])

    assert result["findings"], "es sollte Befunde geben"
    for entry in result["findings"]:
        assert "control" in entry

    vague = next(
        entry for entry in result["findings"] if entry["type"] == "vague_duration"
    )
    control = vague["control"]
    assert control["control_id"] == "AUTH-2051-A03"
    assert "Art. 5" in control["article"]
|
|
|
|
|
|
def test_vague_duration_flagged_concrete_ok():
    """Only the vaguely worded expiry is flagged; concrete and session durations pass."""
    # User example Salesforce: "bis der Nutzer es deaktiviert" = vague.
    vague_cookie = {
        "name": "MUTEX_X", "purpose": "x",
        "expiry": "Wird solange gespeichert, bis es durch den Nutzer in seinem Browser deaktiviert wird.",
    }
    concrete_cookie = {"name": "ok1", "purpose": "x", "expiry": "13 Monate"}
    session_cookie = {
        "name": "sess", "purpose": "x",
        "expiry": "Dieses Session Cookie wird beim Schließen des Browsers wieder gelöscht.",
    }
    vendor = {
        "name": "Salesforce", "category": "necessary",
        "cookies": [vague_cookie, concrete_cookie, session_cookie],
    }

    result = analyze_cookies([vendor])

    vague_hits = [
        entry for entry in result["findings"] if entry["type"] == "vague_duration"
    ]
    # Only MUTEX_X; "13 Monate" and the session cookie are fine.
    assert len(vague_hits) == 1
    hit = vague_hits[0]
    assert hit["cookie"] == "MUTEX_X"
    assert "Art. 5" in hit["remediation"]
|
|
|
|
|
|
def test_missing_retention_vendor_without_cookies_or_duration():
    """A vendor with no cookies and no persistence gets one MEDIUM missing_retention finding."""
    # User example Nayoki GmbH: processor declared as 'necessary', NO cookies
    # listed, NO persistence → retention period / deletion deadline finding.
    vendor = {
        "name": "Nayoki GmbH — BMW Sport & Kultur Social Wall",
        "category": "necessary", "persistence": "",
        "purpose": "Verwaltung der Social Wall.",
        "cookies": [],
    }

    result = analyze_cookies([vendor])

    retention_hits = [
        entry for entry in result["findings"] if entry["type"] == "missing_retention"
    ]
    assert len(retention_hits) == 1
    hit = retention_hits[0]
    assert "Nayoki" in hit["vendor"]
    assert "Löschfrist" in hit["remediation"]
    assert hit["severity"] == "MEDIUM"
    assert hit["control"]["control_id"] == "AUTH-2051-A03"
    assert "Art. 13 Abs. 2" in hit["control"]["article"]
|
|
|
|
|
|
def test_no_missing_retention_when_vendor_has_cookies():
    """A vendor that lists cookies does not get a missing_retention finding."""
    # Vendor WITH cookies (concrete duration) → no missing_retention.
    vendor = {
        "name": "X", "category": "necessary", "persistence": "",
        "cookies": [{"name": "sess", "purpose": "x", "expiry": "Session"}],
    }

    result = analyze_cookies([vendor])

    retention_hits = [
        entry for entry in result["findings"] if entry["type"] == "missing_retention"
    ]
    assert not retention_hits
|
|
|
|
|
|
def test_cookie_categories_exposes_actual_library_category():
    """``cookie_categories`` maps the cookie name to the library's actual category."""
    # For the banner view: name_lower → actual category according to the library.
    library_entry = {
        "actual_category": "marketing", "typical_max_age_seconds": 86400,
        "purpose_de": "Tracking", "vendor_name": "BMW",
    }
    big_library = {"bmw_track_de": library_entry}
    vendor = {
        "name": "BMW", "category": "necessary",
        "cookies": [{"name": "bmw_track_de", "purpose": "x", "expiry": "1 Tag"}],
    }

    result = analyze_cookies([vendor], big_library)

    categories = result["cookie_categories"]
    assert categories["bmw_track_de"] == "marketing"
|
|
|
|
|
|
def test_big_library_covers_cookie_not_in_rich_db():
    """A cookie found only in the supplied big library is still matched and analyzed."""
    # Cookie is not in the 35-entry rich DB but is in the large 2287-entry one (big_lib).
    library_entry = {
        "actual_category": "marketing", "typical_max_age_seconds": 86400,
        "purpose_de": "Reichweiten-Tracking", "vendor_name": "BMW",
    }
    big_library = {"bmw_track_de": library_entry}
    vendor = {
        "name": "BMW", "category": "necessary",
        "cookies": [{"name": "bmw_track_de", "purpose": "", "expiry": "2 Jahre"}],
    }

    result = analyze_cookies([vendor], big_library)

    assert result["summary"]["in_library"] == 1
    found_types = set()
    for entry in result["findings"]:
        found_types.add(entry["type"])
    # actual_category=marketing → tracker
    assert "tracker_as_necessary" in found_types
    # 2 years vs. 1 day
    assert "excessive_lifetime" in found_types
|