Files
breakpilot-compliance/backend-compliance/compliance/tests/test_cookie_library_check.py
T
Benjamin Admin 9dfdaae8e4 feat(cookie): präfix-bewusster Library-Match (Runtime-Suffixe)
load_big_library matchte nur EXAKT → nur ~27% der BMW-Cookies trafen die
Open-Cookie-DB, weil Per-Instanz-Suffixe abweichen (_ga_GTM-XYZ, AMCVS_###@
AdobeOrg, _pk_id.5.7d8). Jetzt: Library einmal laden, Namen entwildcarden,
über _candidate_keys (exact + Präfix an Trennzeichen, Mindestlänge 3 gegen
Über-Match) matchen. Reuse der bewährten _strip_wildcards-Logik.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-11 15:24:45 +02:00

237 lines
9.4 KiB
Python

"""Pro-Cookie-Library-Abgleich: deklariert vs. cookie_knowledge_db."""
from __future__ import annotations
from compliance.services.cookie_library_check import (
_candidate_keys,
_match_lib,
analyze_cookies,
)
def test_candidate_keys_strips_runtime_suffix():
    """The candidate keys of a suffixed cookie name include its base name."""
    expectations = (
        ("_ga", "_ga_GTM-ABC123"),
        ("amcvs", "AMCVS_1234@AdobeOrg"),
        ("_pk_id", "_pk_id.5.7d8f"),
    )
    for base_key, raw_name in expectations:
        assert base_key in _candidate_keys(raw_name)
def test_match_lib_prefix_and_exact():
    """`_match_lib` resolves prefix and exact hits and returns None otherwise."""
    library = {
        "_ga": {"actual_category": "statistics"},
        "phpsessid": {"actual_category": "essential"},
    }

    prefix_hit = _match_lib("_ga_GTM-XYZ", library)
    assert prefix_hit["actual_category"] == "statistics"

    exact_hit = _match_lib("PHPSESSID".lower(), library)
    assert exact_hit["actual_category"] == "essential"

    assert _match_lib("totally_unknown_xyz", library) is None

    # A short, generic base name must NOT over-match.
    short_base_library = {"id": {"x": 1}}
    assert _match_lib("id_charger", short_base_library) is None
def test_tracker_declared_necessary_is_high_finding():
    """A library-known tracker declared as necessary is the first, HIGH finding."""
    # According to the library, _ga has technical_necessity=none and reid=high.
    ga_cookie = {"name": "_ga", "purpose": "Funktionsverbesserung"}
    declared = [
        {"name": "Salesforce", "category": "necessary", "cookies": [ga_cookie]},
    ]

    result = analyze_cookies(declared)

    assert result["summary"]["in_library"] == 1
    first = result["findings"][0]
    assert first["type"] == "tracker_as_necessary"
    assert first["severity"] == "HIGH"
    assert "§ 25" in first["remediation"]
    # exact_purpose taken from the library must be non-empty.
    assert first["library_purpose"]
def test_missing_purpose_when_library_knows_it():
    """An empty declared purpose yields a MEDIUM `missing_purpose` finding."""
    declared = [
        {
            "name": "X",
            "category": "marketing",
            "cookies": [{"name": "_ga", "purpose": ""}],
        },
    ]

    first = analyze_cookies(declared)["findings"][0]

    assert first["type"] == "missing_purpose"
    assert first["severity"] == "MEDIUM"
    assert first["library_purpose"]
def test_unknown_cookie_no_finding():
    """A cookie absent from the library is counted as checked but not flagged."""
    unknown_cookie = {"name": "completely_unknown_xyz_123", "purpose": ""}
    declared = [
        {"name": "Y", "category": "necessary", "cookies": [unknown_cookie]},
    ]

    result = analyze_cookies(declared)

    assert result["summary"]["checked"] == 1
    assert result["summary"]["in_library"] == 0
    assert result["findings"] == []
def _types(out):
return {f["type"] for f in out["findings"]}
def test_third_country_and_eu_alternative_for_us_tracker():
    """`_ga` produces both a `third_country` and an `eu_alternative` entry."""
    # _ga: US vendor + EU alternative Matomo in the library.
    google = {
        "name": "Google",
        "category": "marketing",
        "cookies": [{"name": "_ga", "purpose": "Statistik", "expiry": "2 Jahre"}],
    }

    found_types = _types(analyze_cookies([google]))

    for expected in ("third_country", "eu_alternative"):
        assert expected in found_types
def test_session_cookie_unknown_country_no_third_country():
    """PHPSESSID must not produce a `third_country` entry."""
    # PHPSESSID: rich-DB vendor_country 'N/A' -> NOT a third country (this used
    # to be a false positive because 'N/A' is not in the EEA set).
    # First-party session cookie.
    session_cookie = {"name": "PHPSESSID", "purpose": "Session", "expiry": "Session"}
    vendor = {
        "name": "BMW AG — Infrastructure Basic",
        "category": "necessary",
        "cookies": [session_cookie],
    }

    result = analyze_cookies([vendor])

    third_country_hits = [
        entry for entry in result["findings"] if entry["type"] == "third_country"
    ]
    assert third_country_hits == []
def test_missing_opt_out_for_marketing_vendor():
    """A marketing vendor with an empty opt-out URL gets one `missing_opt_out`."""
    vendor = {
        "name": "AdVendor",
        "category": "marketing",
        "opt_out_url": "",
        "cookies": [{"name": "track1", "purpose": "ads", "expiry": "1 Jahr"}],
    }

    result = analyze_cookies([vendor])

    opt_out_hits = [
        entry for entry in result["findings"] if entry["type"] == "missing_opt_out"
    ]
    assert len(opt_out_hits) == 1
    assert opt_out_hits[0]["kind"] == "finding"
    # The remediation text must mention either objection or opt-out wording.
    assert any(
        term in opt_out_hits[0]["remediation"] for term in ("Widerspruch", "Opt-Out")
    )
def test_no_missing_opt_out_when_url_present_or_necessary():
    """No `missing_opt_out` for a vendor with a URL or a necessary category."""
    # With an opt-out URL -> no finding; necessary category -> none either.
    marketing_with_url = {
        "name": "A",
        "category": "marketing",
        "opt_out_url": "https://x/opt",
        "cookies": [{"name": "t", "purpose": "ads", "expiry": "1 Jahr"}],
    }
    necessary_without_url = {
        "name": "B",
        "category": "necessary",
        "opt_out_url": "",
        "cookies": [{"name": "sess", "purpose": "x", "expiry": "Session"}],
    }

    result = analyze_cookies([marketing_with_url, necessary_without_url])

    found_types = {entry["type"] for entry in result["findings"]}
    assert "missing_opt_out" not in found_types
def test_kind_splits_findings_from_hinweise():
    """Advisory entries carry kind 'hinweis'; the tracker entry is a 'finding'."""
    # third_country/eu_alternative = advisory note ("hinweis"); rest = finding.
    vendor = {
        "name": "Google",
        "category": "necessary",
        "cookies": [{"name": "_ga", "purpose": "", "expiry": "2 Jahre"}],
    }
    findings = analyze_cookies([vendor])["findings"]

    kind_by_type = {}
    for entry in findings:
        kind_by_type[entry["type"]] = entry["kind"]

    assert kind_by_type.get("third_country") == "hinweis"
    assert kind_by_type.get("eu_alternative") == "hinweis"
    assert kind_by_type.get("tracker_as_necessary") == "finding"

    # Third-country wording: neutral, per processor, and without any
    # "name it in the privacy policy" style commands.
    third_country = next(
        entry for entry in findings if entry["type"] == "third_country"
    )
    remediation = third_country["remediation"]
    assert "pro Verarbeiter" in remediation
    assert "benennen" not in remediation
def test_third_country_deduped_per_vendor():
    """Two cookies of one vendor produce exactly one `third_country` entry."""
    vendor = {
        "name": "Google",
        "category": "marketing",
        "cookies": [
            {"name": "_ga", "purpose": "x", "expiry": "2 Jahre"},
            {"name": "_gid", "purpose": "x", "expiry": "1 Tag"},
        ],
    }

    result = analyze_cookies([vendor])

    all_types = [entry["type"] for entry in result["findings"]]
    assert all_types.count("third_country") == 1
def test_excessive_lifetime():
    """A `_gid` declared with two years yields an `excessive_lifetime` entry."""
    # _gid: typical lifetime is 24 hours; declared as 2 years.
    vendor = {
        "name": "Google",
        "category": "marketing",
        "cookies": [{"name": "_gid", "purpose": "x", "expiry": "2 Jahre"}],
    }

    result = analyze_cookies([vendor])

    lifetime_hits = [
        entry for entry in result["findings"] if entry["type"] == "excessive_lifetime"
    ]
    assert lifetime_hits
    assert "Art. 5" in lifetime_hits[0]["remediation"]
def test_findings_carry_control_and_legal_basis():
    """Every finding has a `control`; `vague_duration` maps to AUTH-2051-A03."""
    # A: every finding carries a control_id + legal basis (audit-proof).
    vague_cookie = {
        "name": "_ga",
        "purpose": "x",
        "expiry": "Wird solange gespeichert, bis es deaktiviert wird.",
    }
    vendor = {"name": "Google", "category": "necessary", "cookies": [vague_cookie]}

    findings = analyze_cookies([vendor])["findings"]

    assert findings, "es sollte Befunde geben"
    assert all("control" in entry for entry in findings)

    vague = next(filter(lambda entry: entry["type"] == "vague_duration", findings))
    control = vague["control"]
    assert control["control_id"] == "AUTH-2051-A03"
    assert "Art. 5" in control["article"]
def test_vague_duration_flagged_concrete_ok():
    """Only the open-ended expiry text is flagged as `vague_duration`."""
    # User-reported Salesforce example: an "until the user disables it"
    # style duration counts as vague.
    vague_cookie = {
        "name": "MUTEX_X",
        "purpose": "x",
        "expiry": "Wird solange gespeichert, bis es durch den Nutzer in seinem Browser deaktiviert wird.",
    }
    concrete_cookie = {"name": "ok1", "purpose": "x", "expiry": "13 Monate"}
    session_cookie = {
        "name": "sess",
        "purpose": "x",
        "expiry": "Dieses Session Cookie wird beim Schließen des Browsers wieder gelöscht.",
    }
    vendor = {
        "name": "Salesforce",
        "category": "necessary",
        "cookies": [vague_cookie, concrete_cookie, session_cookie],
    }

    result = analyze_cookies([vendor])

    vague_hits = [
        entry for entry in result["findings"] if entry["type"] == "vague_duration"
    ]
    # Only MUTEX_X; "13 Monate" and the session wording are acceptable.
    assert len(vague_hits) == 1
    only_hit = vague_hits[0]
    assert only_hit["cookie"] == "MUTEX_X"
    assert "Art. 5" in only_hit["remediation"]
def test_missing_retention_vendor_without_cookies_or_duration():
    """A vendor with no cookies and no persistence gets `missing_retention`."""
    # User-reported example Nayoki GmbH: a processor declared as 'necessary',
    # NO cookies listed, NO persistence -> storage-duration/deletion-period finding.
    vendor = {
        "name": "Nayoki GmbH — BMW Sport & Kultur Social Wall",
        "category": "necessary",
        "persistence": "",
        "purpose": "Verwaltung der Social Wall.",
        "cookies": [],
    }

    result = analyze_cookies([vendor])

    retention_hits = [
        entry for entry in result["findings"] if entry["type"] == "missing_retention"
    ]
    assert len(retention_hits) == 1
    hit = retention_hits[0]
    assert "Nayoki" in hit["vendor"]
    assert "Löschfrist" in hit["remediation"]
    assert hit["severity"] == "MEDIUM"
    assert hit["control"]["control_id"] == "AUTH-2051-A03"
    assert "Art. 13 Abs. 2" in hit["control"]["article"]
def test_no_missing_retention_when_vendor_has_cookies():
    """A vendor that lists a cookie does not get `missing_retention`."""
    # Vendor WITH cookies (concrete duration) -> no missing_retention.
    vendor = {
        "name": "X",
        "category": "necessary",
        "persistence": "",
        "cookies": [{"name": "sess", "purpose": "x", "expiry": "Session"}],
    }

    result = analyze_cookies([vendor])

    found_types = {entry["type"] for entry in result["findings"]}
    assert "missing_retention" not in found_types
def test_cookie_categories_exposes_actual_library_category():
    """`cookie_categories` maps the cookie name to the library's category."""
    # For the banner view: lower-cased name -> actual category per the library.
    library_entry = {
        "actual_category": "marketing",
        "typical_max_age_seconds": 86400,
        "purpose_de": "Tracking",
        "vendor_name": "BMW",
    }
    big_library = {"bmw_track_de": library_entry}
    vendor = {
        "name": "BMW",
        "category": "necessary",
        "cookies": [{"name": "bmw_track_de", "purpose": "x", "expiry": "1 Tag"}],
    }

    result = analyze_cookies([vendor], big_library)

    assert result["cookie_categories"]["bmw_track_de"] == "marketing"
def test_big_library_covers_cookie_not_in_rich_db():
    """A cookie supplied only via the big library is matched and flagged."""
    # Cookie is not in the 35-entry rich DB but is in the large 2287-entry
    # library (big_lib).
    library_entry = {
        "actual_category": "marketing",
        "typical_max_age_seconds": 86400,
        "purpose_de": "Reichweiten-Tracking",
        "vendor_name": "BMW",
    }
    big_library = {"bmw_track_de": library_entry}
    vendor = {
        "name": "BMW",
        "category": "necessary",
        "cookies": [{"name": "bmw_track_de", "purpose": "", "expiry": "2 Jahre"}],
    }

    result = analyze_cookies([vendor], big_library)

    assert result["summary"]["in_library"] == 1
    found_types = [entry["type"] for entry in result["findings"]]
    # actual_category=marketing -> treated as a tracker.
    assert "tracker_as_necessary" in found_types
    # Declared 2 years vs. a typical lifetime of 1 day.
    assert "excessive_lifetime" in found_types