"""Pro-Cookie-Library-Abgleich: deklariert vs. cookie_knowledge_db.""" from __future__ import annotations from compliance.services.cookie_library_check import ( _candidate_keys, _match_lib, analyze_cookies, ) def test_candidate_keys_strips_runtime_suffix(): assert "_ga" in _candidate_keys("_ga_GTM-ABC123") assert "amcvs" in _candidate_keys("AMCVS_1234@AdobeOrg") assert "_pk_id" in _candidate_keys("_pk_id.5.7d8f") def test_match_lib_prefix_and_exact(): lib = {"_ga": {"actual_category": "statistics"}, "phpsessid": {"actual_category": "essential"}} assert _match_lib("_ga_GTM-XYZ", lib)["actual_category"] == "statistics" assert _match_lib("PHPSESSID".lower(), lib)["actual_category"] == "essential" assert _match_lib("totally_unknown_xyz", lib) is None # kurze generische Basis darf NICHT über-matchen assert _match_lib("id_charger", {"id": {"x": 1}}) is None def test_tracker_declared_necessary_is_high_finding(): # _ga ist laut Library technical_necessity=none, reid=high. vendors = [{ "name": "Salesforce", "category": "necessary", "cookies": [{"name": "_ga", "purpose": "Funktionsverbesserung"}], }] out = analyze_cookies(vendors) assert out["summary"]["in_library"] == 1 f = out["findings"][0] assert f["type"] == "tracker_as_necessary" assert f["severity"] == "HIGH" assert "§ 25" in f["remediation"] assert f["library_purpose"] # exact_purpose aus Library def test_missing_purpose_when_library_knows_it(): vendors = [{ "name": "X", "category": "marketing", "cookies": [{"name": "_ga", "purpose": ""}], }] out = analyze_cookies(vendors) f = out["findings"][0] assert f["type"] == "missing_purpose" assert f["severity"] == "MEDIUM" assert f["library_purpose"] def test_unknown_cookie_no_finding(): vendors = [{ "name": "Y", "category": "necessary", "cookies": [{"name": "completely_unknown_xyz_123", "purpose": ""}], }] out = analyze_cookies(vendors) assert out["summary"]["checked"] == 1 assert out["summary"]["in_library"] == 0 assert out["findings"] == [] def _types(out): return {f["type"] for f in out["findings"]} def test_third_country_and_eu_alternative_for_us_tracker(): # _ga: US-Vendor + EU-Alternative Matomo in der Library. out = analyze_cookies([{ "name": "Google", "category": "marketing", "cookies": [{"name": "_ga", "purpose": "Statistik", "expiry": "2 Jahre"}], }]) t = _types(out) assert "third_country" in t assert "eu_alternative" in t def test_session_cookie_unknown_country_no_third_country(): # PHPSESSID: rich-DB vendor_country 'N/A' → KEIN Drittland (war False Positive, # weil 'N/A' nicht im EWR-Set steht). First-Party-Session-Cookie. out = analyze_cookies([{ "name": "BMW AG — Infrastructure Basic", "category": "necessary", "cookies": [{"name": "PHPSESSID", "purpose": "Session", "expiry": "Session"}], }]) assert not [f for f in out["findings"] if f["type"] == "third_country"] def test_missing_opt_out_for_marketing_vendor(): out = analyze_cookies([{ "name": "AdVendor", "category": "marketing", "opt_out_url": "", "cookies": [{"name": "track1", "purpose": "ads", "expiry": "1 Jahr"}], }]) mo = [f for f in out["findings"] if f["type"] == "missing_opt_out"] assert len(mo) == 1 assert mo[0]["kind"] == "finding" assert "Widerspruch" in mo[0]["remediation"] or "Opt-Out" in mo[0]["remediation"] def test_no_missing_opt_out_when_url_present_or_necessary(): # Mit Opt-Out-URL → kein Finding; notwendige Kategorie → ebenfalls keins. out = analyze_cookies([ {"name": "A", "category": "marketing", "opt_out_url": "https://x/opt", "cookies": [{"name": "t", "purpose": "ads", "expiry": "1 Jahr"}]}, {"name": "B", "category": "necessary", "opt_out_url": "", "cookies": [{"name": "sess", "purpose": "x", "expiry": "Session"}]}, ]) assert not [f for f in out["findings"] if f["type"] == "missing_opt_out"] def test_kind_splits_findings_from_hinweise(): # third_country/eu_alternative = Hinweis (advisory); Rest = Finding. out = analyze_cookies([{ "name": "Google", "category": "necessary", "cookies": [{"name": "_ga", "purpose": "", "expiry": "2 Jahre"}], }]) by = {f["type"]: f["kind"] for f in out["findings"]} assert by.get("third_country") == "hinweis" assert by.get("eu_alternative") == "hinweis" assert by.get("tracker_as_necessary") == "finding" # Drittland-Wording: neutral, pro Verarbeiter, keine "in DSE benennen"-Befehle. tc = next(f for f in out["findings"] if f["type"] == "third_country") assert "pro Verarbeiter" in tc["remediation"] assert "benennen" not in tc["remediation"] def test_third_country_deduped_per_vendor(): out = analyze_cookies([{ "name": "Google", "category": "marketing", "cookies": [ {"name": "_ga", "purpose": "x", "expiry": "2 Jahre"}, {"name": "_gid", "purpose": "x", "expiry": "1 Tag"}, ], }]) assert sum(1 for f in out["findings"] if f["type"] == "third_country") == 1 def test_excessive_lifetime(): # _gid: typische Laufzeit 24 Stunden; deklariert 2 Jahre. out = analyze_cookies([{ "name": "Google", "category": "marketing", "cookies": [{"name": "_gid", "purpose": "x", "expiry": "2 Jahre"}], }]) el = [f for f in out["findings"] if f["type"] == "excessive_lifetime"] assert el and "Art. 5" in el[0]["remediation"] def test_findings_carry_control_and_legal_basis(): # A: jeder Befund traegt control_id + Rechtsgrundlage (auditfest). out = analyze_cookies([{ "name": "Google", "category": "necessary", "cookies": [{"name": "_ga", "purpose": "x", "expiry": "Wird solange gespeichert, bis es deaktiviert wird."}], }]) assert out["findings"], "es sollte Befunde geben" for f in out["findings"]: assert "control" in f vd = next(f for f in out["findings"] if f["type"] == "vague_duration") assert vd["control"]["control_id"] == "AUTH-2051-A03" assert "Art. 5" in vd["control"]["article"] def test_vague_duration_flagged_concrete_ok(): # User-Beispiel Salesforce: "bis der Nutzer es deaktiviert" = vage. out = analyze_cookies([{ "name": "Salesforce", "category": "necessary", "cookies": [ {"name": "MUTEX_X", "purpose": "x", "expiry": "Wird solange gespeichert, bis es durch den Nutzer in seinem Browser deaktiviert wird."}, {"name": "ok1", "purpose": "x", "expiry": "13 Monate"}, {"name": "sess", "purpose": "x", "expiry": "Dieses Session Cookie wird beim Schließen des Browsers wieder gelöscht."}, ], }]) vd = [f for f in out["findings"] if f["type"] == "vague_duration"] assert len(vd) == 1 # nur MUTEX_X; 13 Monate + Session ok assert vd[0]["cookie"] == "MUTEX_X" assert "Art. 5" in vd[0]["remediation"] def test_missing_retention_vendor_without_cookies_or_duration(): # User-Beispiel Nayoki GmbH: als 'necessary' deklarierter Auftragsverarbeiter, # KEINE Cookies gelistet, KEINE persistence → Speicherdauer/Löschfrist-Finding. out = analyze_cookies([{ "name": "Nayoki GmbH — BMW Sport & Kultur Social Wall", "category": "necessary", "persistence": "", "purpose": "Verwaltung der Social Wall.", "cookies": [], }]) mr = [f for f in out["findings"] if f["type"] == "missing_retention"] assert len(mr) == 1 assert "Nayoki" in mr[0]["vendor"] assert "Löschfrist" in mr[0]["remediation"] assert mr[0]["severity"] == "MEDIUM" assert mr[0]["control"]["control_id"] == "AUTH-2051-A03" assert "Art. 13 Abs. 2" in mr[0]["control"]["article"] def test_no_missing_retention_when_vendor_has_cookies(): # Vendor MIT Cookies (konkrete Dauer) → kein missing_retention. out = analyze_cookies([{ "name": "X", "category": "necessary", "persistence": "", "cookies": [{"name": "sess", "purpose": "x", "expiry": "Session"}], }]) assert not [f for f in out["findings"] if f["type"] == "missing_retention"] def test_cookie_categories_exposes_actual_library_category(): # Für die Banner-Sicht: name_lower → tatsächliche Kategorie laut Library. big = {"bmw_track_de": { "actual_category": "marketing", "typical_max_age_seconds": 86400, "purpose_de": "Tracking", "vendor_name": "BMW", }} out = analyze_cookies([{ "name": "BMW", "category": "necessary", "cookies": [{"name": "bmw_track_de", "purpose": "x", "expiry": "1 Tag"}], }], big) assert out["cookie_categories"]["bmw_track_de"] == "marketing" def test_big_library_covers_cookie_not_in_rich_db(): # Cookie nicht in der 35er rich-DB, aber in der grossen 2287er (big_lib). big = {"bmw_track_de": { "actual_category": "marketing", "typical_max_age_seconds": 86400, "purpose_de": "Reichweiten-Tracking", "vendor_name": "BMW", }} out = analyze_cookies([{ "name": "BMW", "category": "necessary", "cookies": [{"name": "bmw_track_de", "purpose": "", "expiry": "2 Jahre"}], }], big) assert out["summary"]["in_library"] == 1 t = {f["type"] for f in out["findings"]} assert "tracker_as_necessary" in t # actual_category=marketing → Tracker assert "excessive_lifetime" in t # 2 Jahre vs. 1 Tag