Files
breakpilot-compliance/backend-compliance/tests/test_cookie_coherence_check.py
T
Benjamin Admin c908fcd5eb feat(b19): Cookie-Coherence — 3-Layer-Lookup + Vendor-Karten + CSV
Adressiert das BMW-Beispiel (740 Cookies, Salesforce als "essential"
mit 1-Jahres-Lifetime, Pseudo-Zwecke wie "Siehe dazugehörige
Datenverarbeitung"). User-Konzept "Regulation als Code".

Step 1 — cookie_library_lookup.py (3 Layer):
  1. Override = cookie_knowledge_db.py + extended (74) für
     Schrems-II / EuGH / EU-Alternative — BreakPilot-juristische-IP.
  2. Truth-Base = compliance.cookie_library (2287 aus Open Cookie
     Database, CC0). actual_category als Wahrheit.
  3. Auto-Learning = cookie_behavior_audits — Cross-Site-Konsens
     wenn ≥3 Sites denselben Cookie melden.

  Match: exact > prefix (mit Separator-Check) > wildcard. Kurze
  Library-Namen ("c", "ID") brauchen exact-match — verhindert
  False-Positive auf "completely_unknown". Trailing-Underscore
  in OCD ("guest_uuid_essential_") wird als implicit-wildcard
  interpretiert.

Step 2 — cookie_coherence_check.py (B19, 6 Finding-Typen):
  - MARKETING_AS_ESSENTIAL (HIGH): KB sagt actual=marketing, Site
    deklariert essential/erforderlich → Einwilligung wird umgangen
  - LIFETIME_TOO_LONG_FOR_ESSENTIAL (MED): essential + >90d
  - PSEUDO_PURPOSE (LOW): "Siehe dazugehörige Datenverarbeitung"
    / <4 Wörter (suppressed wenn Vendor-Purpose substantial ist)
  - MISSING_COUNTRY (LOW): vendor_country leer trotz KB-Hit
  - UNKNOWN_VENDOR (LOW): nicht in KB → Auto-Learning-Kandidat
  - DUPLICATE_VENDOR (MED): selber Vendor in N Kategorien =
    Stack-Aufspaltung um Marketing unter "essential" zu schmuggeln

  Jedes Finding mit recommended_action ("Cookie X aus 'erforderlich'
  raus und in 'Marketing' setzen").

Step 3 — cookie_observation_logger.py:
  Loggt nach jedem Audit alle (cookie, site, declared_purpose) in
  compliance.cookie_behavior_audits → Basis für Cross-Site-Konsens
  in Layer 3.

Step 4 — cookie_csv_exporter.py:
  cookies-full-{check_id}.csv mit 21 Spalten (Name, Vendor decl/KB,
  Cat decl/KB, Lifetime decl/KB, Country, Opt-Out, 8x FIND_* flags,
  recommended_action). UTF-8 BOM für Excel.
  ZIP-Attachment: erweitert audit_walk_zip_builder um extra_files=
  parameter; phase_e ruft mit cookies-full-...csv auf.

Step 5 — mail_render_v2/_vendor_cards.py:
  Statt 740 Cookie-Rows: Aggregation pro Vendor mit Cookie-Count +
  Issue-Count + 1-2 Beispiel-Cookies + Issue-Type-Tags. Top 30
  Vendoren in der Mail, Rest nur in CSV. Sortiert nach Issue-Score.

Step 6 — render_info_box_rechtsrahmen():
  Generic Header-Info-Box mit Art. 13 DSGVO + § 25 TDDDG + Art. 5
  + § 5 UWG + § 30/130 OWiG. Immer angezeigt, kein explicit-
  finding-mapping (User-mündigkeit).

Orchestrator + _compose: run_b19 + render_vendor_cards +
  render_info_box_rechtsrahmen ins V2-Layout.

Tests: 28/28 grün (15 lookup + 13 coherence).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-07 23:48:04 +02:00

139 lines
5.1 KiB
Python

"""Tests for B19 Cookie-Coherence-Check (Salesforce-as-essential)."""
from unittest.mock import patch
from compliance.services.cookie_coherence_check import (
_is_essential_category,
_is_marketing_category,
_is_pseudo_purpose,
check_cookie_coherence,
)
class TestCategoryHelpers:
    """Checks for the declared-category classifiers.

    Exercises ``_is_essential_category`` and ``_is_marketing_category``
    with German and English category labels.
    """

    def test_essential_de(self):
        """German labels count as essential."""
        for label in ("Erforderlich", "technisch notwendig"):
            assert _is_essential_category(label)

    def test_essential_en(self):
        """English labels count as essential."""
        for label in ("Strictly Necessary", "essential"):
            assert _is_essential_category(label)

    def test_not_essential(self):
        """Marketing and analytics labels are not essential."""
        for label in ("Marketing", "Analyse"):
            assert not _is_essential_category(label)

    def test_marketing(self):
        """Marketing/advertising labels match; a functional label does not."""
        for label in ("marketing", "advertising"):
            assert _is_marketing_category(label)
        assert not _is_marketing_category("functional")
class TestPseudoPurpose:
    """Checks for ``_is_pseudo_purpose`` on placeholder and real purposes."""

    def test_explicit_floskel(self):
        """Known boilerplate phrases are flagged as pseudo purposes."""
        for phrase in ("Siehe dazugehörige Datenverarbeitung", "see above"):
            assert _is_pseudo_purpose(phrase)

    def test_too_short(self):
        """A two-word purpose is flagged as a pseudo purpose."""
        assert _is_pseudo_purpose("Nutzung Cookie")

    def test_real_purpose(self):
        """A full descriptive sentence is not flagged."""
        real_purpose = (
            "Speichert die anonymisierte Besucher-ID zur "
            "Unterscheidung über mehrere Sessions hinweg."
        )
        assert not _is_pseudo_purpose(real_purpose)
class TestCheck:
    """Checks of ``check_cookie_coherence`` against hand-built state dicts."""

    def _state(self, vendors):
        """Wrap *vendors* in a state dict under the ``cmp_vendors`` key."""
        return {"cmp_vendors": vendors}

    @staticmethod
    def _with_check_id(findings, check_id):
        """Return the findings whose ``check_id`` equals *check_id*."""
        return [entry for entry in findings if entry["check_id"] == check_id]

    def test_no_vendors_no_findings(self):
        """An empty state yields no findings."""
        assert check_cookie_coherence({}) == []

    def test_marketing_as_essential_high_finding(self):
        """A marketing cookie declared as essential yields one HIGH finding."""
        # Pinterest _pin_unauth is actual=marketing per KB
        cookie = {
            "name": "_pin_unauth",
            "category": "Erforderlich",
            "purpose": "Speichert technische Nutzerkennung dauerhaft",
            "duration": "1 Jahr",
        }
        vendor = {
            "name": "Pinterest",
            "category": "Erforderlich",
            "cookies": [cookie],
        }
        results = check_cookie_coherence(self._state([vendor]))
        hits = self._with_check_id(results, "COOKIE-COHERENCE-MAE-001")
        assert len(hits) == 1
        finding = hits[0]
        assert finding["severity"] == "HIGH"
        assert finding["actual_category"] == "marketing"

    def test_essential_with_long_lifetime_finding(self):
        """An essential cookie with a one-year lifetime yields one MEDIUM finding."""
        # Even if KB-classified as functional/essential, 1 Jahr in
        # "essential" is implausible.
        cookie = {
            "name": "guest_uuid_essential_abc123",
            "category": "Erforderlich",
            "purpose": "Speichert anonyme Session-Kennung über Browser hinweg",
            "duration": "1 Jahr",
        }
        vendor = {
            "name": "Salesforce",
            "category": "Erforderlich",
            "cookies": [cookie],
        }
        results = check_cookie_coherence(self._state([vendor]))
        hits = self._with_check_id(results, "COOKIE-COHERENCE-LIFE-001")
        assert len(hits) == 1
        assert hits[0]["severity"] == "MEDIUM"

    def test_pseudo_purpose_finding(self):
        """A boilerplate cookie purpose yields one pseudo-purpose finding."""
        cookie = {
            "name": "completely_made_up_cookie_xyz",
            "category": "functional",
            "purpose": "Siehe dazugehörige Datenverarbeitung",
            "duration": "session",
        }
        vendor = {
            "name": "TestVendor",
            "category": "functional",
            "purpose": "irgendwas",
            "cookies": [cookie],
        }
        results = check_cookie_coherence(self._state([vendor]))
        hits = self._with_check_id(results, "COOKIE-COHERENCE-PURP-001")
        assert len(hits) == 1

    def test_duplicate_vendor_finding(self):
        """The same vendor in two categories yields one duplicate finding."""
        # Salesforce in TWO different categories
        essential_entry = {
            "name": "Salesforce",
            "category": "Erforderlich",
            "cookies": [
                {"name": "a", "purpose": "konkreter Zweck Text mit vielen Worten"},
            ],
        }
        marketing_entry = {
            "name": "Salesforce Inc.",
            "category": "Marketing",
            "cookies": [
                {"name": "b", "purpose": "konkreter Zweck Text mit vielen Worten"},
            ],
        }
        results = check_cookie_coherence(
            self._state([essential_entry, marketing_entry])
        )
        hits = self._with_check_id(results, "COOKIE-COHERENCE-DUP-001")
        assert len(hits) == 1

    def test_pseudo_purpose_suppressed_when_vendor_purpose_substantial(self):
        """No pseudo-purpose finding when the vendor purpose is substantial."""
        # If vendor-level purpose has substantial text, cookie inheriting
        # "Siehe dazugehörige Datenverarbeitung" is not flagged.
        vendor_purpose = (
            "Salesforce CRM-System verarbeitet personenbezogene Daten "
            "im Auftrag zur Verwaltung der Kundenbeziehung über mehrere "
            "Touchpoints hinweg."
        )
        cookie = {
            "name": "sf_session",
            "category": "functional",
            "purpose": "Siehe dazugehörige Datenverarbeitung",
            "duration": "session",
        }
        vendor = {
            "name": "Salesforce",
            "category": "functional",
            "purpose": vendor_purpose,
            "cookies": [cookie],
        }
        results = check_cookie_coherence(self._state([vendor]))
        hits = self._with_check_id(results, "COOKIE-COHERENCE-PURP-001")
        assert hits == []