diff --git a/admin-compliance/app/sdk/agent/_components/CookieFindings.tsx b/admin-compliance/app/sdk/agent/_components/CookieFindings.tsx index b411a0a6..254e5794 100644 --- a/admin-compliance/app/sdk/agent/_components/CookieFindings.tsx +++ b/admin-compliance/app/sdk/agent/_components/CookieFindings.tsx @@ -20,6 +20,7 @@ const TYPE_LABEL: Record = { excessive_lifetime: 'Speicherdauer zu lang', vague_duration: 'Speicherdauer nicht konkret', missing_retention: 'Keine Speicherdauer/Löschfrist', + missing_opt_out: 'Opt-Out-/Widerspruchs-Link fehlt', storage_transparency: 'Speichertyp nicht transparent', third_country: 'Drittland-Transfer', eu_alternative: 'EU-Alternative verfügbar', @@ -29,6 +30,7 @@ const TYPE_MEASURE: Record = { missing_purpose: 'Zweck je Cookie ergänzen (Art. 13 DSGVO).', vague_duration: 'Konkrete Speicherdauer oder Löschkriterium angeben (Art. 5 Abs. 1 lit. e).', missing_retention: 'Speicherdauer/Löschfrist je Verarbeiter festlegen (Art. 5 Abs. 1 lit. e).', + missing_opt_out: 'Opt-Out-/Widerspruchs-Link je Anbieter angeben (Art. 7 Abs. 3 + Art. 21).', excessive_lifetime: 'Speicherdauer auf das Erforderliche reduzieren (Art. 5 Abs. 1 lit. e).', storage_transparency: 'Speichertyp + -dauer je Objekt transparent ausweisen (§ 25 TDDDG).', third_country: 'Geeignete Garantien je Verarbeiter prüfen (SCC Art. 46 / Art. 49).', @@ -36,7 +38,8 @@ const TYPE_MEASURE: Record = { } const TYPE_ORDER = [ 'tracker_as_necessary', 'missing_purpose', 'vague_duration', 'missing_retention', - 'excessive_lifetime', 'storage_transparency', 'third_country', 'eu_alternative', + 'missing_opt_out', 'excessive_lifetime', 'storage_transparency', + 'third_country', 'eu_alternative', ] const SEV_ORDER: Record = { HIGH: 0, MEDIUM: 1, LOW: 2 } const SEV_COLOR: Record = { diff --git a/backend-compliance/compliance/services/cookie_library_check.py b/backend-compliance/compliance/services/cookie_library_check.py index 1954b39d..bba0a359 100644 --- a/backend-compliance/compliance/services/cookie_library_check.py +++ b/backend-compliance/compliance/services/cookie_library_check.py @@ -35,6 +35,7 @@ _CONTROL_MAP = { "excessive_lifetime": {"control_id": "AUTH-2051-A02", "regulation": "DSGVO", "article": "Art. 5 Abs. 1 lit. e"}, "tracker_as_necessary": {"control_id": "DATA-2851-A05", "regulation": "TDDDG", "article": "§ 25 Abs. 1"}, "missing_purpose": {"control_id": "AUTH-2053-A05", "regulation": "DSGVO", "article": "Art. 13"}, + "missing_opt_out": {"control_id": "DATA-2851-A05", "regulation": "DSGVO", "article": "Art. 7 Abs. 3 + Art. 21"}, "third_country": {"control_id": "DATA-1624-A04", "regulation": "DSGVO", "article": "Art. 44 ff."}, "eu_alternative": {"control_id": None, "regulation": "—", "article": "kommerzielle Empfehlung"}, } @@ -72,6 +73,12 @@ _EEA = { "FI", "GR", "HU", "IT", "LV", "LT", "LU", "MT", "PL", "PT", "RO", "SK", "SI", "ES", "SE", "IS", "LI", "NO", } +# Unbekannte/leere Herkunft ist KEIN Drittland (z.B. First-Party-Session-Cookies +# PHPSESSID/JSESSIONID mit vendor_country 'N/A'). +_UNKNOWN_COUNTRY = {"", "N/A", "NA", "N.A.", "UNKNOWN", "UNBEKANNT", "?"} +# Einwilligungspflichtige Kategorien (für Opt-Out-/Widerspruchs-Pflicht). +_CONSENT_CATS = {"marketing", "statistics", "targeting", "social_media", + "tracking", "werbung", "advertising"} _SEV_ORDER = {"HIGH": 0, "MEDIUM": 1, "LOW": 2} @@ -218,8 +225,13 @@ def analyze_cookies(vendors: list[dict], big_lib: dict | None = None) -> dict: ), }) - # 4) Drittland-Transfer (je Vendor einmal). - if (country and country not in _EEA or schrems) and vname not in seen_third: + # 4) Drittland-Transfer (je Vendor einmal). Nur bei BEKANNTEM + # Nicht-EWR-Land — 'N/A'/unbekannt ist KEIN Drittland (First-Party- + # Session-Cookies); Self-Hosting laut Library = kein Transfer. + country_third = (country not in _UNKNOWN_COUNTRY + and country not in _EEA + and "SELF-HOST" not in country) + if (country_third or schrems) and vname not in seen_third: seen_third.add(vname) findings.append({ "vendor": vname, "cookie": name, "type": "third_country", @@ -271,6 +283,23 @@ def analyze_cookies(vendors: list[dict], big_lib: dict | None = None) -> dict: ), }) + # Vendor-Ebene: einwilligungspflichtiger Anbieter (Marketing/Tracking) + # mit Cookies, aber ohne Opt-Out-/Widerspruchs-Link. + if (vcat in _CONSENT_CATS and (v.get("cookies") or []) + and not (v.get("opt_out_url") or "").strip()): + findings.append({ + "vendor": vname, "cookie": "(Vendor-Ebene)", + "type": "missing_opt_out", "severity": "LOW", + "declared": vcat_label, "library_purpose": "", + "remediation": ( + f"Für den einwilligungspflichtigen Anbieter '{vname}' " + f"({vcat_label}) ist kein Opt-Out-/Widerspruchs-Link " + f"hinterlegt. Eine einfache Widerrufs-/Widerspruchs-Möglichkeit " + f"angeben (Art. 7 Abs. 3 + Art. 21 DSGVO, § 25 TDDDG) — so " + f"einfach wie die Einwilligung." + ), + }) + # A: jeden Befund an Control + Rechtsgrundlage haengen + als echtes Finding # (zu beheben) oder Hinweis (advisory, gegen DSE abzugleichen) klassifizieren. for f in findings: diff --git a/backend-compliance/compliance/services/specialist_agents/impressum/agent.py b/backend-compliance/compliance/services/specialist_agents/impressum/agent.py index 17fc7ecd..5d4bfd76 100644 --- a/backend-compliance/compliance/services/specialist_agents/impressum/agent.py +++ b/backend-compliance/compliance/services/specialist_agents/impressum/agent.py @@ -79,12 +79,18 @@ def _build_measure(label: str, norm: str) -> str: def _line_of(text: str, start_pos: int, end_pos: int) -> str: - """Die Zeile um einen Regex-Treffer — als 'gefundener Wert' für die - Pflichtangaben-Tabelle. Gekappt + bereinigt.""" + """Ein enger Ausschnitt um einen Regex-Treffer — der 'gefundene Wert' für die + Pflichtangaben-Tabelle. Bevorzugt die Zeile; bei Texten ohne (genug) + Zeilenumbrüche (z.B. BMW-Impressum als ein Block) ein Fenster um den Treffer, + damit jede MC IHREN Beleg zeigt statt immer denselben Anfangssatz.""" start = text.rfind("\n", 0, start_pos) + 1 end = text.find("\n", end_pos) if end == -1: end = len(text) + # Zeile zu lang (kein/seltener Umbruch) → enges Fenster zentriert am Treffer. + if end - start > 160: + start = max(start, start_pos - 70) + end = min(end, end_pos + 70) return " ".join(text[start:end].split())[:160] diff --git a/backend-compliance/compliance/tests/test_cookie_library_check.py b/backend-compliance/compliance/tests/test_cookie_library_check.py index 4e71d8ec..f58b9e48 100644 --- a/backend-compliance/compliance/tests/test_cookie_library_check.py +++ b/backend-compliance/compliance/tests/test_cookie_library_check.py @@ -58,6 +58,38 @@ def test_third_country_and_eu_alternative_for_us_tracker(): assert "eu_alternative" in t +def test_session_cookie_unknown_country_no_third_country(): + # PHPSESSID: rich-DB vendor_country 'N/A' → KEIN Drittland (war False Positive, + # weil 'N/A' nicht im EWR-Set steht). First-Party-Session-Cookie. + out = analyze_cookies([{ + "name": "BMW AG — Infrastructure Basic", "category": "necessary", + "cookies": [{"name": "PHPSESSID", "purpose": "Session", "expiry": "Session"}], + }]) + assert not [f for f in out["findings"] if f["type"] == "third_country"] + + +def test_missing_opt_out_for_marketing_vendor(): + out = analyze_cookies([{ + "name": "AdVendor", "category": "marketing", "opt_out_url": "", + "cookies": [{"name": "track1", "purpose": "ads", "expiry": "1 Jahr"}], + }]) + mo = [f for f in out["findings"] if f["type"] == "missing_opt_out"] + assert len(mo) == 1 + assert mo[0]["kind"] == "finding" + assert "Widerspruch" in mo[0]["remediation"] or "Opt-Out" in mo[0]["remediation"] + + +def test_no_missing_opt_out_when_url_present_or_necessary(): + # Mit Opt-Out-URL → kein Finding; notwendige Kategorie → ebenfalls keins. + out = analyze_cookies([ + {"name": "A", "category": "marketing", "opt_out_url": "https://x/opt", + "cookies": [{"name": "t", "purpose": "ads", "expiry": "1 Jahr"}]}, + {"name": "B", "category": "necessary", "opt_out_url": "", + "cookies": [{"name": "sess", "purpose": "x", "expiry": "Session"}]}, + ]) + assert not [f for f in out["findings"] if f["type"] == "missing_opt_out"] + + def test_kind_splits_findings_from_hinweise(): # third_country/eu_alternative = Hinweis (advisory); Rest = Finding. out = analyze_cookies([{ diff --git a/backend-compliance/compliance/tests/test_impressum_line_of.py b/backend-compliance/compliance/tests/test_impressum_line_of.py new file mode 100644 index 00000000..1109d830 --- /dev/null +++ b/backend-compliance/compliance/tests/test_impressum_line_of.py @@ -0,0 +1,29 @@ +"""_line_of — Beleg-Ausschnitt pro Pflichtangabe. + +Regression: bei Impressum-Texten ohne Zeilenumbrüche (z.B. BMW als ein Block) +zeigte die Pflichtangaben-Tabelle für JEDE MC denselben Anfangssatz. Jetzt wird +ein enges Fenster um den Treffer ausgeschnitten → jede MC zeigt ihren Beleg. +""" + +from __future__ import annotations + +from compliance.services.specialist_agents.impressum.agent import _line_of + + +def test_window_per_match_in_long_block(): + text = "A" * 200 + " EMAIL kontakt@bmw.de " + "B" * 200 + " HRB 12345 " + "C" * 200 + e_pos = text.index("kontakt@bmw.de") + h_pos = text.index("12345") + email = _line_of(text, e_pos, e_pos + len("kontakt@bmw.de")) + hrb = _line_of(text, h_pos, h_pos + 5) + assert "kontakt@bmw.de" in email + assert "12345" in hrb + assert email != hrb # nicht mehr derselbe Anfangssatz + assert len(email) <= 160 + + +def test_short_line_unchanged(): + text = "Zeile eins\nkontakt@bmw.de\nZeile drei" + pos = text.index("kontakt@bmw.de") + out = _line_of(text, pos, pos + len("kontakt@bmw.de")) + assert out == "kontakt@bmw.de"