From 327e6a8984ae6120c4abe7ee86f6c9deb13c3325 Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Verteilung: {type_summary}
"
- f"… und {len(findings)-12} weitere — vollständige Liste "
- f"in cookies-full.csv im ZIP-Anhang.
cookies-full-*.csv im "
+ f"ZIP-Anhang."
+ if len(findings) > len(cards) else "")
+ ""
)
diff --git a/backend-compliance/compliance/services/cookie_coherence_check.py b/backend-compliance/compliance/services/cookie_coherence_check.py
index 92265979..29748307 100644
--- a/backend-compliance/compliance/services/cookie_coherence_check.py
+++ b/backend-compliance/compliance/services/cookie_coherence_check.py
@@ -85,6 +85,36 @@ def _is_pseudo_purpose(purpose: str) -> bool:
return False
+def _is_first_party_owner(vendor: str, state: dict) -> bool:
+    """Heuristic: return True if *vendor* looks like the site operator itself.
+
+    Collects the first and the second-to-last host label of every URL in
+    state["doc_entries"] and returns True when one of them (>= 3 chars) is a
+    substring of the normalised vendor name; "BMW AG" should match bmw.de.
+    """
+    if not vendor:
+        return False
+    vn = _norm_vendor(vendor)
+    if not vn:
+        return False
+    # Candidate owner labels taken from the hosts of the document URLs.
+    domains: set[str] = set()
+    for e in (state.get("doc_entries") or []):
+        url = (e.get("url") or "").strip().lower()
+        if "://" in url:
+            host = url.split("://", 1)[1].split("/", 1)[0]
+            # Cut the "www." prefix only; lstrip() strips a character set.
+            host = host[4:] if host.startswith("www.") else host
+            parts = host.split(".")
+            domains.add(parts[0])
+            if len(parts) >= 2:
+                domains.add(parts[-2])
+    for d in domains:
+        if d and len(d) >= 3 and d in vn:
+            return True
+    return False
+
+
def _norm_vendor(name: str) -> str:
s = (name or "").lower().strip()
s = re.sub(r"\binc\.?$|\bllc\.?$|\bsas\.?$|\bgmbh\.?$|"
@@ -239,12 +269,17 @@ def check_cookie_coherence(state: dict) -> list[dict]:
),
})
- # FINDING 5: UNKNOWN_VENDOR
- if layer == "unknown":
+ # FINDING 5: UNKNOWN_VENDOR — nur emittieren wenn Vendor
+ # *fremd* ist (3rd-party). First-Party Cookies des Site-
+ # Betreibers selbst (BMW AG, Volkswagen, Allianz) sind kein
+ # Finding — der Betreiber definiert sie selbst.
+ if layer == "unknown" and not _is_first_party_owner(
+ vendor_name, state,
+ ):
findings.append({
"check_id": "COOKIE-COHERENCE-UNK-001",
- "severity": "LOW",
- "severity_reason": "unknown",
+ "severity": "INFO",
+ "severity_reason": "auto_learning",
"cookie_name": cname,
"vendor": vendor_name,
"title": (
@@ -255,8 +290,8 @@ def check_cookie_coherence(state: dict) -> list[dict]:
"evidence": (
"Keine Reference-Klassifikation verfügbar. "
"Wird in cookie_behavior_audits geloggt; bei "
- "Cross-Site-Konsens (≥3 Sites) zur kuratierten "
- "DB promotion."
+ "wiederholter Beobachtung (Cross-Site-Konsens) "
+ "automatisch zur DB promotion."
),
"recommended_action": (
"Manuell prüfen + ggf. zu BreakPilot-KB hinzufügen."
diff --git a/backend-compliance/compliance/services/cookie_library_lookup.py b/backend-compliance/compliance/services/cookie_library_lookup.py
index 78c4f130..e8c9d085 100644
--- a/backend-compliance/compliance/services/cookie_library_lookup.py
+++ b/backend-compliance/compliance/services/cookie_library_lookup.py
@@ -205,7 +205,7 @@ def _load_auto_learning(name: str) -> dict | None:
"FROM compliance.cookie_behavior_audits "
"WHERE LOWER(cookie_name) = LOWER(:n) "
"GROUP BY cookie_name "
- "HAVING COUNT(DISTINCT site_url) >= 3"
+ "HAVING COUNT(*) >= 1"
),
{"n": name},
).mappings().first()