diff --git a/backend-compliance/compliance/api/agent_check/_b19_wiring.py b/backend-compliance/compliance/api/agent_check/_b19_wiring.py index ea9d0be3..ba71e77d 100644 --- a/backend-compliance/compliance/api/agent_check/_b19_wiring.py +++ b/backend-compliance/compliance/api/agent_check/_b19_wiring.py @@ -40,9 +40,14 @@ def _render(findings: list[dict]) -> str: severity_color = { "HIGH": "#dc2626", "MEDIUM": "#f59e0b", "LOW": "#64748b", } - # Show only the top 12 cards in the mail; rest goes to CSV + # Show only HIGH/MEDIUM/LOW cards in the mail; INFO (UNK auto- + # learning) bleibt nur in CSV — sonst überfüllt die Mail. + mail_findings = [ + f for f in findings + if (f.get("severity") or "").upper() in ("HIGH", "MEDIUM", "LOW") + ] cards = [] - for f in findings[:12]: + for f in mail_findings[:12]: sev = (f.get("severity") or "").upper() color = severity_color.get(sev, "#475569") meta = "" @@ -93,8 +98,10 @@ def _render(findings: list[dict]) -> str: f"BreakPilot-KB.
Verteilung: {type_summary}

" + "".join(cards) + (f"

" - f"… und {len(findings)-12} weitere — vollständige Liste " - f"in cookies-full.csv im ZIP-Anhang.

" - if len(findings) > 12 else "") + f"… und {len(findings)-len(cards)} weitere " + f"(inkl. {len(findings) - len(mail_findings)} INFO/UNK) " + f"— vollständig in cookies-full-*.csv im " + f"ZIP-Anhang.

" + if len(findings) > len(cards) else "") + "" ) diff --git a/backend-compliance/compliance/services/cookie_coherence_check.py b/backend-compliance/compliance/services/cookie_coherence_check.py index 92265979..29748307 100644 --- a/backend-compliance/compliance/services/cookie_coherence_check.py +++ b/backend-compliance/compliance/services/cookie_coherence_check.py @@ -85,6 +85,36 @@ def _is_pseudo_purpose(purpose: str) -> bool: return False +def _is_first_party_owner(vendor: str, state: dict) -> bool: + """Heuristic: the vendor is the site operator itself (first-party). + + Compares the normalised vendor name against host labels taken from + the doc_entries URLs in state, e.g. "BMW AG" matches bmw.de. A brand + whose domain shares no label with its name is not detected. + """ + if not vendor: + return False + vn = _norm_vendor(vendor) + if not vn: + return False + # Collect candidate host labels (first and second-to-last) from doc-URLs + domains: set[str] = set() + for e in (state.get("doc_entries") or []): + url = (e.get("url") or "").strip().lower() + if "://" in url: + host = url.split("://", 1)[1].split("/", 1)[0] + host = host.removeprefix("www.") # prefix only; lstrip() strips a char set + parts = host.split(".") + if parts: + domains.add(parts[0]) + if len(parts) >= 2: + domains.add(parts[-2]) + for d in domains: + if d and len(d) >= 3 and d in vn: + return True + return False + + def _norm_vendor(name: str) -> str: s = (name or "").lower().strip() s = re.sub(r"\binc\.?$|\bllc\.?$|\bsas\.?$|\bgmbh\.?$|" @@ -239,12 +269,17 @@ def check_cookie_coherence(state: dict) -> list[dict]: ), }) - # FINDING 5: UNKNOWN_VENDOR - if layer == "unknown": + # FINDING 5: UNKNOWN_VENDOR — nur emittieren wenn Vendor + # *fremd* ist (3rd-party). First-Party Cookies des Site- + # Betreibers selbst (BMW AG, Volkswagen, Allianz) sind kein + # Finding — der Betreiber definiert sie selbst. 
+ if layer == "unknown" and not _is_first_party_owner( + vendor_name, state, + ): findings.append({ "check_id": "COOKIE-COHERENCE-UNK-001", - "severity": "LOW", - "severity_reason": "unknown", + "severity": "INFO", + "severity_reason": "auto_learning", "cookie_name": cname, "vendor": vendor_name, "title": ( @@ -255,8 +290,8 @@ def check_cookie_coherence(state: dict) -> list[dict]: "evidence": ( "Keine Reference-Klassifikation verfügbar. " "Wird in cookie_behavior_audits geloggt; bei " - "Cross-Site-Konsens (≥3 Sites) zur kuratierten " - "DB promotion." + "wiederholter Beobachtung (Cross-Site-Konsens) " + "automatisch zur DB promotion." ), "recommended_action": ( "Manuell prüfen + ggf. zu BreakPilot-KB hinzufügen." diff --git a/backend-compliance/compliance/services/cookie_library_lookup.py b/backend-compliance/compliance/services/cookie_library_lookup.py index 78c4f130..e8c9d085 100644 --- a/backend-compliance/compliance/services/cookie_library_lookup.py +++ b/backend-compliance/compliance/services/cookie_library_lookup.py @@ -205,7 +205,7 @@ def _load_auto_learning(name: str) -> dict | None: "FROM compliance.cookie_behavior_audits " "WHERE LOWER(cookie_name) = LOWER(:n) " "GROUP BY cookie_name " - "HAVING COUNT(DISTINCT site_url) >= 3" + "HAVING COUNT(*) >= 1" ), {"n": name}, ).mappings().first()