4171cf0efd
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / secret-scan (push) Has been skipped
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / validate-canonical-controls (push) Successful in 16s
CI / loc-budget (push) Failing after 18s
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / detect-changes (push) Successful in 9s
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Has been skipped
CI / test-go (push) Has been skipped
CI / iace-gt-coverage (push) Has been skipped
CI / test-python-dsms-gateway (push) Has been skipped
CI / test-python-backend (push) Successful in 44s
CI / test-python-document-crawler (push) Has been skipped
check_social_embedding: erkennt direkte FB/Insta/Twitter/YouTube- Embeds (connect.facebook.net, platform.twitter.com etc) vs Heise-Shariff vs 2-Klick-Loesungen (Embetty). Direkte Embeds ohne Schutz = HIGH (EuGH C-40/17 Fashion-ID — der Site-Betreiber wird zum gemeinsam Verantwortlichen und braucht Einwilligung VOR dem Drittanbieter-Call). Shariff oder 2-Klick erkannt = INFO (positives Signal). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
298 lines
11 KiB
Python
298 lines
11 KiB
Python
"""
P35 + P36 + P77 + P78 — post-hoc text-signal checks on the loaded
document texts (privacy policy "DSE" / cookie policy / banner text).

P35 — "Speichern" (save) as an ambiguous reject label in the banner. If the
      only closing element is labelled just "Speichern" (instead of
      "Alle ablehnen" / "Nur notwendige"), this is a MEDIUM finding, because
      the user cannot tell whether they have just accepted or rejected.

P36 — Social-media embedding: direct Facebook/Instagram/Twitter/YouTube
      embeds without protection are a HIGH finding; a detected Shariff or
      two-click solution is reported as a positive signal (INFO).

P77 — Cookie-doc architecture: if no dedicated cookie policy was delivered,
      but the DSE contains a prominently named cookie section (with vendor
      list + storage duration), this is an equivalent OEM pattern. Yields a
      positive signal instead of the MEDIUM finding "cookie policy missing".

P78 — JC detection in the DSE text: recognises 'joint controller' clauses
      (Art. 26 GDPR) in the DSE text. Yields the positive signal
      "JC construct documented" — prevents the false positive
      "JC not mentioned despite cooperation with a sister company".

All four checks return a dict of shape {"severity": ...} or a
positive-signal dict, or None when the check does not apply.
"""
|
|
|
|
from __future__ import annotations

import html
import logging
import re
|
|
|
|
logger = logging.getLogger(__name__)

# Lower-case substrings that count as an unambiguous "reject" label in the
# banner text (German and English variants).
_REJECT_LABEL_KEYS = (
    "alle ablehnen",
    "ablehnen",
    "reject all",
    "deny all",
    "nur notwendige",
    "nur essenzielle",
    "nur erforderliche",
    "essentials only",
    "verweigern",
    "block all",
)

# Lower-case substrings for "save"-style labels that do not say whether the
# user accepted or rejected.
_SAVE_ONLY_KEYS = (
    "speichern",
    "auswahl speichern",
    "save selection",
    "auswahl bestaetigen",
)

# Headings that indicate a dedicated cookie section inside the privacy
# policy (DSE). Umlauts are spelled as ASCII transliterations.
_COOKIE_SECTION_HEADINGS = (
    "cookies und tracking",
    "cookies und vergleichbare technologien",
    "cookies und aehnliche technologien",
    "verwendung von cookies",
    "informationen zu cookies",
    "uebersicht der cookies",
    "eingesetzte cookies",
    "cookies im einsatz",
)

# Terms whose presence suggests the cookie section is substantial
# (storage duration, vendors, recipients, legal basis, ...).
_VENDOR_HINTS = (
    "speicherdauer",
    "lebensdauer",
    "anbieter",
    "drittanbieter",
    "datenempfaenger",
    "datenkategorie",
    "rechtsgrundlage",
)

# Phrases that signal a joint-controller clause (Art. 26 GDPR) in the DSE.
_JC_PATTERNS = (
    "gemeinsam verantwortlich",
    "joint controller",
    "gemeinsame verantwortung",
    "art. 26 dsgvo",
    "art 26 dsgvo",
    "vereinbarung gemaess art. 26",
    "joint-controller-vereinbarung",
    "gemeinsame verarbeitung",
)
|
|
|
|
# P36 — social-media embedding:
#   "direct"    = direct FB/Insta/Twitter embeds that load on page load
#                 (HIGH risk, cookies before consent).
#   "shariff"   = Heise Shariff buttons (client-side, no third-party call).
#   "two_click" = two-step solution (clicking a placeholder loads the
#                 tracker).
_SOCIAL_DIRECT_PATTERNS = (
    "connect.facebook.net",
    "platform.twitter.com",
    "platform.instagram.com",
    "platform.linkedin.com",
    "youtube.com/embed",
    "syndication.twitter.com",
    "//www.facebook.com/",
    "fb-pixel",
    "facebook-pixel",
)

_SOCIAL_SHARIFF_PATTERNS = (
    "shariff",
    "ct_shariff",
    "data-shariff",
)

_SOCIAL_TWOCLICK_PATTERNS = (
    "2-klick",
    "2klick",
    "zwei klick",
    "two-click",
    "klick-zu-laden",
    "klick um zu laden",
    "platzhalter laed",
    "embetty",
)
|
|
|
|
|
|
def check_save_only_reject(banner_result: dict) -> dict | None:
    """P35 — flag a banner that offers "Speichern" but no clear reject label.

    Reads ``banner_result["phases"]["initial"]["banner_text"]`` and does a
    case-insensitive substring search against ``_REJECT_LABEL_KEYS`` and
    ``_SAVE_ONLY_KEYS``.

    Args:
        banner_result: Banner scan result; missing or malformed levels are
            treated as "no banner text".

    Returns:
        A MEDIUM finding dict when a save-style label is present and no
        reject label is found; ``None`` when the text is missing, shorter
        than 30 characters, contains a reject label, or contains no
        save-style label.
    """
    phases = banner_result.get("phases") if isinstance(banner_result, dict) else None
    initial = phases.get("initial") if isinstance(phases, dict) else None
    if not isinstance(initial, dict):
        return None

    raw_text = initial.get("banner_text")
    if not isinstance(raw_text, str):
        return None
    btext = raw_text.lower()
    if len(btext) < 30:
        return None

    # The keyword tables are spelled with ASCII transliterations
    # ("bestaetigen"), so fold real umlauts/eszett in the text the same way;
    # otherwise "Auswahl bestätigen" would never match.
    fold_umlauts = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})
    btext = btext.translate(fold_umlauts)

    has_clear_reject = any(key in btext for key in _REJECT_LABEL_KEYS)
    if has_clear_reject:
        return None
    has_save_only = any(key in btext for key in _SAVE_ONLY_KEYS)
    if not has_save_only:
        return None

    return {
        "severity": "MEDIUM",
        "code": "save_label_ambiguous",
        "label": (
            'Banner verwendet "Speichern" ohne erkennbares "Ablehnen" '
            '— mehrdeutig fuer den Nutzer'
        ),
        "detail": (
            'Der Button "Speichern" laesst offen, ob die aktuelle '
            'Vorauswahl (oft alles aktiv) bestaetigt oder nur die '
            'getroffene Auswahl uebernommen wird. EDPB 03/2022 empfiehlt '
            'eindeutige Labels: "Alle akzeptieren" + "Alle ablehnen".'
        ),
        "legal_basis": "Art. 7 (1) DSGVO + EDPB 03/2022 Guidelines on "
                       "deceptive design patterns.",
    }
|
|
|
|
|
|
def check_cookies_in_dse(
    doc_texts: dict[str, str],
    cookie_doc_missing: bool,
) -> dict | None:
    """P77 — accept a substantial cookie section inside the DSE.

    Only runs when no separate cookie policy was found. The DSE text must be
    at least 1000 characters long, contain one of
    ``_COOKIE_SECTION_HEADINGS`` and mention at least three distinct
    ``_VENDOR_HINTS`` terms.

    Args:
        doc_texts: Mapping of document key to text; only ``"dse"`` is read.
        cookie_doc_missing: True when no dedicated cookie policy exists.

    Returns:
        An INFO (positive-signal) dict when the DSE qualifies, else ``None``.
    """
    if not cookie_doc_missing:
        return None

    dse = (doc_texts or {}).get("dse") or ""
    if not isinstance(dse, str) or len(dse) < 1000:
        return None

    # Headings and hints are stored as ASCII transliterations ("aehnliche",
    # "uebersicht", "datenempfaenger"); fold umlauts/eszett in the document
    # so the normally spelled German text matches as well.
    fold_umlauts = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})
    dse_lower = dse.lower().translate(fold_umlauts)

    if not any(heading in dse_lower for heading in _COOKIE_SECTION_HEADINGS):
        return None

    vendor_hint_count = sum(1 for hint in _VENDOR_HINTS if hint in dse_lower)
    if vendor_hint_count < 3:
        return None  # not substantial enough

    return {
        "severity": "INFO",  # positive signal, not a finding
        "code": "cookies_in_dse_accepted",
        "label": (
            "Cookie-Informationen sind im Datenschutz-Dokument enthalten "
            "(eigener Abschnitt mit Vendor-Hinweisen)"
        ),
        "detail": (
            "Die Praxis vieler OEM-Sites, Cookies als eigenen Abschnitt "
            'in der DSE zu fuehren (statt als separate Datei), wird als '
            "gleichwertig akzeptiert. Empfehlung trotzdem: separate "
            "Cookie-Richtlinie erleichtert kuenftige Aenderungen und "
            "Versionierung."
        ),
        "legal_basis": "Art. 13(1)(c) DSGVO — Form ist nicht vorgegeben, "
                       "Inhalt muss vollstaendig sein.",
    }
|
|
|
|
|
|
def check_jc_clause_in_dse(doc_texts: dict[str, str]) -> dict | None:
    """P78 — detect an Art. 26 joint-controller clause in the DSE text.

    Performs a case-insensitive substring search for every entry of
    ``_JC_PATTERNS`` in ``doc_texts["dse"]``.

    Args:
        doc_texts: Mapping of document key to text; only ``"dse"`` is read.

    Returns:
        An INFO (positive-signal) dict naming up to three matched patterns
        (alphabetically sorted), or ``None`` when the DSE is missing/empty
        or nothing matches.
    """
    dse = (doc_texts or {}).get("dse") or ""
    if not dse or not isinstance(dse, str):
        return None

    # Patterns use ASCII transliterations ("gemaess"); fold umlauts/eszett so
    # the regular spelling "gemäß Art. 26" is recognised too.
    fold_umlauts = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})
    dse_lower = dse.lower().translate(fold_umlauts)

    matches = [pattern for pattern in _JC_PATTERNS if pattern in dse_lower]
    if not matches:
        return None

    signals = ", ".join(sorted(set(matches))[:3])
    return {
        "severity": "INFO",
        "code": "jc_clause_documented",
        "label": "Gemeinsame Verantwortlichkeit (Art. 26 DSGVO) im "
                 "DSE-Text dokumentiert",
        "detail": (
            f'Erkannte Signale: {signals}. '
            'Das verhindert das False-Positive "JC-Konstrukt nicht '
            'erwaehnt" bei Sites mit Konzern-Schwesterunternehmen.'
        ),
        "legal_basis": "Art. 26 DSGVO + EDPB 7/2020 Guidelines on the "
                       "concepts of controller and processor.",
    }
|
|
|
|
|
|
def check_social_embedding(
    doc_texts: dict[str, str],
    homepage_html: str | None = None,
) -> dict | None:
    """P36 — direct social embeds vs. Shariff vs. two-click solutions.

    Scans the first 50,000 characters of each of the ``"dse"``, ``"cookie"``
    and ``"impressum"`` texts plus the homepage HTML (case-insensitive
    substring search) for the ``_SOCIAL_*_PATTERNS`` markers.

    Args:
        doc_texts: Mapping of document key to text.
        homepage_html: Optional raw homepage HTML to scan as well.

    Returns:
        * ``None`` when there is nothing to scan or no marker is found.
        * An INFO dict when a Shariff or two-click marker is present.
        * A HIGH dict when direct-embed markers are present and no
          protection marker is.
    """
    sources: list[str] = []
    for key in ("dse", "cookie", "impressum"):
        text = (doc_texts or {}).get(key)
        # Skip empty and non-string entries instead of failing on join().
        if text and isinstance(text, str):
            sources.append(text[:50000])
    if homepage_html and isinstance(homepage_html, str):
        sources.append(homepage_html[:50000])
    if not sources:
        return None

    # Patterns are ASCII transliterations ("platzhalter laed"); fold
    # umlauts/eszett so "Platzhalter lädt" is recognised too.
    fold_umlauts = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})
    blob = " ".join(sources).lower().translate(fold_umlauts)

    direct_hits = [p for p in _SOCIAL_DIRECT_PATTERNS if p in blob]
    has_shariff = any(p in blob for p in _SOCIAL_SHARIFF_PATTERNS)
    has_twoclick = any(p in blob for p in _SOCIAL_TWOCLICK_PATTERNS)

    # NOTE(review): a protection marker anywhere in the scanned text wins
    # over direct-embed hits, exactly as before — confirm this precedence is
    # intended for pages that mix both.
    if has_shariff or has_twoclick:
        return {
            "severity": "INFO",
            "code": "social_protected_embed",
            "label": (
                "Datenschutzfreundliche Social-Media-Einbindung erkannt "
                f"({'Shariff' if has_shariff else '2-Klick-Loesung'})"
            ),
            "detail": (
                "Drittanbieter-Skripte werden erst nach aktivem Klick "
                "geladen — kein Tracking ohne Einwilligung."
            ),
            "legal_basis": "EuGH C-40/17 + EDPB Guidelines 8/2020.",
        }

    if direct_hits:
        found = ", ".join(sorted(set(direct_hits))[:4])
        return {
            "severity": "HIGH",
            "code": "social_direct_embed",
            "label": "Direkte Social-Media-Embeds ohne 2-Klick-Schutz "
                     "oder Shariff erkannt",
            "detail": (
                'Gefundene Drittanbieter-Skripte: '
                f'{found}. '
                "Diese laden i.d.R. Cookies/Pixel ohne Einwilligung. "
                "Empfehlung: Heise-Shariff (clientseitig) oder "
                "2-Klick-Loesung (Embetty, eigener Platzhalter)."
            ),
            "legal_basis": "EuGH C-40/17 (Fashion-ID) — Einbinden eines "
                           "Facebook-Like-Buttons macht den Site-Betreiber "
                           "zum gemeinsam Verantwortlichen + benoetigt "
                           "Einwilligung VOR dem Drittanbieter-Call.",
        }

    # No marker of any kind was found.
    return None
|
|
|
|
|
|
def run_all(
    banner_result: dict | None,
    doc_texts: dict[str, str] | None,
    cookie_doc_missing: bool = False,
    homepage_html: str | None = None,
) -> list[dict]:
    """Run the P35, P77, P78 and P36 checks and collect their results.

    Every check is isolated: if one raises, a warning is logged and the
    remaining checks still run. ``None`` inputs are replaced by empty dicts
    before being handed to the individual checks.

    Returns:
        The truthy results in execution order (P35, P77, P78, P36).
    """
    # (log message on failure, deferred call) — evaluated lazily so that
    # argument preparation happens inside the try block as well.
    checks = (
        (
            "P35 save_only_reject failed: %s",
            lambda: check_save_only_reject(banner_result or {}),
        ),
        (
            "P77 cookies_in_dse failed: %s",
            lambda: check_cookies_in_dse(doc_texts or {}, cookie_doc_missing),
        ),
        (
            "P78 jc_clause failed: %s",
            lambda: check_jc_clause_in_dse(doc_texts or {}),
        ),
        (
            "P36 social_embedding failed: %s",
            lambda: check_social_embedding(doc_texts or {}, homepage_html),
        ),
    )

    collected: list[dict] = []
    for failure_message, run_check in checks:
        try:
            outcome = run_check()
        except Exception as exc:
            logger.warning(failure_message, exc)
            continue
        if outcome:
            collected.append(outcome)
    return collected
|
|
|
|
|
|
def build_signals_block_html(findings: list[dict]) -> str:
    """Render the findings as an HTML "Weitere Textsignale" block.

    Non-INFO findings are listed first, INFO (positive) findings last; the
    relative order within each group is preserved. The ``label``, ``detail``
    and ``legal_basis`` values are HTML-escaped (``&``, ``<``, ``>``) before
    being inserted, so text taken from scanned pages cannot inject markup.

    Args:
        findings: Finding dicts as produced by the checks in this module.
            A missing ``severity`` is rendered as MEDIUM.

    Returns:
        The HTML fragment, or ``""`` when there are no findings.
    """
    if not findings:
        return ""

    # severity -> (colour, tag); anything unknown falls back to MEDIUM style.
    styles = {
        "INFO": ("#16a34a", "✓ POSITIV"),
        "HIGH": ("#dc2626", "HOCH"),
    }
    fallback_style = ("#d97706", "MITTEL")

    def _text(finding: dict, key: str) -> str:
        # quote=False: values land in element content only, so quotes can
        # stay literal and existing output remains unchanged.
        return html.escape(str(finding.get(key, "")), quote=False)

    negatives = [f for f in findings if f.get("severity") != "INFO"]
    positives = [f for f in findings if f.get("severity") == "INFO"]

    items: list[str] = []
    for finding in negatives + positives:
        color, tag = styles.get(finding.get("severity", "MEDIUM"), fallback_style)
        items.append(
            f'<li style="margin-bottom:8px;font-size:11px;line-height:1.5">'
            f'<strong style="color:{color}">[{tag}] {_text(finding, "label")}</strong>'
            f'<div style="color:#475569;margin-top:2px">{_text(finding, "detail")}</div>'
            f'<div style="color:#94a3b8;margin-top:2px;font-style:italic">'
            f'{_text(finding, "legal_basis")}</div></li>'
        )

    return (
        '<div style="font-family:-apple-system,BlinkMacSystemFont,sans-serif;'
        'max-width:760px;margin:0 auto 16px;padding:12px 16px;'
        'background:#f8fafc;border:1px solid #e2e8f0;border-radius:6px">'
        '<div style="font-size:11px;color:#475569;text-transform:uppercase;'
        'letter-spacing:1.2px;margin-bottom:4px;font-weight:600">'
        'Weitere Textsignale</div>'
        '<ul style="margin:6px 0 0 18px;padding:0">'
        + "".join(items) +
        '</ul></div>'
    )
|