feat: Phase 9 — Authenticated Testing + Legal Basis Validator (lit. mapping)
Phase 9: Playwright login + 5 post-login checks:
- §312k BGB: Kündigungsbutton (2 Klicks)
- Art. 17 DSGVO: Konto löschen
- Art. 20 DSGVO: Daten exportieren
- Art. 7(3) DSGVO: Einwilligungen widerrufen
- Art. 15 DSGVO: Profildaten einsehen

Auto-detects login form selectors. Credentials destroyed after test.

Legal Basis Validator: Checks 7 common lit-mapping mistakes, including:
- Cookie tracking on lit. f instead of lit. a (Planet49)
- Analytics on lit. b (contract overextension)
- Klarna without Art. 22 reference
- Session recording without consent

Integrated into website scan pipeline.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -21,6 +21,7 @@ from compliance.services.dse_matcher import build_text_references, TextReference
|
||||
from compliance.services.mandatory_content_checker import (
|
||||
check_mandatory_documents, check_dse_mandatory_content, MandatoryFinding,
|
||||
)
|
||||
from compliance.services.legal_basis_validator import validate_legal_bases
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -132,6 +133,22 @@ async def scan_website_endpoint(req: ScanRequest):
|
||||
text=f"{mf.text}" + (f" — {mf.suggestion}" if mf.suggestion else ""),
|
||||
))
|
||||
|
||||
# Step 8b: Validate legal bases (lit. a-f) in DSE
|
||||
if dse_text:
|
||||
lit_findings = validate_legal_bases(dse_text)
|
||||
for lf in lit_findings:
|
||||
findings.append(ScanFinding(
|
||||
code=f"LIT-{lf.purpose.upper()}",
|
||||
severity=lf.severity,
|
||||
text=lf.text,
|
||||
text_reference=TextReferenceModel(
|
||||
found=True, source_url=req.url,
|
||||
original_text=lf.original_text,
|
||||
issue="incorrect", correction_type="replace",
|
||||
correction_text=f"Korrekte Rechtsgrundlage: {lf.correct_basis} ({lf.legal_ref})",
|
||||
) if lf.original_text else None,
|
||||
))
|
||||
|
||||
# Step 9: Generate corrections for pre-launch mode
|
||||
if not is_live and findings:
|
||||
await _add_corrections(findings, dse_text)
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
"""
|
||||
Legal Basis Validator — checks if the correct DSGVO legal basis (lit. a-f)
|
||||
is used for each processing purpose in the privacy policy.
|
||||
|
||||
Common mistakes:
|
||||
- Cookie tracking on lit. f (legitimate interest) instead of lit. a (consent)
|
||||
- Marketing emails on lit. f instead of lit. a
|
||||
- Analytics on lit. b (contract) — incorrect overextension
|
||||
- Klarna credit check without Art. 22 reference
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class LitFinding:
    """One problem found with the legal basis stated for a processing purpose.

    Instances are produced by ``validate_legal_bases``; every field is a
    plain string.
    """

    # Identifier of the processing purpose (a key of CORRECT_BASIS).
    purpose: str
    # The basis found in the policy text, or "(fehlt)" when a mandatory
    # hint is missing.
    stated_basis: str
    # The basis the policy should state for this purpose.
    correct_basis: str
    # Severity label attached to the finding.
    severity: str
    # Human-readable description of the finding.
    text: str
    # Statute / case-law reference backing the finding.
    legal_ref: str
    # Excerpt of the policy text the finding refers to (may be empty).
    original_text: str = ""
|
||||
|
||||
|
||||
# Purpose → correct legal basis mapping.
# Based on: DSK Kurzpapiere, Planet49 (EuGH C-673/17), BGH Cookie-Urteil.
#
# Per purpose:
#   "correct"          – the legal basis the policy is expected to state
#   "wrong_patterns"   – lowercase substrings that indicate a wrong basis
#   "detect_patterns"  – lowercase regular expressions that detect a mention
#                        of the purpose ("." does not match a newline, so
#                        "a.*b" style patterns only match within one line)
#   "ref"              – statute / case-law reference for the finding
#   "must_contain"     – optional: lowercase substrings of which at least one
#                        has to appear near the mention
#
# German terms are listed both with umlauts and in ASCII transliteration,
# because policies use either spelling.
CORRECT_BASIS: dict[str, dict] = {
    "cookie_tracking": {
        "correct": "lit. a (Einwilligung)",
        "wrong_patterns": ["berechtigtes interesse", "lit. f", "lit.f", "legitimate interest"],
        "detect_patterns": ["cookie", "tracking", "pixel", "analytics.*cookie"],
        "ref": "EuGH C-673/17 (Planet49), §25 TDDDG",
    },
    "web_analytics": {
        "correct": "lit. a (Einwilligung)",
        "wrong_patterns": [
            "berechtigtes interesse", "lit. f", "lit.f",
            # Both spellings, matching the bonitaet/bonität convention below.
            "vertragserfuellung", "vertragserfüllung",
            "lit. b", "lit.b",
        ],
        "detect_patterns": ["google analytics", "webanalyse", "web analytics", "reichweitenmessung",
                            "nutzungsanalyse", "hotjar", "matomo"],
        "ref": "DSK Orientierungshilfe Telemedien, §25 TDDDG",
    },
    "marketing_email": {
        "correct": "lit. a (Einwilligung)",
        "wrong_patterns": ["berechtigtes interesse", "lit. f", "lit.f"],
        "detect_patterns": ["newsletter", "marketing.*mail", "werbe.*mail", "werbe.*email",
                            "marketing.*email", "werbliche.*kommunikation"],
        "ref": "Art. 7 DSGVO, §7 UWG (Double Opt-In)",
    },
    "remarketing": {
        "correct": "lit. a (Einwilligung)",
        "wrong_patterns": ["berechtigtes interesse", "lit. f", "lit.f"],
        "detect_patterns": ["remarketing", "retargeting", "personalisierte werbung",
                            "personalized advertising", "custom audience"],
        "ref": "§25 TDDDG, EuGH C-673/17",
    },
    "credit_check": {
        "correct": "lit. b/f + Art. 22 DSGVO Hinweis",
        "wrong_patterns": [],  # Not about wrong basis, but missing Art. 22
        "detect_patterns": ["bonitaet", "bonität", "kreditprüfung", "kreditpruefung",
                            "schufa", "auskunftei", "klarna.*rechnung", "ratenzahlung"],
        "ref": "Art. 22 DSGVO (automatisierte Einzelentscheidung)",
        "must_contain": ["art. 22", "art.22", "automatisierte entscheidung",
                         "automated decision", "einzelentscheidung"],
    },
    "social_media_embed": {
        "correct": "lit. a (Einwilligung)",
        "wrong_patterns": ["berechtigtes interesse", "lit. f", "lit.f"],
        "detect_patterns": ["facebook.*plugin", "social.*plugin", "like.*button",
                            "share.*button", "instagram.*embed", "twitter.*embed"],
        "ref": "EuGH C-40/17 (Fashion ID), 2-Klick-Loesung",
    },
    "session_recording": {
        "correct": "lit. a (Einwilligung)",
        "wrong_patterns": ["berechtigtes interesse", "lit. f", "lit.f"],
        "detect_patterns": ["session.?recording", "session.?replay", "heatmap",
                            "mouseflow", "hotjar.*recording", "clarity.*recording",
                            "fullstory", "lucky orange"],
        "ref": "§25 TDDDG, Aufzeichnung von Nutzerverhalten",
    },
}
|
||||
|
||||
|
||||
def _find_contexts(
    dse_text: str,
    patterns: list[str],
    before: int = 100,
    after: int = 200,
) -> list[str]:
    """Return the text surrounding every match of every pattern in *dse_text*.

    Each snippet spans ``before`` characters ahead of the match start up to
    ``after`` characters past the match end, with surrounding whitespace
    stripped. Snippets are ordered by pattern, then by position in the text.
    """
    # Search the original text case-insensitively instead of a lowered copy:
    # str.lower() may change the length of a string, which would make match
    # offsets taken from the lowered copy point at the wrong characters.
    return [
        dse_text[max(0, hit.start() - before):hit.end() + after].strip()
        for pattern in patterns
        for hit in re.finditer(pattern, dse_text, re.IGNORECASE)
    ]


def validate_legal_bases(dse_text: str) -> list[LitFinding]:
    """Check if correct legal bases are used in the privacy policy.

    For every purpose in ``CORRECT_BASIS`` all mentions of the purpose are
    located and the text around each mention (100 characters before, 200
    after) is inspected.

    Args:
        dse_text: Full text of the privacy policy. Empty text yields no
            findings.

    Returns:
        A list of ``LitFinding`` objects, at most one per purpose and check:
        one if a wrong legal basis is stated next to any mention of the
        purpose, and one if the purpose defines ``must_contain`` hints and
        none of them appears next to any mention.
    """
    findings: list[LitFinding] = []
    if not dse_text:
        return findings

    for purpose_id, rules in CORRECT_BASIS.items():
        # Step 1: Is this purpose mentioned in the DSE?
        contexts = _find_contexts(dse_text, rules["detect_patterns"])
        if not contexts:
            continue
        lowered = [ctx.lower() for ctx in contexts]

        # Step 2: Check if a wrong legal basis is stated near any mention.
        # Every mention is examined, not only the first one, because the
        # first hit is often a generic sentence without any legal basis.
        wrong_hit = next(
            (
                (ctx, wrong)
                for ctx, low in zip(contexts, lowered)
                for wrong in rules["wrong_patterns"]
                if wrong in low
            ),
            None,
        )
        if wrong_hit is not None:
            ctx, wrong = wrong_hit
            findings.append(LitFinding(
                purpose=purpose_id,
                stated_basis=wrong,
                correct_basis=rules["correct"],
                severity="HIGH",
                text=f"Falsche Rechtsgrundlage: '{_purpose_label(purpose_id)}' nutzt "
                     f"'{wrong}' statt '{rules['correct']}'",
                legal_ref=rules["ref"],
                original_text=ctx[:300],
            ))

        # Step 3: Special check — must_contain (e.g., Art. 22 for credit checks).
        # Reported only when no mention at all is accompanied by a required hint.
        if "must_contain" in rules and not any(
            required in low
            for low in lowered
            for required in rules["must_contain"]
        ):
            findings.append(LitFinding(
                purpose=purpose_id,
                stated_basis="(fehlt)",
                correct_basis=rules["correct"],
                severity="HIGH",
                text=f"Pflichthinweis fehlt: '{_purpose_label(purpose_id)}' erwaehnt "
                     f"keine automatisierte Entscheidungsfindung ({rules['ref']})",
                legal_ref=rules["ref"],
                original_text=contexts[0][:300],
            ))

    return findings
|
||||
|
||||
|
||||
def _purpose_label(purpose_id: str) -> str:
|
||||
"""German label for purpose ID."""
|
||||
labels = {
|
||||
"cookie_tracking": "Cookie-Tracking",
|
||||
"web_analytics": "Webanalyse",
|
||||
"marketing_email": "Marketing-Emails/Newsletter",
|
||||
"remarketing": "Remarketing/Retargeting",
|
||||
"credit_check": "Bonitaetspruefung",
|
||||
"social_media_embed": "Social Media Einbindung",
|
||||
"session_recording": "Session Recording/Heatmaps",
|
||||
}
|
||||
return labels.get(purpose_id, purpose_id)
|
||||
Reference in New Issue
Block a user