275bdf9848
Build + Deploy / build-admin-compliance (push) Successful in 1m49s
Build + Deploy / build-backend-compliance (push) Successful in 2m57s
Build + Deploy / build-ai-sdk (push) Successful in 50s
Build + Deploy / build-developer-portal (push) Successful in 1m2s
Build + Deploy / build-tts (push) Successful in 1m23s
Build + Deploy / build-document-crawler (push) Successful in 39s
Build + Deploy / build-dsms-gateway (push) Successful in 23s
Build + Deploy / build-dsms-node (push) Successful in 10s
CI / branch-name (push) Has been skipped
CI / guardrail-integrity (push) Has been skipped
CI / loc-budget (push) Failing after 21s
CI / secret-scan (push) Has been skipped
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / nodejs-build (push) Successful in 2m31s
CI / dep-audit (push) Has been skipped
CI / sbom-scan (push) Has been skipped
CI / test-go (push) Failing after 41s
CI / test-python-backend (push) Successful in 40s
CI / test-python-document-crawler (push) Successful in 25s
CI / test-python-dsms-gateway (push) Successful in 20s
CI / validate-canonical-controls (push) Successful in 13s
Build + Deploy / trigger-orca (push) Successful in 2m46s
These files existed on the feature branch but were never cherry-picked to main, causing ModuleNotFoundError on import: - dse_parser.py — parses DSE HTML into structured sections - dse_matcher.py — matches detected services against DSE sections - mandatory_content_checker.py — checks Art. 13 DSGVO mandatory fields - legal_basis_validator.py — validates legal basis (lit. a-f) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
180 lines
7.4 KiB
Python
180 lines
7.4 KiB
Python
"""
|
|
Legal Basis Validator — checks if the correct DSGVO legal basis (lit. a-f)
|
|
is used for each processing purpose in the privacy policy.
|
|
|
|
⚠️ TECHNISCHE SCHULD / HARDCODED KNOWLEDGE:
|
|
Dieses Modul enthält hartkodierte Rechtsgrundlagen-Zuordnungen (CORRECT_BASIS dict).
|
|
Das ist ein TEMPORAERER Fallback bis die Control Library entsprechende Controls hat.
|
|
|
|
MITTELFRISTIGES ZIEL: Dieses Dict durch RAG/Control-Library-Abfragen ersetzen.
|
|
Neue Controls sollten in der Pipeline generiert werden, z.B.:
|
|
"Cookie-Tracking erfordert Art. 6(1)(a) Einwilligung (EuGH C-673/17 Planet49)"
|
|
→ canonical_controls mit scope_conditions + legal_ref
|
|
|
|
BIS DAHIN: Dieses Dict wird als Fallback genutzt mit einem Warning-Log wenn
|
|
es herangezogen wird. Bei jedem neuen Gesetz/Urteil muss SOWOHL die Pipeline
|
|
als auch dieses Dict aktualisiert werden — oder besser: das Dict entfernen und
|
|
nur noch Controls nutzen.
|
|
|
|
Erstellt: 2026-04-29 | Review-Datum: 2026-07-01 | Owner: Agent-Team
|
|
|
|
Common mistakes detected:
|
|
- Cookie tracking on lit. f (legitimate interest) instead of lit. a (consent)
|
|
- Marketing emails on lit. f instead of lit. a
|
|
- Analytics on lit. b (contract) — incorrect overextension
|
|
- Klarna credit check without Art. 22 reference
|
|
"""
|
|
|
|
import logging
|
|
import re
|
|
from dataclasses import dataclass
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class LitFinding:
    """One legal-basis problem detected for a processing purpose.

    Instances are created by ``validate_legal_bases``: either a wrong legal
    basis was stated near a detected purpose, or a required hint was missing.
    """

    # Purpose identifier; a key of CORRECT_BASIS (e.g. "cookie_tracking").
    purpose: str
    # The matching "wrong_patterns" entry, or "(fehlt)" when a required
    # hint ("must_contain") is missing.
    stated_basis: str
    # Expected legal basis, copied from the rule's "correct" entry.
    correct_basis: str
    # Severity label; validate_legal_bases always sets "HIGH".
    severity: str
    # Human-readable (German) description of the finding.
    text: str
    # Legal reference, copied from the rule's "ref" entry.
    legal_ref: str
    # Excerpt of the policy text around the detected purpose
    # (validate_legal_bases truncates it to 300 characters).
    original_text: str = ""
|
|
|
|
|
|
# Purpose → correct legal basis mapping
# Based on: DSK Kurzpapiere, Planet49 (EuGH C-673/17), BGH Cookie-Urteil
#
# Schema of each entry, as consumed by validate_legal_bases:
#   "correct"         - expected legal basis; copied into LitFinding.correct_basis
#   "wrong_patterns"  - lowercase substrings; the first one found in the
#                       lowercased excerpt around the purpose yields a finding
#   "detect_patterns" - lowercase regexes (re.search) that detect the purpose;
#                       tried in list order, the first hit wins
#   "ref"             - legal reference; copied into LitFinding.legal_ref
#   "must_contain"    - optional lowercase substrings; if none of them occurs
#                       in the excerpt, a "missing hint" finding is emitted
CORRECT_BASIS: dict[str, dict] = {
    "cookie_tracking": {
        "correct": "lit. a (Einwilligung)",
        "wrong_patterns": ["berechtigtes interesse", "lit. f", "lit.f", "legitimate interest"],
        "detect_patterns": ["cookie", "tracking", "pixel", "analytics.*cookie"],
        "ref": "EuGH C-673/17 (Planet49), §25 TDDDG",
    },
    "web_analytics": {
        "correct": "lit. a (Einwilligung)",
        # NOTE(review): only the ASCII spelling "vertragserfuellung" is listed;
        # text containing "vertragserfüllung" will not match this substring.
        # Confirm whether the umlaut spelling should be added.
        "wrong_patterns": ["berechtigtes interesse", "lit. f", "lit.f", "vertragserfuellung", "lit. b", "lit.b"],
        "detect_patterns": ["google analytics", "webanalyse", "web analytics", "reichweitenmessung",
                            "nutzungsanalyse", "hotjar", "matomo"],
        "ref": "DSK Orientierungshilfe Telemedien, §25 TDDDG",
    },
    "marketing_email": {
        "correct": "lit. a (Einwilligung)",
        "wrong_patterns": ["berechtigtes interesse", "lit. f", "lit.f"],
        "detect_patterns": ["newsletter", "marketing.*mail", "werbe.*mail", "werbe.*email",
                            "marketing.*email", "werbliche.*kommunikation"],
        "ref": "Art. 7 DSGVO, §7 UWG (Double Opt-In)",
    },
    "remarketing": {
        "correct": "lit. a (Einwilligung)",
        "wrong_patterns": ["berechtigtes interesse", "lit. f", "lit.f"],
        "detect_patterns": ["remarketing", "retargeting", "personalisierte werbung",
                            "personalized advertising", "custom audience"],
        "ref": "§25 TDDDG, EuGH C-673/17",
    },
    "credit_check": {
        "correct": "lit. b/f + Art. 22 DSGVO Hinweis",
        "wrong_patterns": [],  # Not about wrong basis, but missing Art. 22
        "detect_patterns": ["bonitaet", "bonität", "kreditprüfung", "kreditpruefung",
                            "schufa", "auskunftei", "klarna.*rechnung", "ratenzahlung"],
        "ref": "Art. 22 DSGVO (automatisierte Einzelentscheidung)",
        "must_contain": ["art. 22", "art.22", "automatisierte entscheidung",
                         "automated decision", "einzelentscheidung"],
    },
    "social_media_embed": {
        "correct": "lit. a (Einwilligung)",
        "wrong_patterns": ["berechtigtes interesse", "lit. f", "lit.f"],
        "detect_patterns": ["facebook.*plugin", "social.*plugin", "like.*button",
                            "share.*button", "instagram.*embed", "twitter.*embed"],
        "ref": "EuGH C-40/17 (Fashion ID), 2-Klick-Loesung",
    },
    "session_recording": {
        "correct": "lit. a (Einwilligung)",
        "wrong_patterns": ["berechtigtes interesse", "lit. f", "lit.f"],
        "detect_patterns": ["session.?recording", "session.?replay", "heatmap",
                            "mouseflow", "hotjar.*recording", "clarity.*recording",
                            "fullstory", "lucky orange"],
        "ref": "§25 TDDDG, Aufzeichnung von Nutzerverhalten",
    },
}
|
|
|
|
|
|
def validate_legal_bases(dse_text: str) -> list[LitFinding]:
    """Check whether a privacy policy states the correct DSGVO legal bases.

    For every purpose in ``CORRECT_BASIS`` the text is searched for the
    purpose's ``detect_patterns``. Around the first hit an excerpt is taken
    (100 characters before the match, 200 after) and inspected for:

    * a wrong legal basis (any ``wrong_patterns`` substring), and
    * a missing mandatory hint (none of the ``must_contain`` substrings).

    Uses the HARDCODED ``CORRECT_BASIS`` dict as a fallback and logs a warning
    on every call.
    TODO: Replace with RAG/Control Library query when lit-mapping Controls exist.

    Args:
        dse_text: Text of the privacy policy (Datenschutzerklaerung).

    Returns:
        One ``LitFinding`` per detected problem, in ``CORRECT_BASIS`` order.
        At most one wrong-basis finding and one missing-hint finding are
        produced per purpose. Empty when nothing was detected.
    """
    logger.warning(
        "legal_basis_validator: Using HARDCODED rules (CORRECT_BASIS dict). "
        "This should be replaced with Control Library queries. Review date: 2026-07-01"
    )
    findings: list[LitFinding] = []

    for purpose_id, rules in CORRECT_BASIS.items():
        # Step 1: Is this purpose mentioned in the DSE?
        # Search the ORIGINAL text case-insensitively instead of a lowercased
        # copy: str.lower() can change the string length (e.g. U+0130),
        # which would make match offsets point at the wrong place when they
        # are used to slice dse_text below.
        hit = None
        for pattern in rules["detect_patterns"]:
            hit = re.search(pattern, dse_text, re.IGNORECASE)
            if hit:
                break
        if hit is None:
            continue

        # NOTE: only the first occurrence of the first matching pattern is
        # inspected; later mentions of the same purpose are not checked.
        # Excerpt window: 100 chars before the match, 200 chars after it.
        start = max(0, hit.start() - 100)
        end = min(len(dse_text), hit.end() + 200)
        matched_text = dse_text[start:end].strip()
        context_lower = matched_text.lower()

        # Step 2: Check if wrong legal basis is stated (first hit wins).
        wrong = next(
            (candidate for candidate in rules["wrong_patterns"] if candidate in context_lower),
            None,
        )
        if wrong is not None:
            findings.append(LitFinding(
                purpose=purpose_id,
                stated_basis=wrong,
                correct_basis=rules["correct"],
                severity="HIGH",
                text=f"Falsche Rechtsgrundlage: '{_purpose_label(purpose_id)}' nutzt "
                     f"'{wrong}' statt '{rules['correct']}'",
                legal_ref=rules["ref"],
                original_text=matched_text[:300],
            ))

        # Step 3: Special check — must_contain (e.g., Art. 22 for credit checks)
        if "must_contain" in rules:
            has_required = any(req in context_lower for req in rules["must_contain"])
            if not has_required:
                findings.append(LitFinding(
                    purpose=purpose_id,
                    stated_basis="(fehlt)",
                    correct_basis=rules["correct"],
                    severity="HIGH",
                    text=f"Pflichthinweis fehlt: '{_purpose_label(purpose_id)}' erwaehnt "
                         f"keine automatisierte Entscheidungsfindung ({rules['ref']})",
                    legal_ref=rules["ref"],
                    original_text=matched_text[:300],
                ))

    return findings
|
|
|
|
|
|
def _purpose_label(purpose_id: str) -> str:
|
|
"""German label for purpose ID."""
|
|
labels = {
|
|
"cookie_tracking": "Cookie-Tracking",
|
|
"web_analytics": "Webanalyse",
|
|
"marketing_email": "Marketing-Emails/Newsletter",
|
|
"remarketing": "Remarketing/Retargeting",
|
|
"credit_check": "Bonitaetspruefung",
|
|
"social_media_embed": "Social Media Einbindung",
|
|
"session_recording": "Session Recording/Heatmaps",
|
|
}
|
|
return labels.get(purpose_id, purpose_id)
|