This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/gdpr_export_service.py
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

461 lines
16 KiB
Python

"""
GDPR Export Service für BreakPilot
Generiert PDF-Datenauskunft gemäß DSGVO Art. 15
Datenkategorien mit Löschfristen:
- Stammdaten: Account-Löschung + 30 Tage
- Einwilligungen: 3 Jahre nach Widerruf/Ablauf
- IP-Adressen: 4 Wochen
- Session-Daten: Nach Sitzungsende
- Audit-Log (personenbezogen): 3 Jahre
- Analytics (Opt-in): 26 Monate
- Marketing (Opt-in): 12 Monate
"""
import os
import io
import uuid
import httpx
from datetime import datetime
from typing import Optional, Dict, Any, List
from pathlib import Path
from jinja2 import Environment, FileSystemLoader, select_autoescape
# WeasyPrint is used for PDF generation. It needs native system libraries
# (GTK/Pango/Cairo); when it cannot be loaded, only the HTML export works.
WEASYPRINT_AVAILABLE = False
HTML = None
CSS = None
try:
    from weasyprint import HTML, CSS
except (ImportError, OSError) as e:
    # ImportError: weasyprint is not installed.
    # OSError: a required system library (libgobject, etc.) is missing.
    # The flag and the HTML/CSS placeholders keep their defaults set above.
    print(f"WeasyPrint nicht verfügbar: {e}")
    print("PDF-Export deaktiviert. HTML-Export ist weiterhin möglich.")
else:
    # Only reached when the import succeeded.
    WEASYPRINT_AVAILABLE = True

# Base URL of the consent service; overridable via the environment.
CONSENT_SERVICE_URL = os.getenv("CONSENT_SERVICE_URL", "http://localhost:8081")
class GDPRExportService:
    """Build GDPR (Art. 15) data-access exports as HTML or PDF.

    The service collects a user's data from the consent service over HTTP,
    renders it with the ``gdpr_export.html`` Jinja2 template and, when
    WeasyPrint is available, converts the HTML to PDF.
    """

    # Audit-log action codes mapped to the German labels shown in the export.
    _ACTION_TRANSLATIONS = {
        "login": "Anmeldung",
        "logout": "Abmeldung",
        "register": "Registrierung",
        "consent_given": "Einwilligung erteilt",
        "consent_withdrawn": "Einwilligung widerrufen",
        "cookie_consent_updated": "Cookie-Präferenzen aktualisiert",
        "password_changed": "Passwort geändert",
        "password_reset_requested": "Passwort-Reset angefordert",
        "password_reset_completed": "Passwort zurückgesetzt",
        "email_verified": "E-Mail verifiziert",
        "profile_updated": "Profil aktualisiert",
        "data_export_requested": "Datenexport angefordert",
        "data_deletion_requested": "Datenlöschung angefordert",
        "session_created": "Sitzung gestartet",
        "session_revoked": "Sitzung beendet",
        "version_published": "Version veröffentlicht",
        "version_approved": "Version genehmigt",
        "version_rejected": "Version abgelehnt",
    }

    def __init__(self, template_dir: Optional[str] = None):
        """Set up the Jinja2 environment used for rendering.

        Args:
            template_dir: Path to the template directory. Defaults to
                ``templates/gdpr`` next to this module.
        """
        if template_dir is None:
            template_dir = str(Path(__file__).parent / "templates" / "gdpr")
        self.template_dir = template_dir
        self.jinja_env = Environment(
            loader=FileSystemLoader(template_dir),
            autoescape=select_autoescape(['html', 'xml']),
        )
        # Register the custom filters the template can use.
        self.jinja_env.filters['format_datetime'] = self._format_datetime
        self.jinja_env.filters['translate_action'] = self._translate_action

    @staticmethod
    def _format_datetime(value: Optional[str]) -> str:
        """Format an ISO datetime for display as ``DD.MM.YYYY HH:MM``.

        Args:
            value: ISO-8601 string (a trailing ``Z`` is accepted), or an
                object that already provides ``strftime``.

        Returns:
            The formatted timestamp, ``"-"`` for empty input, or
            ``str(value)`` when the value cannot be parsed or formatted.
        """
        if not value:
            return "-"
        try:
            if isinstance(value, str):
                # ISO format, e.g. 2024-01-15T10:30:00Z; "Z" is rewritten to
                # an explicit UTC offset before parsing.
                dt = datetime.fromisoformat(value.replace('Z', '+00:00'))
            else:
                dt = value
            return dt.strftime("%d.%m.%Y %H:%M")
        except (ValueError, AttributeError):
            # Unparseable string or object without strftime: show it as-is.
            return str(value)

    @staticmethod
    def _translate_action(action: str) -> str:
        """Translate an audit-log action code into its German label.

        Unknown codes are returned unchanged.
        """
        return GDPRExportService._ACTION_TRANSLATIONS.get(action, action)

    @staticmethod
    def _extract_list(payload: Any, key: str) -> Any:
        """Pull a collection out of a response that is a list or a wrapper dict.

        Args:
            payload: Decoded JSON body.
            key: Key under which a dict-shaped body carries the collection.

        Returns:
            ``payload`` itself when it is a list, ``payload[key]`` when it is
            a dict holding a non-null value for ``key``, otherwise ``[]``.
        """
        if isinstance(payload, list):
            return payload
        if isinstance(payload, dict):
            value = payload.get(key)
            return [] if value is None else value
        return []

    @staticmethod
    async def _fetch_json(client, url: str, headers: Dict[str, str], label: str) -> Any:
        """GET ``url`` and return the decoded JSON body, or ``None``.

        Best effort: any exception is reported via ``print`` and swallowed so
        that one failing endpoint does not abort the whole export. Non-200
        responses yield ``None`` as well.

        Args:
            client: HTTP client exposing an awaitable ``get`` method.
            url: Absolute URL to request.
            headers: Request headers.
            label: Short name of the resource, used in the error message.
        """
        try:
            response = await client.get(url, headers=headers, timeout=10.0)
            if response.status_code == 200:
                return response.json()
        except Exception as e:
            print(f"Error fetching {label}: {e}")
        return None

    async def get_user_data(self, token: str) -> Dict[str, Any]:
        """Fetch all data of the user from the consent service.

        Every endpoint is queried independently; endpoints that fail or do
        not answer with HTTP 200 leave the corresponding default in place.

        Args:
            token: JWT of the user, sent as bearer token.

        Returns:
            Dictionary with the keys ``user``, ``consents``,
            ``cookie_consents``, ``audit_logs`` and ``sessions``.
        """
        headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        }
        user_data: Dict[str, Any] = {
            "user": {},
            "consents": [],
            "cookie_consents": [],
            "audit_logs": [],
            "sessions": [],
        }
        base_url = f"{CONSENT_SERVICE_URL}/api/v1"

        async with httpx.AsyncClient() as client:
            # Profile data
            profile = await self._fetch_json(
                client, f"{base_url}/profile", headers, "profile"
            )
            if profile is not None:
                user_data["user"] = profile

            # Consents (body may be a bare list or {"consents": [...]})
            consents = await self._fetch_json(
                client, f"{base_url}/consent/my", headers, "consents"
            )
            if consents is not None:
                user_data["consents"] = self._extract_list(consents, "consents")

            # Cookie preferences
            cookie_consents = await self._fetch_json(
                client, f"{base_url}/cookies/consent/my", headers, "cookie consents"
            )
            if cookie_consents is not None:
                user_data["cookie_consents"] = self._extract_list(
                    cookie_consents, "consents"
                )

            # "My data" GDPR endpoint: merge whatever additional parts it has.
            my_data = await self._fetch_json(
                client, f"{base_url}/privacy/my-data", headers, "my-data"
            )
            if isinstance(my_data, dict):
                if "audit_log" in my_data:
                    user_data["audit_logs"] = my_data["audit_log"]
                if "sessions" in my_data:
                    user_data["sessions"] = my_data["sessions"]
                # The profile endpoint wins; this is only a fallback.
                if "user" in my_data and not user_data["user"]:
                    user_data["user"] = my_data["user"]

            # Active sessions: only used when my-data did not provide any.
            sessions = await self._fetch_json(
                client, f"{base_url}/profile/sessions", headers, "sessions"
            )
            if sessions is not None and not user_data["sessions"]:
                user_data["sessions"] = self._extract_list(sessions, "sessions")

        return user_data

    def render_html(self, data: Dict[str, Any]) -> str:
        """Render the ``gdpr_export.html`` template with the user data.

        Args:
            data: User data dictionary as produced by ``get_user_data``.

        Returns:
            The rendered HTML document.
        """
        template = self.jinja_env.get_template("gdpr_export.html")
        # Build the template context.
        context = {
            "export_date": datetime.now().strftime("%d.%m.%Y %H:%M"),
            "document_id": f"GDPR-{uuid.uuid4().hex[:8].upper()}",
            "user": data.get("user", {}),
            "consents": data.get("consents", []),
            "cookie_consents": data.get("cookie_consents", []),
            "audit_logs": data.get("audit_logs", []),
            # Collected by get_user_data; exposed so the template can list them.
            "sessions": data.get("sessions", []),
            # Company info (could come from configuration later).
            "company_name": "BreakPilot GmbH",
            "company_address": "Musterstraße 1",
            "company_city": "12345 Musterstadt",
            "dpo_name": "Datenschutzbeauftragter",
            "dpo_email": "datenschutz@breakpilot.app",
        }
        return template.render(**context)

    def generate_pdf(self, html_content: str) -> bytes:
        """Convert HTML to PDF with WeasyPrint.

        Args:
            html_content: Rendered HTML.

        Returns:
            The PDF document as bytes.

        Raises:
            RuntimeError: If WeasyPrint is not available.
        """
        if not WEASYPRINT_AVAILABLE:
            raise RuntimeError(
                "WeasyPrint ist nicht installiert. "
                "Bitte installieren Sie: pip install weasyprint"
            )
        # base_url lets relative references in the HTML resolve against the
        # template directory.
        html = HTML(string=html_content, base_url=self.template_dir)
        pdf_buffer = io.BytesIO()
        html.write_pdf(pdf_buffer)
        return pdf_buffer.getvalue()

    async def generate_user_data_pdf(self, token: str) -> bytes:
        """Run the full pipeline: fetch data, render HTML, generate PDF.

        Args:
            token: JWT of the user.

        Returns:
            The PDF document as bytes.

        Raises:
            RuntimeError: If WeasyPrint is not available.
        """
        user_data = await self.get_user_data(token)
        html_content = self.render_html(user_data)
        return self.generate_pdf(html_content)

    async def generate_user_data_html(self, token: str) -> str:
        """Generate only the HTML (for preview or when PDF is unavailable).

        Args:
            token: JWT of the user.

        Returns:
            The rendered HTML document.
        """
        user_data = await self.get_user_data(token)
        return self.render_html(user_data)
# Data categories and their retention periods (used for API responses).
DATA_RETENTION_POLICIES = [
    {
        "category": "stammdaten",
        "name_de": "Stammdaten",
        "name_en": "Master Data",
        "description_de": "Name, E-Mail-Adresse, Kontoinformationen",
        "description_en": "Name, email address, account information",
        "retention_period": "Account-Löschung + 30 Tage",
        "retention_days": None,  # depends on when the account is deleted
        "legal_basis": "Vertragserfüllung (Art. 6 Abs. 1 lit. b DSGVO)",
        "is_essential": True,
    },
    {
        "category": "consent_records",
        "name_de": "Einwilligungen",
        "name_en": "Consent Records",
        "description_de": "Consent-Entscheidungen, Dokumentversionen",
        "description_en": "Consent decisions, document versions",
        "retention_period": "3 Jahre nach Widerruf/Ablauf",
        "retention_days": 1095,  # 3 years
        "legal_basis": "Gesetzliche Nachweispflicht (§ 7a UWG)",
        "is_essential": True,
    },
    {
        "category": "ip_addresses",
        "name_de": "IP-Adressen",
        "name_en": "IP Addresses",
        "description_de": "Technische Protokollierung bei Aktionen",
        "description_en": "Technical logging during actions",
        "retention_period": "4 Wochen",
        "retention_days": 28,
        "legal_basis": "Berechtigtes Interesse (Art. 6 Abs. 1 lit. f DSGVO)",
        "is_essential": True,
    },
    {
        "category": "session_data",
        "name_de": "Session-Daten",
        "name_en": "Session Data",
        "description_de": "Login-Tokens, Sitzungsinformationen",
        "description_en": "Login tokens, session information",
        "retention_period": "Nach Sitzungsende oder 24h Inaktivität",
        "retention_days": 1,
        "legal_basis": "Vertragserfüllung (Art. 6 Abs. 1 lit. b DSGVO)",
        "is_essential": True,
    },
    {
        "category": "audit_log",
        "name_de": "Audit-Log",
        "name_en": "Audit Log",
        "description_de": "Protokoll aller datenschutzrelevanten Aktionen",
        "description_en": "Log of all privacy-relevant actions",
        "retention_period": "3 Jahre (personenbezogen)",
        "retention_days": 1095,
        "legal_basis": "Berechtigtes Interesse / Compliance",
        "is_essential": True,
    },
    {
        "category": "password_reset_tokens",
        "name_de": "Passwort-Reset-Tokens",
        "name_en": "Password Reset Tokens",
        "description_de": "Temporäre Tokens für Passwort-Zurücksetzung",
        "description_en": "Temporary tokens for password reset",
        "retention_period": "24 Stunden oder nach Nutzung",
        "retention_days": 1,
        "legal_basis": "Vertragserfüllung",
        "is_essential": True,
    },
    {
        "category": "email_verification_tokens",
        "name_de": "E-Mail-Verifikations-Tokens",
        "name_en": "Email Verification Tokens",
        "description_de": "Tokens für E-Mail-Bestätigung",
        "description_en": "Tokens for email confirmation",
        "retention_period": "7 Tage oder nach Nutzung",
        "retention_days": 7,
        "legal_basis": "Vertragserfüllung",
        "is_essential": True,
    },
    {
        "category": "analytics",
        "name_de": "Analytics-Daten",
        "name_en": "Analytics Data",
        "description_de": "Nutzungsstatistiken (nur bei Zustimmung)",
        "description_en": "Usage statistics (only with consent)",
        "retention_period": "26 Monate",
        "retention_days": 790,
        "legal_basis": "Einwilligung (Art. 6 Abs. 1 lit. a DSGVO)",
        "is_essential": False,
        "cookie_category": "analytics",
    },
    {
        "category": "marketing",
        "name_de": "Marketing-Daten",
        "name_en": "Marketing Data",
        "description_de": "Werbe-Identifier (nur bei Zustimmung)",
        "description_en": "Advertising identifiers (only with consent)",
        "retention_period": "12 Monate",
        "retention_days": 365,
        "legal_basis": "Einwilligung (Art. 6 Abs. 1 lit. a DSGVO)",
        "is_essential": False,
        "cookie_category": "marketing",
    },
    {
        "category": "functional",
        "name_de": "Funktionale Daten",
        "name_en": "Functional Data",
        "description_de": "Personalisierung, Präferenzen (bei Zustimmung)",
        "description_en": "Personalization, preferences (with consent)",
        "retention_period": "6 Monate",
        "retention_days": 180,
        "legal_basis": "Einwilligung (Art. 6 Abs. 1 lit. a DSGVO)",
        "is_essential": False,
        "cookie_category": "functional",
    },
    {
        "category": "export_requests",
        "name_de": "Export-Anfragen",
        "name_en": "Export Requests",
        "description_de": "Anträge auf Datenauskunft",
        "description_en": "Data access requests",
        "retention_period": "30 Tage nach Abschluss",
        "retention_days": 30,
        "legal_basis": "Vertragserfüllung / DSGVO Art. 15",
        "is_essential": True,
    },
    {
        "category": "deletion_requests",
        "name_de": "Lösch-Anfragen",
        "name_en": "Deletion Requests",
        "description_de": "Anträge auf Datenlöschung (anonymisiert)",
        "description_en": "Data deletion requests (anonymized)",
        "retention_period": "3 Jahre (anonymisiert)",
        "retention_days": 1095,
        "legal_basis": "Nachweis der Löschung",
        "is_essential": True,
    },
    {
        "category": "notifications",
        "name_de": "Benachrichtigungen",
        "name_en": "Notifications",
        "description_de": "System-Benachrichtigungen an den Nutzer",
        "description_en": "System notifications to user",
        "retention_period": "90 Tage nach Lesen",
        "retention_days": 90,
        "legal_basis": "Vertragserfüllung",
        "is_essential": True,
    },
]
def get_data_retention_policies() -> List[Dict[str, Any]]:
    """Return every data category together with its retention period.

    The module-level ``DATA_RETENTION_POLICIES`` list itself is returned,
    not a copy.
    """
    return DATA_RETENTION_POLICIES
def get_essential_data_categories() -> List[Dict[str, Any]]:
    """Return only the data categories flagged as essential.

    An entry without an ``is_essential`` key is treated as essential.
    """
    return [
        policy
        for policy in DATA_RETENTION_POLICIES
        if policy.get("is_essential", True)
    ]
def get_optional_data_categories() -> List[Dict[str, Any]]:
    """Return the optional (opt-in) data categories.

    These are the entries whose ``is_essential`` flag is falsy; an entry
    without the key counts as essential and is excluded.
    """
    return [
        policy
        for policy in DATA_RETENTION_POLICIES
        if not policy.get("is_essential", True)
    ]