A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
370 lines
11 KiB
Python
370 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
DSR Template Import Script

Converts the DOCX templates in the data-protection folder (docs/Datenschutz)
to HTML and plain text, replaces the Word placeholders with template
variables, and writes the results to docs/Datenschutz/converted for manual
review. The converted templates are then loaded into the database through
the admin panel.

Usage:
    cd backend
    source venv/bin/activate
    python scripts/import_dsr_templates.py

Requirements:
    pip install mammoth python-docx httpx
"""
|
|
|
|
import os
|
|
import sys
|
|
import re
|
|
import asyncio
|
|
from pathlib import Path
|
|
from typing import Optional, Dict, List, Tuple
|
|
|
|
# Third-party imports
|
|
try:
|
|
import mammoth
|
|
import httpx
|
|
except ImportError:
|
|
print("Bitte installieren Sie die erforderlichen Pakete:")
|
|
print(" pip install mammoth httpx")
|
|
sys.exit(1)
|
|
|
|
# Configuration
# Base URL of the consent service, overridable through the environment. A
# trailing slash is stripped so that f"{CONSENT_SERVICE_URL}/api/..." never
# produces a double slash.
CONSENT_SERVICE_URL = os.getenv("CONSENT_SERVICE_URL", "http://localhost:8081").rstrip("/")

# Folder with the DOCX templates: "docs/Datenschutz", three directory levels
# above this file. The path is resolved first because __file__ may be relative
# (e.g. "scripts/import_dsr_templates.py" on Python < 3.9); walking .parent on
# a relative path bottoms out at "." and would point at the wrong directory.
DOCS_PATH = Path(__file__).resolve().parent.parent.parent / "docs" / "Datenschutz"
|
|
|
|
# Maps DOCX file-name fragments to template types.
# The five request types most templates apply to.
_ALL_REQUEST_TYPES = ("access", "rectification", "erasure", "restriction", "portability")


def _template(template_type, name, request_types=_ALL_REQUEST_TYPES, **extra):
    """Build one mapping entry; every entry gets its own request_types list."""
    return {
        "template_type": template_type,
        "name": name,
        "request_types": list(request_types),
        **extra,
    }


DOCX_TEMPLATE_MAPPING: Dict[str, Dict] = {
    # Acknowledgements of receipt
    "Muster_1_Eingangsbestätigung": _template(
        "dsr_receipt_access",
        "Eingangsbestätigung (Auskunft Art. 15)",
        ["access"],
    ),

    # Identity verification
    "Muster_2_Anfrage Kontaktdaten": _template(
        "dsr_identity_request",
        "Anfrage Kontaktdaten zur Identifikation",
    ),
    "Muster_2_Anfrage Identität": _template(
        "dsr_identity_request",
        "Anfrage Identitätsnachweis",
    ),
    "Muster_3_Anfrage Identität": _template(
        "dsr_identity_request",
        "Anfrage Identitätsnachweis (erweitert)",
    ),

    # Processing confirmations
    "Muster_3_Bearbeitungsbestätigung": _template(
        "dsr_processing_started",
        "Bearbeitungsbestätigung",
    ),
    "Muster_4_Bearbeitungsbestätigung": _template(
        "dsr_processing_update",
        "Bearbeitungsupdate",
    ),

    # Clarification requests
    "Muster_4_Rückfragen Begehren": _template(
        "dsr_clarification_request",
        "Rückfragen zum Begehren",
    ),
    "Muster_5_Rückfragen Umfang": _template(
        "dsr_clarification_request",
        "Rückfragen zum Umfang",
        ["access"],
    ),

    # Completion - access
    "Muster_5_ Negativauskunft": _template(
        "dsr_completed_access",
        "Negativauskunft (keine Daten gefunden)",
        ["access"],
        variant="no_data",
    ),
    "Muster_6_ Beauskunftung Art. 15": _template(
        "dsr_completed_access",
        "Beauskunftung nach Art. 15",
        ["access"],
    ),

    # Completion - rectification
    "Muster_4_Information Berichtigung": _template(
        "dsr_completed_rectification",
        "Information über durchgeführte Berichtigung",
        ["rectification"],
    ),

    # Completion - erasure
    "Muster_6_Information Löschung": _template(
        "dsr_completed_erasure",
        "Information über durchgeführte Löschung",
        ["erasure"],
    ),

    # Completion - data portability
    "Muster_5_Information Übermittlung": _template(
        "dsr_completed_portability",
        "Information über Datenübermittlung",
        ["portability"],
    ),

    # Third-party notification
    "Muster_4_Anfrage Drittübermittlung": _template(
        "dsr_third_party_notification",
        "Anfrage zur Drittübermittlung",
        ["rectification", "erasure", "restriction"],
    ),
}
|
|
|
|
# Default placeholder mappings: each template variable is listed with the
# Word placeholders that are rewritten to it.
_PLACEHOLDER_GROUPS = (
    # Requester
    ("{{requester_name}}", ("[Name]", "[Vorname Name]")),
    ("{{requester_salutation}}", ("[Anrede]",)),
    ("{{requester_email}}", ("[E-Mail]",)),
    ("{{requester_address}}", ("[Adresse]",)),
    # Request
    ("{{request_number}}", ("[Vorgangsnummer]", "[Aktenzeichen]")),
    ("{{request_date}}", ("[Datum des Eingangs]", "[Eingangsdatum]")),
    ("{{deadline_date}}", ("[Frist]", "[Fristdatum]")),
    # Company
    ("{{company_name}}", ("[Firmenname]", "[Unternehmen]", "[Unternehmensname]", "[Firma]")),
    # Data protection officer (DSB)
    ("{{dpo_name}}", ("[DSB Name]", "[Name DSB]", "[Datenschutzbeauftragter]")),
    ("{{dpo_email}}", ("[E-Mail DSB]", "[DSB E-Mail]")),
    # Miscellaneous
    ("{{current_date}}", ("[Datum]",)),
    ("{{portal_url}}", ("[Portal-URL]",)),
)

PLACEHOLDER_REPLACEMENTS = {
    placeholder: variable
    for variable, placeholders in _PLACEHOLDER_GROUPS
    for placeholder in placeholders
}
|
|
|
|
|
|
def convert_docx_to_html(docx_path: Path) -> Tuple[str, List[str]]:
    """Convert a DOCX file to HTML with mammoth.

    Args:
        docx_path: Path of the DOCX file to read.

    Returns:
        A tuple of the cleaned HTML string and the texts of the messages
        mammoth reported during conversion.
    """
    with open(docx_path, "rb") as docx_file:
        conversion = mammoth.convert_to_html(docx_file)

    notes = [note.message for note in conversion.messages]

    # Clean-up: drop empty paragraphs, then collapse each whitespace run
    # into a single space.
    without_empty_paragraphs = conversion.value.replace('<p></p>', '')
    cleaned = re.sub(r'\s+', ' ', without_empty_paragraphs)

    return cleaned, notes
|
|
|
|
|
|
def replace_placeholders(html: str) -> str:
    """Replace Word placeholders with template variables.

    Every key of PLACEHOLDER_REPLACEMENTS found in ``html`` is replaced by its
    value. Any bracketed ``[...]`` text still present afterwards is reported
    on stdout as a warning; it is left in the returned string unchanged.
    """
    substituted = html
    for word_placeholder, template_variable in PLACEHOLDER_REPLACEMENTS.items():
        substituted = substituted.replace(word_placeholder, template_variable)

    # Look for placeholders that no mapping covered.
    leftovers = re.findall(r'\[([^\]]+)\]', substituted)
    if leftovers:
        print(f" Warnung: Nicht ersetzte Platzhalter gefunden: {leftovers}")

    return substituted
|
|
|
|
|
|
def html_to_text(html: str) -> str:
    """Convert an HTML fragment to plain text.

    ``<br>`` becomes a newline and each ``</p>`` a blank line; all remaining
    tags are removed, runs of three or more newlines are collapsed to two,
    and HTML character references are decoded.

    Args:
        html: The HTML fragment to convert.

    Returns:
        The plain-text rendering with leading/trailing whitespace stripped.
    """
    # Imported by name inside the function: the parameter ``html`` shadows the
    # standard-library module of the same name in this scope.
    from html import unescape

    # Turn line-breaking tags into newlines, then strip every other tag.
    text = re.sub(r'<br\s*/?>', '\n', html)
    text = re.sub(r'</p>', '\n\n', text)
    text = re.sub(r'<[^>]+>', '', text)

    # Remove excessive blank lines.
    text = re.sub(r'\n{3,}', '\n\n', text)

    # Decode character references in a single pass, after tag removal, so an
    # escaped "&lt;b&gt;" ends up as literal text and "&amp;lt;" is not decoded
    # twice. Non-breaking spaces are mapped to ordinary spaces.
    text = unescape(text).replace('\xa0', ' ')

    return text.strip()
|
|
|
|
|
|
def find_matching_template(filename: str) -> Optional[Dict]:
    """Find the template mapping for a file name.

    Returns the value of the first DOCX_TEMPLATE_MAPPING entry whose key is a
    substring of ``filename``, or None when no key matches.

    Both sides are compared in Unicode NFC form. Several keys contain umlauts,
    and a file name may arrive in decomposed (NFD) form, for example from
    file systems that store names that way. A raw substring test would then
    miss those files even though the names look identical.
    """
    import unicodedata  # local import keeps this function self-contained

    normalized_name = unicodedata.normalize("NFC", filename)
    for pattern, mapping in DOCX_TEMPLATE_MAPPING.items():
        if unicodedata.normalize("NFC", pattern) in normalized_name:
            return mapping
    return None
|
|
|
|
|
|
async def get_template_id(template_type: str) -> Optional[str]:
    """Look up the ID of the template with the given type.

    Sends a GET request to the consent service's DSR template list and returns
    the ``id`` of the first listed template whose ``template_type`` matches.
    Returns None when nothing matches, when the response status is not 200,
    or when the request fails (the error is printed).
    """
    # Simulated admin token (for local development).
    auth_headers = {
        "Authorization": "Bearer dev-admin-token",
        "Content-Type": "application/json"
    }
    list_url = f"{CONSENT_SERVICE_URL}/api/v1/admin/dsr-templates"

    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(list_url, headers=auth_headers, timeout=10.0)
            if response.status_code != 200:
                return None

            for entry in response.json().get("templates", []):
                if entry.get("template_type") == template_type:
                    return entry.get("id")
        except Exception as e:
            print(f" Fehler beim Abrufen der Templates: {e}")

    return None
|
|
|
|
|
|
async def create_template_version(
    template_id: str,
    version: str,
    subject: str,
    body_html: str,
    body_text: str
) -> bool:
    """Create a new version of a template through the consent service.

    Posts the version data (language fixed to "de") to the versions endpoint
    of ``template_id``.

    Returns:
        True when the service answers with status 200 or 201; False for any
        other status or when the request raises (the error is printed).
    """
    endpoint = f"{CONSENT_SERVICE_URL}/api/v1/admin/dsr-templates/{template_id}/versions"
    auth_headers = {
        "Authorization": "Bearer dev-admin-token",
        "Content-Type": "application/json"
    }
    payload = {
        "version": version,
        "language": "de",
        "subject": subject,
        "body_html": body_html,
        "body_text": body_text
    }

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(
                endpoint,
                headers=auth_headers,
                json=payload,
                timeout=30.0
            )
        except Exception as e:
            print(f" Fehler beim Erstellen der Version: {e}")
            return False
        else:
            return response.status_code in (200, 201)
|
|
|
|
|
|
def _output_basename(template_type: str, version: str, source_stem: str) -> str:
    """Build a file-system-friendly output name that is unique per source file."""
    # Runs of characters other than word characters, dots and dashes become "_".
    slug = re.sub(r'[^\w.-]+', '_', source_stem).strip('_')
    if not slug:
        return f"{template_type}_{version}"
    return f"{template_type}_{version}_{slug}"


async def process_docx_file(docx_path: Path) -> bool:
    """Process a single DOCX file.

    Looks up the template mapping for the file name, converts the document to
    HTML, replaces placeholders, derives a plain-text variant, and writes both
    to the "converted" folder below DOCS_PATH for manual review.

    Several mapping entries share the same template type and, without a
    ``_v<N>`` marker in the file name, the same default version. The output
    name therefore also contains the source file name; otherwise those
    templates would silently overwrite each other.

    Args:
        docx_path: Path of the DOCX file to process.

    Returns:
        True when the converted files were written, False when no mapping
        matched or the conversion failed.
    """
    filename = docx_path.stem
    print(f"\nVerarbeite: {filename}")

    # Find the matching template.
    mapping = find_matching_template(filename)
    if not mapping:
        print(" Übersprungen: Kein Mapping gefunden")
        return False

    template_type = mapping["template_type"]
    print(f" Template-Typ: {template_type}")

    # Convert DOCX to HTML.
    try:
        html, warnings = convert_docx_to_html(docx_path)
    except Exception as e:
        print(f" Fehler bei der Konvertierung: {e}")
        return False
    if warnings:
        print(f" Warnungen: {warnings[:3]}")  # show at most 3 warnings

    # Replace placeholders.
    html = replace_placeholders(html)
    text = html_to_text(html)

    # Extract the version number from the file name ("_v2" -> "1.2.0").
    version_match = re.search(r'_v(\d+)', filename)
    version = f"1.{version_match.group(1)}.0" if version_match else "1.0.0"

    # Build the subject; make sure it carries the request-number variable.
    subject = mapping["name"]
    if "{{request_number}}" not in subject:
        subject = f"{subject} - {{{{request_number}}}}"

    print(f" Version: {version}")
    print(f" Betreff: {subject}")
    print(f" HTML-Länge: {len(html)} Zeichen")

    # Save as local files for manual review.
    output_dir = DOCS_PATH / "converted"
    output_dir.mkdir(parents=True, exist_ok=True)

    basename = _output_basename(template_type, version, filename)
    with open(output_dir / f"{basename}.html", "w", encoding="utf-8") as f:
        f.write(html)
    with open(output_dir / f"{basename}.txt", "w", encoding="utf-8") as f:
        f.write(text)

    print(f" Gespeichert in: {output_dir}")

    return True
|
|
|
|
|
|
async def main():
    """Run the import: convert every DOCX file in DOCS_PATH and print a summary.

    Exits with status 1 when DOCS_PATH does not exist.
    """
    banner = "=" * 60
    print(banner)
    print("DSR Template Import Script")
    print(banner)

    if not DOCS_PATH.exists():
        print(f"Fehler: Verzeichnis nicht gefunden: {DOCS_PATH}")
        sys.exit(1)

    # Collect all DOCX files.
    docx_files = list(DOCS_PATH.glob("*.docx"))
    print(f"\nGefunden: {len(docx_files)} DOCX-Dateien")

    # Process the files one after another, in sorted order.
    outcomes = [await process_docx_file(docx_path) for docx_path in sorted(docx_files)]
    success_count = sum(1 for succeeded in outcomes if succeeded)

    print("\n" + banner)
    print(f"Verarbeitet: {success_count}/{len(docx_files)} Dateien")
    print(banner)

    print("\nHinweis: Die konvertierten Vorlagen wurden im Ordner")
    print(f" {DOCS_PATH / 'converted'}")
    print("gespeichert und können manuell überprüft werden.")
    print("\nUm die Vorlagen in die Datenbank zu laden, verwenden Sie")
    print("das Admin-Panel unter /app → Consent Admin → Betroffenenanfragen")


# Script entry point: run the async main() to completion.
if __name__ == "__main__":
    asyncio.run(main())
|