This repository was archived on 2026-02-15. You can view files and clone it, but you cannot open issues or pull requests, or push commits.
Files
breakpilot-pwa/backend/scripts/import_dsr_templates.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

370 lines
11 KiB
Python

#!/usr/bin/env python3
"""
DSR Template Import Script
Importiert DOCX-Vorlagen aus dem Datenschutz-Ordner und erstellt
initiale Template-Versionen in der Datenbank.
Verwendung:
cd backend
source venv/bin/activate
python scripts/import_dsr_templates.py
Voraussetzungen:
pip install mammoth python-docx httpx
"""
import os
import sys
import re
import asyncio
from pathlib import Path
from typing import Optional, Dict, List, Tuple
# Third-party imports
try:
import mammoth
import httpx
except ImportError:
print("Bitte installieren Sie die erforderlichen Pakete:")
print(" pip install mammoth httpx")
sys.exit(1)
# Configuration
# Base URL of the consent service; can be overridden through the
# CONSENT_SERVICE_URL environment variable.
CONSENT_SERVICE_URL = os.getenv("CONSENT_SERVICE_URL", "http://localhost:8081")
# Folder holding the DOCX sources: "docs/Datenschutz" under the third ancestor
# directory of this file. The path is resolved first because a relative
# __file__ (e.g. the main script on interpreters before Python 3.9) would make
# the chained .parent calls bottom out at "." and point the folder at the
# current working directory instead.
DOCS_PATH = Path(__file__).resolve().parent.parent.parent / "docs" / "Datenschutz"
# Maps a fragment of a DOCX file name to the template it belongs to.
# Each entry carries the template type, a display name, the request types it
# applies to and, optionally, a variant marker.
DOCX_TEMPLATE_MAPPING: Dict[str, Dict] = {
    # Acknowledgements of receipt
    "Muster_1_Eingangsbestätigung": {
        "template_type": "dsr_receipt_access",
        "name": "Eingangsbestätigung (Auskunft Art. 15)",
        "request_types": ["access"],
    },

    # Identity verification
    "Muster_2_Anfrage Kontaktdaten": {
        "template_type": "dsr_identity_request",
        "name": "Anfrage Kontaktdaten zur Identifikation",
        "request_types": ["access", "rectification", "erasure", "restriction", "portability"],
    },
    "Muster_2_Anfrage Identität": {
        "template_type": "dsr_identity_request",
        "name": "Anfrage Identitätsnachweis",
        "request_types": ["access", "rectification", "erasure", "restriction", "portability"],
    },
    "Muster_3_Anfrage Identität": {
        "template_type": "dsr_identity_request",
        "name": "Anfrage Identitätsnachweis (erweitert)",
        "request_types": ["access", "rectification", "erasure", "restriction", "portability"],
    },

    # Processing confirmations
    "Muster_3_Bearbeitungsbestätigung": {
        "template_type": "dsr_processing_started",
        "name": "Bearbeitungsbestätigung",
        "request_types": ["access", "rectification", "erasure", "restriction", "portability"],
    },
    "Muster_4_Bearbeitungsbestätigung": {
        "template_type": "dsr_processing_update",
        "name": "Bearbeitungsupdate",
        "request_types": ["access", "rectification", "erasure", "restriction", "portability"],
    },

    # Clarification requests
    "Muster_4_Rückfragen Begehren": {
        "template_type": "dsr_clarification_request",
        "name": "Rückfragen zum Begehren",
        "request_types": ["access", "rectification", "erasure", "restriction", "portability"],
    },
    "Muster_5_Rückfragen Umfang": {
        "template_type": "dsr_clarification_request",
        "name": "Rückfragen zum Umfang",
        "request_types": ["access"],
    },

    # Completion - access
    "Muster_5_ Negativauskunft": {
        "template_type": "dsr_completed_access",
        "name": "Negativauskunft (keine Daten gefunden)",
        "request_types": ["access"],
        "variant": "no_data",
    },
    "Muster_6_ Beauskunftung Art. 15": {
        "template_type": "dsr_completed_access",
        "name": "Beauskunftung nach Art. 15",
        "request_types": ["access"],
    },

    # Completion - rectification
    "Muster_4_Information Berichtigung": {
        "template_type": "dsr_completed_rectification",
        "name": "Information über durchgeführte Berichtigung",
        "request_types": ["rectification"],
    },

    # Completion - erasure
    "Muster_6_Information Löschung": {
        "template_type": "dsr_completed_erasure",
        "name": "Information über durchgeführte Löschung",
        "request_types": ["erasure"],
    },

    # Completion - data portability
    "Muster_5_Information Übermittlung": {
        "template_type": "dsr_completed_portability",
        "name": "Information über Datenübermittlung",
        "request_types": ["portability"],
    },

    # Third-party disclosure
    "Muster_4_Anfrage Drittübermittlung": {
        "template_type": "dsr_third_party_notification",
        "name": "Anfrage zur Drittübermittlung",
        "request_types": ["rectification", "erasure", "restriction"],
    },
}
# Default mapping from bracketed Word placeholders to {{template_variables}}.
PLACEHOLDER_REPLACEMENTS = {
    # Requester
    "[Name]": "{{requester_name}}",
    "[Vorname Name]": "{{requester_name}}",
    "[Anrede]": "{{requester_salutation}}",
    "[E-Mail]": "{{requester_email}}",
    "[Adresse]": "{{requester_address}}",

    # Request
    "[Vorgangsnummer]": "{{request_number}}",
    "[Aktenzeichen]": "{{request_number}}",
    "[Datum des Eingangs]": "{{request_date}}",
    "[Eingangsdatum]": "{{request_date}}",
    "[Frist]": "{{deadline_date}}",
    "[Fristdatum]": "{{deadline_date}}",

    # Company
    "[Firmenname]": "{{company_name}}",
    "[Unternehmen]": "{{company_name}}",
    "[Unternehmensname]": "{{company_name}}",
    "[Firma]": "{{company_name}}",

    # Data protection officer (DSB)
    "[DSB Name]": "{{dpo_name}}",
    "[Name DSB]": "{{dpo_name}}",
    "[Datenschutzbeauftragter]": "{{dpo_name}}",
    "[E-Mail DSB]": "{{dpo_email}}",
    "[DSB E-Mail]": "{{dpo_email}}",

    # Miscellaneous
    "[Datum]": "{{current_date}}",
    "[Portal-URL]": "{{portal_url}}",
}
def convert_docx_to_html(docx_path: Path) -> Tuple[str, List[str]]:
    """Convert a DOCX file to compact HTML using mammoth.

    Args:
        docx_path: Location of the DOCX file to read.

    Returns:
        A tuple ``(html, messages)``: the generated HTML with empty
        paragraphs removed and every whitespace run collapsed to a single
        space, and the message texts reported by the converter.
    """
    with open(docx_path, "rb") as docx_file:
        conversion = mammoth.convert_to_html(docx_file)

    notices = [note.message for note in conversion.messages]

    # Clean up the markup: drop empty paragraphs, then squeeze whitespace.
    markup = conversion.value.replace('<p></p>', '')
    markup = re.sub(r'\s+', ' ', markup)
    return markup, notices
def replace_placeholders(html: str) -> str:
    """Swap Word-style placeholders for template variables.

    Every key of ``PLACEHOLDER_REPLACEMENTS`` found in ``html`` is replaced
    by its mapped value. Any bracketed text still present afterwards is
    reported on stdout as a warning; it is left in the result unchanged.

    Args:
        html: Markup that may contain bracketed placeholders.

    Returns:
        The markup with all known placeholders substituted.
    """
    converted = html
    for token in PLACEHOLDER_REPLACEMENTS:
        converted = converted.replace(token, PLACEHOLDER_REPLACEMENTS[token])

    # Look for bracketed placeholders that had no mapping.
    leftovers = re.findall(r'\[([^\]]+)\]', converted)
    if leftovers:
        print(f" Warnung: Nicht ersetzte Platzhalter gefunden: {leftovers}")
    return converted
def html_to_text(html: str) -> str:
    """Convert HTML to plain text.

    ``<br>`` becomes a newline, ``</p>`` a blank line, and closing list-item,
    table-row and heading tags a newline so that consecutive items do not run
    together. All remaining tags are removed, runs of three or more newlines
    are reduced to two, and HTML entities are decoded.

    Args:
        html: The markup to convert.

    Returns:
        The plain-text rendering with leading and trailing whitespace removed.
    """
    # Imported locally under its own name: the parameter ``html`` shadows the
    # module name inside this function.
    from html import unescape

    # Turn structural tags into line breaks before stripping all tags.
    text = re.sub(r'<br\s*/?>', '\n', html)
    text = re.sub(r'</p>', '\n\n', text)
    text = re.sub(r'</(?:li|tr|h[1-6])>', '\n', text)
    text = re.sub(r'<[^>]+>', '', text)

    # Remove excessive blank lines.
    text = re.sub(r'\n{3,}', '\n\n', text)

    # Keep the established mapping of &nbsp; to a regular space (unescape()
    # alone would yield U+00A0).
    text = text.replace('&nbsp;', ' ')
    # Decode every entity in a single pass. Sequential str.replace calls that
    # handle '&amp;' first would decode '&amp;lt;' twice and emit '<'.
    text = unescape(text)
    return text.strip()
def find_matching_template(filename: str) -> Optional[Dict]:
    """Find the template mapping that belongs to a file name.

    The first key of ``DOCX_TEMPLATE_MAPPING`` that occurs as a substring of
    ``filename`` wins. Both sides are compared in Unicode NFC form, because
    some filesystems hand back decomposed (NFD) names in which an umlaut is a
    base letter plus a combining mark; such names would otherwise never match
    the composed keys.

    Args:
        filename: File name (without extension) to look up.

    Returns:
        The mapping dict of the first matching key, or ``None`` if no key
        matches.
    """
    import unicodedata

    normalized_name = unicodedata.normalize("NFC", filename)
    for pattern, mapping in DOCX_TEMPLATE_MAPPING.items():
        if unicodedata.normalize("NFC", pattern) in normalized_name:
            return mapping
    return None
async def get_template_id(template_type: str) -> Optional[str]:
    """Look up the ID of the template with the given type via the consent service.

    Sends a GET request to the admin ``dsr-templates`` endpoint and scans the
    ``templates`` list of the JSON response for an entry whose
    ``template_type`` equals the argument.

    Args:
        template_type: Template type to search for.

    Returns:
        The ``id`` of the first matching template, or ``None`` if nothing
        matches, the service answers with a status other than 200, or the
        request fails. Failures are reported on stdout rather than raised.
    """
    # Simulated admin token (for local development).
    headers = {
        "Authorization": "Bearer dev-admin-token",
        "Content-Type": "application/json"
    }
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(
                f"{CONSENT_SERVICE_URL}/api/v1/admin/dsr-templates",
                headers=headers,
                timeout=10.0
            )
            if response.status_code != 200:
                # Report the status so a rejected request is not mistaken
                # for "no template of this type".
                print(f" Fehler beim Abrufen der Templates: HTTP {response.status_code}")
                return None

            # Tolerate a missing or null "templates" field.
            templates = response.json().get("templates") or []
            for template in templates:
                if template.get("template_type") == template_type:
                    return template.get("id")
        except Exception as e:
            # Best-effort lookup: report the problem and fall through to None.
            print(f" Fehler beim Abrufen der Templates: {e}")
    return None
async def create_template_version(
    template_id: str,
    version: str,
    subject: str,
    body_html: str,
    body_text: str
) -> bool:
    """Create a new version of a template via the consent service.

    Posts the version data, with the language fixed to ``"de"``, to the admin
    ``dsr-templates/{template_id}/versions`` endpoint.

    Args:
        template_id: ID of the template that receives the new version.
        version: Version string of the new version.
        subject: Subject line of the template.
        body_html: HTML body of the template.
        body_text: Plain-text body of the template.

    Returns:
        ``True`` if the service answers with status 200 or 201, otherwise
        ``False``. Failures are reported on stdout rather than raised.
    """
    headers = {
        "Authorization": "Bearer dev-admin-token",
        "Content-Type": "application/json"
    }
    payload = {
        "version": version,
        "language": "de",
        "subject": subject,
        "body_html": body_html,
        "body_text": body_text
    }
    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(
                f"{CONSENT_SERVICE_URL}/api/v1/admin/dsr-templates/{template_id}/versions",
                headers=headers,
                json=payload,
                timeout=30.0
            )
            if response.status_code in (200, 201):
                return True
            # Report the status so a rejected request leaves a trace.
            print(f" Fehler beim Erstellen der Version: HTTP {response.status_code}")
        except Exception as e:
            # Best-effort call: report the problem and signal failure.
            print(f" Fehler beim Erstellen der Version: {e}")
    return False
async def process_docx_file(docx_path: Path) -> bool:
    """Process a single DOCX file and store the converted result locally.

    The file name is matched against the template mapping, the document is
    converted to HTML, placeholders are replaced, a plain-text variant is
    derived, and both variants are written to ``DOCS_PATH / "converted"``
    for manual review. Progress is printed to stdout.

    The output file names contain a slug of the source file name in addition
    to template type and version. Several source files map to the same
    template type; without the slug their outputs would overwrite each other.

    Args:
        docx_path: Path of the DOCX file to process.

    Returns:
        ``True`` if the file was converted and written, ``False`` if no
        mapping matched or the conversion failed.
    """
    import unicodedata

    filename = docx_path.stem
    print(f"\nVerarbeite: {filename}")

    # Find the matching template.
    mapping = find_matching_template(filename)
    if not mapping:
        print(" Übersprungen: Kein Mapping gefunden")
        return False

    template_type = mapping["template_type"]
    print(f" Template-Typ: {template_type}")

    # Convert DOCX to HTML.
    try:
        html, warnings = convert_docx_to_html(docx_path)
        if warnings:
            print(f" Warnungen: {warnings[:3]}")  # show at most 3 warnings
    except Exception as e:
        print(f" Fehler bei der Konvertierung: {e}")
        return False

    # Replace placeholders and derive the plain-text variant.
    html = replace_placeholders(html)
    text = html_to_text(html)

    # Extract the version number from the file name ("_v<digits>").
    version_match = re.search(r'_v(\d+)', filename)
    version = f"1.{version_match.group(1)}.0" if version_match else "1.0.0"

    # Build the subject; append the request number variable if it is missing.
    subject = mapping["name"]
    if "{{request_number}}" not in subject:
        subject = f"{subject} - {{{{request_number}}}}"

    print(f" Version: {version}")
    print(f" Betreff: {subject}")
    print(f" HTML-Länge: {len(html)} Zeichen")

    # File-system-friendly slug of the source name keeps outputs of different
    # source files apart even when template type and version coincide.
    source_slug = re.sub(
        r'[^\w.-]+', '_', unicodedata.normalize("NFC", filename)
    ).strip('_')
    output_stem = f"{template_type}_{version}_{source_slug}"

    # Store as local files for manual review.
    output_dir = DOCS_PATH / "converted"
    output_dir.mkdir(parents=True, exist_ok=True)

    with open(output_dir / f"{output_stem}.html", "w", encoding="utf-8") as f:
        f.write(html)

    with open(output_dir / f"{output_stem}.txt", "w", encoding="utf-8") as f:
        f.write(text)

    print(f" Gespeichert in: {output_dir}")
    return True
async def main():
    """Run the import: convert every DOCX file found directly in DOCS_PATH.

    Exits with status 1 if DOCS_PATH does not exist. Otherwise the files are
    processed one after another in sorted order, and a summary plus a hint
    on where to find the converted templates is printed.
    """
    separator = "=" * 60
    print(separator)
    print("DSR Template Import Script")
    print(separator)

    if not DOCS_PATH.exists():
        print(f"Fehler: Verzeichnis nicht gefunden: {DOCS_PATH}")
        sys.exit(1)

    # Collect all DOCX files.
    docx_files = list(DOCS_PATH.glob("*.docx"))
    total = len(docx_files)
    print(f"\nGefunden: {total} DOCX-Dateien")

    # Process the files sequentially and count the successful ones.
    outcomes = [await process_docx_file(path) for path in sorted(docx_files)]
    success_count = sum(1 for ok in outcomes if ok)

    print("\n" + separator)
    print(f"Verarbeitet: {success_count}/{total} Dateien")
    print(separator)

    print("\nHinweis: Die konvertierten Vorlagen wurden im Ordner")
    print(f" {DOCS_PATH / 'converted'}")
    print("gespeichert und können manuell überprüft werden.")
    print("\nUm die Vorlagen in die Datenbank zu laden, verwenden Sie")
    print("das Admin-Panel unter /app → Consent Admin → Betroffenenanfragen")


# Script entry point: run the coroutine when executed directly.
if __name__ == "__main__":
    asyncio.run(main())