feat: Document Templates V2 — DSFA, TOM, VVT, AVV, Verpflichtung, Art.13/14

Erweiterte Compliance-Vorlagen fuer den Document Generator:
- DSFA V2: Schwellwertanalyse (9 WP248-Kriterien), SDM-basierte TOM,
  strukturierte Risikobewertung, KI-Modul (AI Act), Art.36-Pruefung
- TOM V2: 7 SDM-Gewaehrleistungsziele, Sektor-Erweiterungen,
  NIS2/ISO27001/AI Act Varianten
- VVT V2: 6 Branchen-Muster (IT/SaaS, Gesundheit, Handel, Handwerk,
  Bildung, Beratung) + allgemeine Art.30-Vorlage
- AVV V2: Vollstaendiger Art.28-Vertrag mit TOM-Anlage
- Verpflichtungserklaerung: Mitarbeiter-Vertraulichkeit
- Art.13/14 Informationspflichten-Muster

Enthaelt SQL-Migrations (compliance_legal_templates), Python-Generatoren
und Qdrant-Cleanup-Skript. Feature-Branch fuer spaetere Integration
in breakpilot-compliance.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-10 11:39:39 +02:00
parent 441d5740bd
commit fc71117bf2
10 changed files with 3126 additions and 0 deletions

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
"""Cleanup script: Delete temporary DPA template documents from Qdrant.
Removes all points with payload field `temp_vorlagen=true` from
the bp_compliance_datenschutz collection.
Usage:
python cleanup_temp_vorlagen.py --dry-run # Preview only
python cleanup_temp_vorlagen.py # Execute deletion
python cleanup_temp_vorlagen.py --qdrant-url http://localhost:6333
"""
import argparse
import json
import sys
from typing import Optional
from urllib.request import Request, urlopen
from urllib.error import URLError
def qdrant_request(base_url: str, method: str, path: str, body: Optional[dict] = None) -> dict:
    """Send one request to Qdrant and return the decoded JSON response.

    Args:
        base_url: Root URL of the Qdrant instance; ``path`` is appended to it
            verbatim (no slash handling is performed).
        method: HTTP method passed to ``urllib.request.Request``.
        path: Request path, including any query string.
        body: Optional JSON-serialisable payload. ``None`` sends no request
            body; any dict, including an empty one, is sent as JSON.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: If the request cannot be completed. HTTP error
            statuses surface as ``HTTPError``, a subclass of ``URLError``.
        ValueError: If the response body is not valid JSON.
    """
    url = f"{base_url}{path}"
    # Compare against None instead of relying on truthiness so that an
    # explicit empty payload ({}) is still transmitted as a JSON body with
    # the matching Content-Type header.
    has_body = body is not None
    data = json.dumps(body).encode() if has_body else None
    headers = {"Content-Type": "application/json"} if has_body else {}
    req = Request(url, data=data, headers=headers, method=method)
    with urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())
def count_temp_vorlagen(base_url: str, collection: str) -> int:
    """Return the number of points in *collection* flagged ``temp_vorlagen=true``.

    Posts a count request with ``exact`` enabled to the collection's
    ``points/count`` endpoint. Yields 0 when the response carries no
    ``result.count`` entry.
    """
    only_temp = {"key": "temp_vorlagen", "match": {"value": True}}
    payload = {"filter": {"must": [only_temp]}, "limit": 0, "exact": True}
    endpoint = f"/collections/{collection}/points/count"
    response = qdrant_request(base_url, "POST", endpoint, payload)
    counted = response.get("result", {})
    return counted.get("count", 0)
def list_temp_regulation_ids(base_url: str, collection: str) -> list[dict]:
    """Collect one summary entry per distinct regulation_id among temp points.

    Scrolls through every point whose payload has ``temp_vorlagen=true``,
    500 points per page, following ``next_page_offset`` until the server
    reports no further page. Previously only the first page was inspected,
    so documents stored beyond the first 500 points were never listed.

    Args:
        base_url: Root URL of the Qdrant instance.
        collection: Name of the collection to scan.

    Returns:
        A list of dicts with the keys ``regulation_id``, ``title`` and
        ``source``, one per distinct regulation_id in first-seen order.
        Points without a regulation_id are grouped under ``"unknown"``.
    """
    body = {
        "filter": {
            "must": [
                {"key": "temp_vorlagen", "match": {"value": True}}
            ]
        },
        "limit": 500,
        "with_payload": ["regulation_id", "title", "source"],
    }
    seen: dict = {}
    while True:
        result = qdrant_request(base_url, "POST", f"/collections/{collection}/points/scroll", body)
        page = result.get("result") or {}
        for p in page.get("points") or []:
            # A point may come back with a null payload; treat it as empty.
            payload = p.get("payload") or {}
            rid = payload.get("regulation_id", "unknown")
            if rid not in seen:
                seen[rid] = {
                    "regulation_id": rid,
                    "title": payload.get("title", ""),
                    "source": payload.get("source", ""),
                }
        next_offset = page.get("next_page_offset")
        if next_offset is None:
            # No further page: the scroll is exhausted.
            break
        body["offset"] = next_offset
    return list(seen.values())
def delete_temp_vorlagen(base_url: str, collection: str) -> str:
    """Delete all points with ``temp_vorlagen=true`` from *collection*.

    The request is sent with ``wait=true`` so that Qdrant applies the
    deletion before answering. Without it the server only acknowledges the
    operation, and a count issued right afterwards can still see the points.

    Args:
        base_url: Root URL of the Qdrant instance.
        collection: Name of the collection to clean up.

    Returns:
        The top-level ``status`` field of the response, or ``"unknown"``
        when the response has none.
    """
    body = {
        "filter": {
            "must": [
                {"key": "temp_vorlagen", "match": {"value": True}}
            ]
        }
    }
    result = qdrant_request(
        base_url, "POST", f"/collections/{collection}/points/delete?wait=true", body
    )
    return result.get("status", "unknown")
def _run_cleanup(qdrant_url: str, collection: str, dry_run: bool) -> None:
    """Count, list and (unless *dry_run*) delete the temp template points."""
    count = count_temp_vorlagen(qdrant_url, collection)
    print(f"Gefundene Punkte mit temp_vorlagen=true: {count}")
    if count == 0:
        print("Nichts zu loeschen.")
        return

    docs = list_temp_regulation_ids(qdrant_url, collection)
    print(f"\nBetroffene Dokumente ({len(docs)}):")
    for doc in sorted(docs, key=lambda d: d["regulation_id"]):
        source = f" [{doc['source']}]" if doc.get("source") else ""
        # Separate the title from the id; without a separator the two were
        # printed as a single run-together word.
        title = f": {doc['title']}" if doc.get("title") else ""
        print(f" - {doc['regulation_id']}{title}{source}")

    if dry_run:
        print(f"\n[DRY-RUN] Wuerde {count} Punkte loeschen. Keine Aenderung durchgefuehrt.")
        return

    print(f"\nLoesche {count} Punkte ...")
    status = delete_temp_vorlagen(qdrant_url, collection)
    print(f"Status: {status}")

    # Re-count to verify that the deletion took effect.
    remaining = count_temp_vorlagen(qdrant_url, collection)
    print(f"Verbleibende temp_vorlagen Punkte: {remaining}")
    if remaining == 0:
        print("Cleanup erfolgreich abgeschlossen.")
    else:
        print(f"WARNUNG: {remaining} Punkte konnten nicht geloescht werden.")


def main():
    """Command-line entry point for the temp template cleanup.

    Parses ``--qdrant-url``, ``--collection`` and ``--dry-run``, prints the
    target, then counts and lists the affected points and, unless
    ``--dry-run`` is given, deletes them and verifies the result.

    Any ``URLError`` raised by a Qdrant call (not only the initial count) is
    reported on stderr and terminates the script with exit status 1.
    """
    parser = argparse.ArgumentParser(description="Delete temp DPA templates from Qdrant")
    parser.add_argument("--qdrant-url", default="http://localhost:6333",
                        help="Qdrant URL (default: http://localhost:6333)")
    parser.add_argument("--collection", default="bp_compliance_datenschutz",
                        help="Qdrant collection name")
    parser.add_argument("--dry-run", action="store_true",
                        help="Only count and list, do not delete")
    args = parser.parse_args()

    print(f"Qdrant URL: {args.qdrant_url}")
    print(f"Collection: {args.collection}")
    print()

    try:
        _run_cleanup(args.qdrant_url, args.collection, args.dry_run)
    except URLError as e:
        print(f"ERROR: Cannot connect to Qdrant at {args.qdrant_url}: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()