feat: Document Templates V2 — DSFA, TOM, VVT, AVV, Verpflichtung, Art.13/14
Erweiterte Compliance-Vorlagen fuer den Document Generator: - DSFA V2: Schwellwertanalyse (9 WP248-Kriterien), SDM-basierte TOM, strukturierte Risikobewertung, KI-Modul (AI Act), Art.36-Pruefung - TOM V2: 7 SDM-Gewaehrleistungsziele, Sektor-Erweiterungen, NIS2/ISO27001/AI Act Varianten - VVT V2: 6 Branchen-Muster (IT/SaaS, Gesundheit, Handel, Handwerk, Bildung, Beratung) + allgemeine Art.30-Vorlage - AVV V2: Vollstaendiger Art.28-Vertrag mit TOM-Anlage - Verpflichtungserklaerung: Mitarbeiter-Vertraulichkeit - Art.13/14 Informationspflichten-Muster Enthaelt SQL-Migrations (compliance_legal_templates), Python-Generatoren und Qdrant-Cleanup-Skript. Feature-Branch fuer spaetere Integration in breakpilot-compliance. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
137
document-templates/scripts/cleanup_temp_vorlagen.py
Normal file
137
document-templates/scripts/cleanup_temp_vorlagen.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Cleanup script: Delete temporary DPA template documents from Qdrant.
|
||||
|
||||
Removes all points with payload field `temp_vorlagen=true` from
|
||||
the bp_compliance_datenschutz collection.
|
||||
|
||||
Usage:
|
||||
python cleanup_temp_vorlagen.py --dry-run # Preview only
|
||||
python cleanup_temp_vorlagen.py # Execute deletion
|
||||
python cleanup_temp_vorlagen.py --qdrant-url http://localhost:6333
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from typing import Optional
|
||||
from urllib.request import Request, urlopen
|
||||
from urllib.error import URLError
|
||||
|
||||
|
||||
def qdrant_request(base_url: str, method: str, path: str, body: Optional[dict] = None) -> dict:
    """Send one JSON request to the Qdrant REST API and return the decoded reply.

    Args:
        base_url: Root URL of the Qdrant instance, e.g. ``http://localhost:6333``.
            A trailing slash is tolerated.
        method: HTTP method name, e.g. ``"GET"`` or ``"POST"``.
        path: Endpoint path starting with ``/``; appended to ``base_url``.
        body: Optional JSON-serialisable payload. ``None`` sends no request
            body; an empty dict is sent as ``{}``.

    Returns:
        The response body parsed as JSON.

    Raises:
        urllib.error.URLError: If the request cannot be completed
            (``HTTPError`` for error status codes is a subclass).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Drop a trailing slash so "http://host:6333/" + "/collections/..." does
    # not produce a double slash in the request URL.
    url = base_url.rstrip("/") + path
    # Compare against None instead of relying on truthiness: an empty dict is
    # a legitimate JSON body and must still be transmitted.
    data = json.dumps(body).encode("utf-8") if body is not None else None
    headers = {"Content-Type": "application/json"} if data is not None else {}
    req = Request(url, data=data, headers=headers, method=method)
    with urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())
|
||||
|
||||
|
||||
def count_temp_vorlagen(base_url: str, collection: str) -> int:
    """Count points with temp_vorlagen=true.

    Args:
        base_url: Root URL of the Qdrant instance.
        collection: Name of the collection to query.

    Returns:
        The ``count`` value reported by Qdrant, or 0 if the response carries
        no count.
    """
    # The count endpoint takes only a filter and the "exact" flag; the
    # "limit" field previously sent here is not part of that request.
    body = {
        "filter": {
            "must": [
                {"key": "temp_vorlagen", "match": {"value": True}},
            ],
        },
        "exact": True,
    }
    response = qdrant_request(base_url, "POST", f"/collections/{collection}/points/count", body)
    # "result" may be present but null; fall back to an empty dict so the
    # lookup below cannot fail on None.
    return (response.get("result") or {}).get("count", 0)
|
||||
|
||||
|
||||
def list_temp_regulation_ids(base_url: str, collection: str) -> list[dict]:
    """Get the distinct documents (by regulation_id) among the temp points.

    Scrolls through every point with temp_vorlagen=true, following
    ``next_page_offset`` until the server reports no further page, so
    collections with more than one page of matches are listed completely.

    Args:
        base_url: Root URL of the Qdrant instance.
        collection: Name of the collection to query.

    Returns:
        One dict per distinct regulation_id with the keys ``regulation_id``,
        ``title`` and ``source``. Title and source come from the first point
        seen for that id; points without a regulation_id are grouped under
        ``"unknown"``.
    """
    page_size = 500
    seen = {}
    offset = None

    while True:
        body = {
            "filter": {
                "must": [
                    {"key": "temp_vorlagen", "match": {"value": True}},
                ],
            },
            "limit": page_size,
            "with_payload": ["regulation_id", "title", "source"],
        }
        if offset is not None:
            body["offset"] = offset

        response = qdrant_request(base_url, "POST", f"/collections/{collection}/points/scroll", body)
        page = response.get("result") or {}

        for point in page.get("points", []):
            # A point may come back with a null payload; treat it as empty.
            payload = point.get("payload") or {}
            rid = payload.get("regulation_id", "unknown")
            if rid not in seen:
                seen[rid] = {
                    "regulation_id": rid,
                    "title": payload.get("title", ""),
                    "source": payload.get("source", ""),
                }

        # A missing or null offset marks the last page.
        offset = page.get("next_page_offset")
        if offset is None:
            break

    return list(seen.values())
|
||||
|
||||
|
||||
def delete_temp_vorlagen(base_url: str, collection: str) -> str:
    """Delete all points with temp_vorlagen=true.

    Args:
        base_url: Root URL of the Qdrant instance.
        collection: Name of the collection to delete from.

    Returns:
        The top-level ``status`` string of the Qdrant response, or
        ``"unknown"`` if the response has none.
    """
    body = {
        "filter": {
            "must": [
                {"key": "temp_vorlagen", "match": {"value": True}},
            ],
        },
    }
    # wait=true makes Qdrant answer only after the deletion has been applied.
    # Without it the request is merely acknowledged, and a count taken right
    # afterwards can still see the points.
    response = qdrant_request(
        base_url, "POST", f"/collections/{collection}/points/delete?wait=true", body
    )
    return response.get("status", "unknown")
|
||||
|
||||
|
||||
def _run_cleanup(qdrant_url: str, collection: str, dry_run: bool) -> int:
    """Count, list and (unless dry_run) delete the temp points.

    Returns the number of temp points still present after the run; always 0
    for a dry run or when there was nothing to delete.
    """
    count = count_temp_vorlagen(qdrant_url, collection)
    print(f"Gefundene Punkte mit temp_vorlagen=true: {count}")

    if count == 0:
        print("Nichts zu loeschen.")
        return 0

    docs = list_temp_regulation_ids(qdrant_url, collection)
    print(f"\nBetroffene Dokumente ({len(docs)}):")
    for doc in sorted(docs, key=lambda d: d["regulation_id"]):
        source = f" [{doc['source']}]" if doc.get("source") else ""
        title = f" — {doc['title']}" if doc.get("title") else ""
        print(f" - {doc['regulation_id']}{title}{source}")

    if dry_run:
        print(f"\n[DRY-RUN] Wuerde {count} Punkte loeschen. Keine Aenderung durchgefuehrt.")
        return 0

    print(f"\nLoesche {count} Punkte ...")
    status = delete_temp_vorlagen(qdrant_url, collection)
    print(f"Status: {status}")

    # Re-count to verify that the deletion actually took effect.
    remaining = count_temp_vorlagen(qdrant_url, collection)
    print(f"Verbleibende temp_vorlagen Punkte: {remaining}")

    if remaining == 0:
        print("Cleanup erfolgreich abgeschlossen.")
    else:
        print(f"WARNUNG: {remaining} Punkte konnten nicht geloescht werden.")
    return remaining


def main():
    """Command-line entry point: parse arguments and run the cleanup.

    Exits with status 1 if any Qdrant request fails or if temp points remain
    after the deletion; otherwise returns normally.
    """
    parser = argparse.ArgumentParser(description="Delete temp DPA templates from Qdrant")
    parser.add_argument("--qdrant-url", default="http://localhost:6333",
                        help="Qdrant URL (default: http://localhost:6333)")
    parser.add_argument("--collection", default="bp_compliance_datenschutz",
                        help="Qdrant collection name")
    parser.add_argument("--dry-run", action="store_true",
                        help="Only count and list, do not delete")
    args = parser.parse_args()

    print(f"Qdrant URL: {args.qdrant_url}")
    print(f"Collection: {args.collection}")
    print()

    try:
        remaining = _run_cleanup(args.qdrant_url, args.collection, args.dry_run)
    except (OSError, ValueError) as e:
        # OSError covers URLError/HTTPError and socket timeouts; ValueError
        # covers malformed URLs and non-JSON responses. Every request in the
        # run is guarded, not only the first one.
        print(f"ERROR: Qdrant request to {args.qdrant_url} failed: {e}", file=sys.stderr)
        sys.exit(1)

    if remaining:
        # Signal an incomplete cleanup to calling scripts and CI jobs.
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user