fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions

View File

@@ -0,0 +1,369 @@
#!/usr/bin/env python3
"""
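DSR Template Import Script

Converts the DOCX templates found in docs/Datenschutz to HTML and plain
text, replaces Word placeholders with {{...}} template variables, and
writes the results to docs/Datenschutz/converted for manual review.
Loading the converted templates into the database is a separate step that
is done through the admin panel.

Usage:
    cd backend
    source venv/bin/activate
    python scripts/import_dsr_templates.py

Requirements:
    pip install mammoth httpx
    (python-docx is not imported by this script.)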
"""
import os
import sys
import re
import asyncio
from pathlib import Path
from typing import Optional, Dict, List, Tuple
# Third-party imports
try:
import mammoth
import httpx
except ImportError:
print("Bitte installieren Sie die erforderlichen Pakete:")
print(" pip install mammoth httpx")
sys.exit(1)
# Configuration
# Base URL of the consent service; read from the CONSENT_SERVICE_URL
# environment variable, falling back to http://localhost:8081.
CONSENT_SERVICE_URL = os.getenv("CONSENT_SERVICE_URL", "http://localhost:8081")
# Folder that is scanned for *.docx templates: "docs/Datenschutz" below the
# third parent of this script file.
DOCS_PATH = Path(__file__).parent.parent.parent / "docs" / "Datenschutz"
# Mapping of DOCX file-name fragments to template metadata.
# Each key is looked up as a substring of the DOCX file stem; the value holds
# the template type, a display name (also used to build the subject line),
# the request types it applies to and, optionally, a variant marker.
DOCX_TEMPLATE_MAPPING: Dict[str, Dict] = {
# Acknowledgements of receipt
"Muster_1_Eingangsbestätigung": {
"template_type": "dsr_receipt_access",
"name": "Eingangsbestätigung (Auskunft Art. 15)",
"request_types": ["access"],
},
# Identity verification
"Muster_2_Anfrage Kontaktdaten": {
"template_type": "dsr_identity_request",
"name": "Anfrage Kontaktdaten zur Identifikation",
"request_types": ["access", "rectification", "erasure", "restriction", "portability"],
},
"Muster_2_Anfrage Identität": {
"template_type": "dsr_identity_request",
"name": "Anfrage Identitätsnachweis",
"request_types": ["access", "rectification", "erasure", "restriction", "portability"],
},
"Muster_3_Anfrage Identität": {
"template_type": "dsr_identity_request",
"name": "Anfrage Identitätsnachweis (erweitert)",
"request_types": ["access", "rectification", "erasure", "restriction", "portability"],
},
# Processing confirmations
"Muster_3_Bearbeitungsbestätigung": {
"template_type": "dsr_processing_started",
"name": "Bearbeitungsbestätigung",
"request_types": ["access", "rectification", "erasure", "restriction", "portability"],
},
"Muster_4_Bearbeitungsbestätigung": {
"template_type": "dsr_processing_update",
"name": "Bearbeitungsupdate",
"request_types": ["access", "rectification", "erasure", "restriction", "portability"],
},
# Clarification requests
"Muster_4_Rückfragen Begehren": {
"template_type": "dsr_clarification_request",
"name": "Rückfragen zum Begehren",
"request_types": ["access", "rectification", "erasure", "restriction", "portability"],
},
"Muster_5_Rückfragen Umfang": {
"template_type": "dsr_clarification_request",
"name": "Rückfragen zum Umfang",
"request_types": ["access"],
},
# Completion - access request
"Muster_5_ Negativauskunft": {
"template_type": "dsr_completed_access",
"name": "Negativauskunft (keine Daten gefunden)",
"request_types": ["access"],
"variant": "no_data",
},
"Muster_6_ Beauskunftung Art. 15": {
"template_type": "dsr_completed_access",
"name": "Beauskunftung nach Art. 15",
"request_types": ["access"],
},
# Completion - rectification
"Muster_4_Information Berichtigung": {
"template_type": "dsr_completed_rectification",
"name": "Information über durchgeführte Berichtigung",
"request_types": ["rectification"],
},
# Completion - erasure
"Muster_6_Information Löschung": {
"template_type": "dsr_completed_erasure",
"name": "Information über durchgeführte Löschung",
"request_types": ["erasure"],
},
# Completion - data portability
"Muster_5_Information Übermittlung": {
"template_type": "dsr_completed_portability",
"name": "Information über Datenübermittlung",
"request_types": ["portability"],
},
# Third-party disclosure
"Muster_4_Anfrage Drittübermittlung": {
"template_type": "dsr_third_party_notification",
"name": "Anfrage zur Drittübermittlung",
"request_types": ["rectification", "erasure", "restriction"],
},
}
# Default mapping from bracketed Word placeholders (as they appear in the
# DOCX text) to {{...}} template variables. Several spellings map to the
# same variable.
PLACEHOLDER_REPLACEMENTS = {
# Requester
"[Name]": "{{requester_name}}",
"[Vorname Name]": "{{requester_name}}",
"[Anrede]": "{{requester_salutation}}",
"[E-Mail]": "{{requester_email}}",
"[Adresse]": "{{requester_address}}",
# Request
"[Vorgangsnummer]": "{{request_number}}",
"[Aktenzeichen]": "{{request_number}}",
"[Datum des Eingangs]": "{{request_date}}",
"[Eingangsdatum]": "{{request_date}}",
"[Frist]": "{{deadline_date}}",
"[Fristdatum]": "{{deadline_date}}",
# Company
"[Firmenname]": "{{company_name}}",
"[Unternehmen]": "{{company_name}}",
"[Unternehmensname]": "{{company_name}}",
"[Firma]": "{{company_name}}",
# Data protection officer (DSB)
"[DSB Name]": "{{dpo_name}}",
"[Name DSB]": "{{dpo_name}}",
"[Datenschutzbeauftragter]": "{{dpo_name}}",
"[E-Mail DSB]": "{{dpo_email}}",
"[DSB E-Mail]": "{{dpo_email}}",
# Miscellaneous
"[Datum]": "{{current_date}}",
"[Portal-URL]": "{{portal_url}}",
}
def convert_docx_to_html(docx_path: Path) -> Tuple[str, List[str]]:
    """Convert a DOCX file to a single-line HTML string using mammoth.

    Args:
        docx_path: Path of the DOCX file to read (opened in binary mode).

    Returns:
        A tuple ``(html, messages)``: the converted HTML with empty
        paragraphs removed and whitespace runs collapsed to one space, and
        the text of every message mammoth reported during conversion.
    """
    with open(docx_path, "rb") as docx_file:
        conversion = mammoth.convert_to_html(docx_file)

    notes = []
    for note in conversion.messages:
        notes.append(note.message)

    # Drop empty paragraphs first, then squeeze all whitespace runs.
    without_empty_paragraphs = conversion.value.replace('<p></p>', '')
    cleaned = re.sub(r'\s+', ' ', without_empty_paragraphs)
    return cleaned, notes
def replace_placeholders(html: str) -> str:
    """Swap bracketed Word placeholders for {{...}} template variables.

    Every key of PLACEHOLDER_REPLACEMENTS is replaced by its mapped variable.
    Any bracketed token that is still present afterwards is reported on
    stdout as a warning; the text itself is returned regardless.

    Args:
        html: HTML text that may contain placeholders such as ``[Name]``.

    Returns:
        The HTML with all known placeholders replaced.
    """
    converted = html
    for word_token, template_variable in PLACEHOLDER_REPLACEMENTS.items():
        converted = converted.replace(word_token, template_variable)

    # Whatever is still wrapped in square brackets has no mapping entry.
    leftovers = re.findall(r'\[([^\]]+)\]', converted)
    if not leftovers:
        return converted

    print(f" Warnung: Nicht ersetzte Platzhalter gefunden: {leftovers}")
    return converted
def html_to_text(html: str) -> str:
    """Convert an HTML fragment to plain text.

    ``<br>`` becomes a newline, ``</p>`` becomes a blank line, all remaining
    tags are dropped, runs of three or more newlines are reduced to two, and
    character references are decoded.

    Entity decoding is done in a single pass with ``html.unescape``. The
    previous chain of ``str.replace`` calls decoded ``&amp;`` before
    ``&lt;``/``&gt;``, so escaped text such as ``&amp;lt;`` was decoded twice
    and ended up as ``<``; it also ignored every entity other than the five
    it listed (e.g. ``&uuml;`` or numeric references).

    Args:
        html: HTML text to convert.

    Returns:
        The plain-text rendering with leading/trailing whitespace stripped.
    """
    # Imported here under a different name because the parameter ``html``
    # shadows the standard-library module of the same name in this scope.
    from html import unescape

    text = re.sub(r'<br\s*/?>', '\n', html)
    text = re.sub(r'</p>', '\n\n', text)
    # Tags are removed before decoding so that escaped markup such as
    # "&lt;b&gt;" is kept as literal text instead of being stripped.
    text = re.sub(r'<[^>]+>', '', text)
    text = re.sub(r'\n{3,}', '\n\n', text)
    # Keep the established behavior of turning &nbsp; into a regular space
    # (unescape alone would yield U+00A0).
    text = text.replace('&nbsp;', ' ')
    return unescape(text).strip()
def find_matching_template(filename: str) -> Optional[Dict]:
    """Return the mapping entry whose key occurs in the given file name.

    The keys of DOCX_TEMPLATE_MAPPING are tested in definition order as
    substrings of ``filename``; the first hit wins.

    Both sides are normalised to Unicode NFC before comparing. Many keys
    contain umlauts, and file names read from disk can arrive in decomposed
    form (NFD, e.g. "a" + combining diaeresis), in which case a plain
    substring test never matches although the names look identical.

    Args:
        filename: File name (stem) of the DOCX file.

    Returns:
        The matching mapping dict, or ``None`` if no key matches.
    """
    import unicodedata

    normalized_name = unicodedata.normalize("NFC", filename)
    for pattern, mapping in DOCX_TEMPLATE_MAPPING.items():
        if unicodedata.normalize("NFC", pattern) in normalized_name:
            return mapping
    return None
async def get_template_id(template_type: str) -> Optional[str]:
    """Look up the id of a DSR template by its type via the consent service.

    Sends a GET request to ``/api/v1/admin/dsr-templates`` and returns the
    ``id`` of the first entry in the response's ``templates`` list whose
    ``template_type`` equals the requested one.

    Failures are reported on stdout and never raised: besides exceptions,
    a non-200 response is now reported with its status code instead of
    being indistinguishable from "template not found".

    Args:
        template_type: Template type to search for.

    Returns:
        The template id, or ``None`` if the request failed or no template
        of that type exists.
    """
    # Hard-coded placeholder admin token (intended for local development).
    headers = {
        "Authorization": "Bearer dev-admin-token",
        "Content-Type": "application/json",
    }
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(
                f"{CONSENT_SERVICE_URL}/api/v1/admin/dsr-templates",
                headers=headers,
                timeout=10.0,
            )
            if response.status_code != 200:
                print(f" Fehler beim Abrufen der Templates: HTTP {response.status_code}")
                return None

            # "templates" may be missing or null; treat both as an empty list.
            templates = response.json().get("templates") or []
            for template in templates:
                if template.get("template_type") == template_type:
                    return template.get("id")
        except Exception as e:
            # Best-effort lookup: report the problem and fall through to None.
            print(f" Fehler beim Abrufen der Templates: {e}")
    return None
async def create_template_version(
    template_id: str,
    version: str,
    subject: str,
    body_html: str,
    body_text: str
) -> bool:
    """Create a new German-language version of a DSR template.

    Posts the version payload to
    ``/api/v1/admin/dsr-templates/{template_id}/versions``. Status codes 200
    and 201 count as success. A rejected request is now reported on stdout
    with its status code and the start of the response body; previously it
    returned ``False`` without any diagnostic.

    Args:
        template_id: Id of the template the version belongs to.
        version: Version string of the new template version.
        subject: Subject line of the template.
        body_html: HTML body of the template.
        body_text: Plain-text body of the template.

    Returns:
        ``True`` if the service answered with 200 or 201, ``False`` for any
        other status code or if the request raised an exception.
    """
    # Hard-coded placeholder admin token (intended for local development).
    headers = {
        "Authorization": "Bearer dev-admin-token",
        "Content-Type": "application/json",
    }
    payload = {
        "version": version,
        "language": "de",
        "subject": subject,
        "body_html": body_html,
        "body_text": body_text,
    }
    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(
                f"{CONSENT_SERVICE_URL}/api/v1/admin/dsr-templates/{template_id}/versions",
                headers=headers,
                json=payload,
                timeout=30.0,
            )
            if response.status_code in (200, 201):
                return True
            print(
                f" Fehler beim Erstellen der Version: HTTP {response.status_code}"
                f" - {response.text[:200]}"
            )
        except Exception as e:
            # Best-effort call: report the problem and signal failure.
            print(f" Fehler beim Erstellen der Version: {e}")
    return False
async def process_docx_file(docx_path: Path) -> bool:
    """Convert one DOCX template and store the result for manual review.

    Steps: find the mapping entry for the file name, convert the DOCX to
    HTML, replace Word placeholders, derive a plain-text variant, derive the
    version from a ``_v<number>`` suffix in the file name (default
    ``1.0.0``), build the subject line, and write ``.html`` and ``.txt``
    files into ``DOCS_PATH / "converted"``.

    The output file names include the source file stem. Several DOCX files
    map to the same template type, so naming the output only by template
    type and version made later files overwrite earlier ones.

    The function is declared ``async`` but does not await anything itself.

    Args:
        docx_path: Path of the DOCX file to process.

    Returns:
        ``True`` if the file was converted and written, ``False`` if no
        mapping exists for it or the conversion raised an exception.
    """
    filename = docx_path.stem
    print(f"\nVerarbeite: {filename}")

    # Find the matching template definition.
    mapping = find_matching_template(filename)
    if not mapping:
        print(" Übersprungen: Kein Mapping gefunden")
        return False

    template_type = mapping["template_type"]
    print(f" Template-Typ: {template_type}")

    # Convert DOCX to HTML.
    try:
        html, warnings = convert_docx_to_html(docx_path)
    except Exception as e:
        print(f" Fehler bei der Konvertierung: {e}")
        return False
    if warnings:
        print(f" Warnungen: {warnings[:3]}")  # show at most 3 warnings

    # Replace placeholders and derive the plain-text variant.
    html = replace_placeholders(html)
    text = html_to_text(html)

    # Extract the version number from the file name.
    version_match = re.search(r'_v(\d+)', filename)
    version = f"1.{version_match.group(1)}.0" if version_match else "1.0.0"

    # Build the subject; append the request number variable if it is missing.
    subject = mapping["name"]
    if "{{request_number}}" not in subject:
        subject = f"{subject} - {{{{request_number}}}}"

    print(f" Version: {version}")
    print(f" Betreff: {subject}")
    print(f" HTML-Länge: {len(html)} Zeichen")

    # Store as local files for manual review.
    output_dir = DOCS_PATH / "converted"
    output_dir.mkdir(exist_ok=True)

    # File-system friendly form of the source stem; keeps outputs of
    # different DOCX files that share a template type apart.
    source_slug = re.sub(r'[^\w.-]+', '_', filename).strip('_')
    base_name = f"{template_type}_{version}_{source_slug}"

    with open(output_dir / f"{base_name}.html", "w", encoding="utf-8") as f:
        f.write(html)
    with open(output_dir / f"{base_name}.txt", "w", encoding="utf-8") as f:
        f.write(text)

    print(f" Gespeichert in: {output_dir}")
    return True
async def main():
    """Run the import: convert every DOCX file in DOCS_PATH and summarise.

    Exits with status 1 if DOCS_PATH does not exist. Otherwise all ``*.docx``
    files directly inside DOCS_PATH are processed one after another in
    sorted order, and the number of successfully processed files is printed
    together with a hint on where the converted files were stored.
    """
    separator = "=" * 60
    print(separator)
    print("DSR Template Import Script")
    print(separator)

    if not DOCS_PATH.exists():
        print(f"Fehler: Verzeichnis nicht gefunden: {DOCS_PATH}")
        sys.exit(1)

    # Collect all DOCX files.
    docx_files = list(DOCS_PATH.glob("*.docx"))
    print(f"\nGefunden: {len(docx_files)} DOCX-Dateien")

    # Process the files sequentially and remember each outcome.
    outcomes = []
    for candidate in sorted(docx_files):
        outcomes.append(await process_docx_file(candidate))
    success_count = sum(1 for succeeded in outcomes if succeeded)

    print("\n" + separator)
    print(f"Verarbeitet: {success_count}/{len(docx_files)} Dateien")
    print(separator)

    print("\nHinweis: Die konvertierten Vorlagen wurden im Ordner")
    print(f" {DOCS_PATH / 'converted'}")
    print("gespeichert und können manuell überprüft werden.")
    print("\nUm die Vorlagen in die Datenbank zu laden, verwenden Sie")
    print("das Admin-Panel unter /app → Consent Admin → Betroffenenanfragen")


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,218 @@
#!/usr/bin/env python3
"""
Load Initial EduSearch Seeds into the Database.
This script uses the bulk import API to load all German education sources
that were provided by the user.
"""
import httpx
import asyncio
import os
API_BASE = os.environ.get("LLM_GATEWAY_URL", "http://localhost:8000")
# All German education seeds organized by category
INITIAL_SEEDS = [
# ===== BUNDESEBENE (Federal) =====
{"url": "https://www.kmk.org", "name": "Kultusministerkonferenz (KMK)", "description": "Lehrpläne, Bildungsstandards, Abiturregelungen", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
{"url": "https://www.bildungsserver.de", "name": "Deutscher Bildungsserver (DIPF)", "description": "Zentrale Meta-Plattform für alle Länder", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
{"url": "https://www.bpb.de", "name": "Bundeszentrale für politische Bildung", "description": "Unterrichtsmaterialien, Dossiers, Arbeitsblätter", "category": "federal", "trust_boost": 0.90, "source_type": "GOV", "scope": "FEDERAL"},
{"url": "https://www.bmbf.de", "name": "Bundesministerium für Bildung und Forschung", "description": "Förderprogramme, Bildungsberichte, Initiativen", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
{"url": "https://www.iqb.hu-berlin.de", "name": "Institut zur Qualitätsentwicklung (IQB)", "description": "Bildungsstandards, Vergleichsarbeiten, Abiturpools", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
# ===== BADEN-WÜRTTEMBERG (BW) =====
{"url": "https://km-bw.de", "name": "BW Kultusministerium", "description": "Baden-Württemberg Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BW"},
{"url": "https://www.bildungsplaene-bw.de", "name": "BW Bildungspläne", "description": "Bildungspläne Baden-Württemberg", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BW"},
{"url": "https://zsl.kultus-bw.de", "name": "BW Zentrum für Schulqualität", "description": "ZSL Baden-Württemberg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BW"},
{"url": "https://lehrerfortbildung-bw.de", "name": "BW Lehrerfortbildung", "description": "Lehrerfortbildung Baden-Württemberg", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BW"},
{"url": "https://rp.baden-wuerttemberg.de", "name": "BW Regierungspräsidien", "description": "Bildungsaufsicht Baden-Württemberg", "category": "authorities", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BW"},
# ===== BAYERN (BY) =====
{"url": "https://www.km.bayern.de", "name": "Bayern Kultusministerium", "description": "Bayerisches Staatsministerium für Unterricht und Kultus", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BY"},
{"url": "https://www.isb.bayern.de", "name": "Bayern ISB", "description": "Staatsinstitut für Schulqualität und Bildungsforschung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BY"},
{"url": "https://www.mebis.bayern.de", "name": "Bayern mebis", "description": "Medien-Bildung-Service Bayern", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BY"},
{"url": "https://www.bycs.de", "name": "Bayern Cloud Schule", "description": "Bayerische Schulcloud", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BY"},
{"url": "https://www.schulberatung.bayern.de", "name": "Bayern Schulberatung", "description": "Staatliche Schulberatung Bayern", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BY"},
# ===== BERLIN (BE) =====
{"url": "https://www.berlin.de/sen/bjf", "name": "Berlin Senatsverwaltung", "description": "Senatsverwaltung für Bildung, Jugend und Familie", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BE"},
{"url": "https://bildungsserver.berlin-brandenburg.de", "name": "Berlin-Brandenburg Bildungsserver", "description": "Gemeinsamer Bildungsserver Berlin-Brandenburg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BE"},
{"url": "https://www.berlin.de/schule", "name": "Berlin Schulportal", "description": "Berliner Schulportal", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BE"},
{"url": "https://www.berlin.de/landesinstitut-schule-medien", "name": "Berlin LISUM", "description": "Landesinstitut für Schule und Medien", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BE"},
# ===== BRANDENBURG (BB) =====
{"url": "https://mbjs.brandenburg.de", "name": "Brandenburg MBJS", "description": "Ministerium für Bildung, Jugend und Sport", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},
{"url": "https://lisum.berlin-brandenburg.de", "name": "Brandenburg LISUM", "description": "Landesinstitut für Schule und Medien", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},
{"url": "https://www.schulportal.brandenburg.de", "name": "Brandenburg Schulportal", "description": "Schulportal Brandenburg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BB"},
{"url": "https://lehrplan.brandenburg.de", "name": "Brandenburg Lehrpläne", "description": "Rahmenlehrpläne Brandenburg", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},
# ===== BREMEN (HB) =====
{"url": "https://www.bildung.bremen.de", "name": "Bremen Bildung", "description": "Senatorin für Kinder und Bildung Bremen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HB"},
{"url": "https://www.lis.bremen.de", "name": "Bremen LIS", "description": "Landesinstitut für Schule Bremen", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HB"},
{"url": "https://www.bildungsplaene.bremen.de", "name": "Bremen Bildungspläne", "description": "Bildungspläne Bremen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HB"},
# ===== HAMBURG (HH) =====
{"url": "https://www.hamburg.de/bsb", "name": "Hamburg BSB", "description": "Behörde für Schule und Berufsbildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},
{"url": "https://li.hamburg.de", "name": "Hamburg Landesinstitut", "description": "Landesinstitut für Lehrerbildung und Schulentwicklung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},
{"url": "https://www.bildungsplaene.hamburg.de", "name": "Hamburg Bildungspläne", "description": "Hamburger Bildungspläne", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},
# ===== HESSEN (HE) =====
{"url": "https://kultusministerium.hessen.de", "name": "Hessen Kultusministerium", "description": "Hessisches Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HE"},
{"url": "https://lehrplaene.hessen.de", "name": "Hessen Lehrpläne", "description": "Kerncurricula Hessen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HE"},
{"url": "https://www.schulportal.hessen.de", "name": "Hessen Schulportal", "description": "Hessisches Schulportal", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HE"},
{"url": "https://la.hessen.de", "name": "Hessen Lehrkräfteakademie", "description": "Hessische Lehrkräfteakademie", "category": "authorities", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HE"},
# ===== MECKLENBURG-VORPOMMERN (MV) =====
{"url": "https://www.regierung-mv.de/Landesregierung/bm", "name": "MV Bildungsministerium", "description": "Bildungsministerium Mecklenburg-Vorpommern", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "MV"},
{"url": "https://www.bildung-mv.de", "name": "MV Bildungsportal", "description": "Bildungsserver Mecklenburg-Vorpommern", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "MV"},
# ===== NIEDERSACHSEN (NI) =====
{"url": "https://www.mk.niedersachsen.de", "name": "Niedersachsen MK", "description": "Niedersächsisches Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NI"},
{"url": "https://www.nibis.de", "name": "Niedersachsen NiBiS", "description": "Niedersächsischer Bildungsserver", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NI"},
# ===== NORDRHEIN-WESTFALEN (NW) =====
{"url": "https://www.msb.nrw", "name": "NRW Schulministerium", "description": "Ministerium für Schule und Bildung NRW", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
{"url": "https://www.schulentwicklung.nrw.de", "name": "NRW Schulentwicklung", "description": "Schulentwicklung NRW", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
{"url": "https://www.qua-lis.nrw.de", "name": "NRW QUA-LiS", "description": "Qualitäts- und UnterstützungsAgentur", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
{"url": "https://www.standardsicherung.schulministerium.nrw.de", "name": "NRW Standardsicherung", "description": "Standardsicherung und Prüfungen NRW", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "NW"},
# ===== RHEINLAND-PFALZ (RP) =====
{"url": "https://bm.rlp.de", "name": "RLP Bildungsministerium", "description": "Ministerium für Bildung Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},
{"url": "https://bildung.rlp.de", "name": "RLP Bildungsserver", "description": "Bildungsserver Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},
{"url": "https://lehrplaene.rlp.de", "name": "RLP Lehrpläne", "description": "Lehrpläne Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},
# ===== SAARLAND (SL) =====
{"url": "https://www.saarland.de/mbk", "name": "Saarland MBK", "description": "Ministerium für Bildung und Kultur Saarland", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SL"},
{"url": "https://www.bildungsserver.saarland.de", "name": "Saarland Bildungsserver", "description": "Bildungsserver Saarland", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "SL"},
# ===== SACHSEN (SN) =====
{"url": "https://www.smk.sachsen.de", "name": "Sachsen SMK", "description": "Sächsisches Staatsministerium für Kultus", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SN"},
{"url": "https://www.schule.sachsen.de", "name": "Sachsen Schulportal", "description": "Sächsisches Schulportal", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SN"},
# ===== SACHSEN-ANHALT (ST) =====
{"url": "https://mb.sachsen-anhalt.de", "name": "Sachsen-Anhalt MB", "description": "Ministerium für Bildung Sachsen-Anhalt", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "ST"},
{"url": "https://lisa.sachsen-anhalt.de", "name": "Sachsen-Anhalt LISA", "description": "Landesinstitut für Schulqualität", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "ST"},
{"url": "https://www.bildung-lsa.de", "name": "Sachsen-Anhalt Bildungsserver", "description": "Bildungsserver Sachsen-Anhalt", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "ST"},
# ===== SCHLESWIG-HOLSTEIN (SH) =====
{"url": "https://www.schleswig-holstein.de/BILDUNG", "name": "SH Bildungsministerium", "description": "Ministerium für Allgemeine und Berufliche Bildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SH"},
{"url": "https://fachanforderungen.schleswig-holstein.de", "name": "SH Fachanforderungen", "description": "Fachanforderungen Schleswig-Holstein", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SH"},
# ===== THÜRINGEN (TH) =====
{"url": "https://bildung.thueringen.de", "name": "Thüringen Bildungsministerium", "description": "Thüringer Ministerium für Bildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "TH"},
{"url": "https://www.schulportal-thueringen.de", "name": "Thüringen Schulportal", "description": "Thüringer Schulportal", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "TH"},
# ===== WISSENSCHAFT & STUDIEN =====
{"url": "https://www.bertelsmann-stiftung.de/bildung", "name": "Bertelsmann Stiftung", "description": "Bildungsstudien und Ländermonitor", "category": "science", "trust_boost": 0.85, "source_type": "NGO", "scope": "FEDERAL"},
{"url": "https://www.oecd.org/pisa", "name": "OECD PISA", "description": "Internationale Schulleistungsstudie PISA", "category": "science", "trust_boost": 0.90, "source_type": "INT", "scope": "INTERNATIONAL"},
# ===== BILDUNGSPORTALE =====
{"url": "https://www.lehrer-online.de", "name": "Lehrer-Online", "description": "Unterrichtsmaterialien und Fachinformationen", "category": "portals", "trust_boost": 0.80, "source_type": "PORTAL", "scope": "FEDERAL"},
{"url": "https://www.4teachers.de", "name": "4teachers", "description": "Unterrichtsmaterialien von Lehrern für Lehrer", "category": "portals", "trust_boost": 0.75, "source_type": "PORTAL", "scope": "FEDERAL"},
{"url": "https://www.zum.de", "name": "ZUM", "description": "Zentrale für Unterrichtsmedien im Internet", "category": "portals", "trust_boost": 0.80, "source_type": "NGO", "scope": "FEDERAL"},
]
def _print_seed_breakdown(seeds):
    """Print how many of the given seed dicts fall into each category."""
    from collections import Counter

    print("Seeds breakdown:")
    per_category = Counter(seed.get("category", "unknown") for seed in seeds)
    for category, count in sorted(per_category.items()):
        print(f" - {category}: {count}")


def _print_import_result(result):
    """Print the imported/skipped counts and up to five errors of an import."""
    errors = result.get('errors', [])
    print("\nResult:")
    print(f" Imported: {result.get('imported', 0)}")
    print(f" Skipped (duplicates): {result.get('skipped', 0)}")
    if errors:
        print(f" Errors: {len(errors)}")
        for err in errors[:5]:
            print(f" - {err}")
        if len(errors) > 5:
            print(f" ... and {len(errors) - 5} more")


async def _print_stats(client):
    """Fetch the stats endpoint and print it; a non-200 answer is ignored."""
    print("\nFetching statistics...")
    stats_response = await client.get(f"{API_BASE}/v1/edu-search/stats")
    if stats_response.status_code != 200:
        return

    stats = stats_response.json()
    print("\nDatabase Statistics:")
    print(f" Total seeds: {stats.get('total_seeds', 0)}")
    print(f" Enabled seeds: {stats.get('enabled_seeds', 0)}")
    print(f" Disabled seeds: {stats.get('disabled_seeds', 0)}")
    print(f" Avg trust boost: {stats.get('avg_trust_boost', 0):.2f}")

    per_category = stats.get('seeds_per_category', {})
    if per_category:
        print("\n Seeds per category:")
        for category, count in sorted(per_category.items()):
            print(f" - {category}: {count}")


async def load_seeds():
    """
    Load initial seeds via the bulk import API.

    Prints a per-category breakdown of INITIAL_SEEDS, checks the API health
    endpoint, posts all seeds to the bulk import endpoint and finally prints
    database statistics.

    The statistics step is informational only: a failure there is reported
    as a warning and does not change the result. Previously an exception
    while fetching statistics made a successful import return False, while
    a non-200 statistics response did not.

    No httpx exception escapes this function; connection errors, timeouts
    and unexpected errors are printed and turned into a False result.

    Returns:
        bool: True if the bulk import request answered with status 200,
        False if the API is unreachable, the import was rejected, or an
        error occurred before the import finished.
    """
    print(f"Loading {len(INITIAL_SEEDS)} seeds into {API_BASE}...")
    _print_seed_breakdown(INITIAL_SEEDS)

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            # Check API health first; only an unreachable API aborts here.
            try:
                health_response = await client.get(f"{API_BASE}/health")
            except httpx.ConnectError:
                print(f"ERROR: Cannot connect to API at {API_BASE}")
                print("Make sure the backend service is running:")
                print(" docker compose up -d backend")
                return False
            if health_response.status_code != 200:
                print(f"WARNING: Health check returned {health_response.status_code}")

            # Import seeds
            print("\nImporting seeds...")
            response = await client.post(
                f"{API_BASE}/v1/edu-search/seeds/bulk-import",
                json={"seeds": INITIAL_SEEDS}
            )
            if response.status_code != 200:
                print(f"\nERROR: Import failed with status {response.status_code}")
                try:
                    error_detail = response.json()
                    print(f" Detail: {error_detail.get('detail', response.text)}")
                except Exception:
                    # Body is not a JSON object; show the raw text instead.
                    print(f" Response: {response.text[:500]}")
                return False
            _print_import_result(response.json())

            # Get stats (best effort; the import itself already succeeded).
            try:
                await _print_stats(client)
            except (httpx.HTTPError, ValueError, TypeError, AttributeError) as e:
                print(f"\nWARNING: Could not fetch statistics - {e}")

            print("\nDone!")
            return True
    except httpx.ConnectError as e:
        print(f"\nERROR: Connection failed - {e}")
        print(f"Make sure the API is running at {API_BASE}")
        return False
    except httpx.TimeoutException:
        print("\nERROR: Request timed out")
        print("The server may be overloaded. Try again later.")
        return False
    except Exception as e:
        print(f"\nERROR: Unexpected error - {e}")
        return False


if __name__ == "__main__":
    import sys
    success = asyncio.run(load_seeds())
    sys.exit(0 if success else 1)

View File

@@ -0,0 +1,538 @@
#!/usr/bin/env python3
"""
Load German Universities (Hochschulen) as EduSearch Seeds.
Based on the HRK (Hochschulrektorenkonferenz) database, Germany has approximately:
- 120 Universities (Universitäten)
- 220 Universities of Applied Sciences (Fachhochschulen/HAW)
- 50 Art/Music Academies
- 100+ Private Universities
This script loads a comprehensive list of German higher education institutions.
"""
import httpx
import asyncio
import os
# Base URL of the API that receives the seeds; overridable through the
# LLM_GATEWAY_URL environment variable.
API_BASE = os.getenv("LLM_GATEWAY_URL", "http://localhost:8000")
# =============================================================================
# GERMAN UNIVERSITIES (UNIVERSITÄTEN) - Public
# =============================================================================
# Public universities, grouped by federal state.
# Every entry is a dict with the keys "url", "name" and "state" (two-letter
# state abbreviation); these are the fields generate_seed() reads when it
# builds the payload for the bulk-import API.
UNIVERSITAETEN = [
    # ===== BADEN-WÜRTTEMBERG =====
    {"url": "https://www.uni-freiburg.de", "name": "Albert-Ludwigs-Universität Freiburg", "state": "BW"},
    {"url": "https://www.uni-heidelberg.de", "name": "Ruprecht-Karls-Universität Heidelberg", "state": "BW"},
    {"url": "https://www.uni-konstanz.de", "name": "Universität Konstanz", "state": "BW"},
    {"url": "https://www.uni-mannheim.de", "name": "Universität Mannheim", "state": "BW"},
    {"url": "https://www.uni-stuttgart.de", "name": "Universität Stuttgart", "state": "BW"},
    {"url": "https://www.kit.edu", "name": "Karlsruher Institut für Technologie (KIT)", "state": "BW"},
    {"url": "https://www.uni-tuebingen.de", "name": "Eberhard Karls Universität Tübingen", "state": "BW"},
    {"url": "https://www.uni-ulm.de", "name": "Universität Ulm", "state": "BW"},
    {"url": "https://www.uni-hohenheim.de", "name": "Universität Hohenheim", "state": "BW"},
    # ===== BAYERN =====
    {"url": "https://www.lmu.de", "name": "Ludwig-Maximilians-Universität München", "state": "BY"},
    {"url": "https://www.tum.de", "name": "Technische Universität München", "state": "BY"},
    {"url": "https://www.fau.de", "name": "Friedrich-Alexander-Universität Erlangen-Nürnberg", "state": "BY"},
    {"url": "https://www.uni-wuerzburg.de", "name": "Julius-Maximilians-Universität Würzburg", "state": "BY"},
    {"url": "https://www.uni-regensburg.de", "name": "Universität Regensburg", "state": "BY"},
    {"url": "https://www.uni-augsburg.de", "name": "Universität Augsburg", "state": "BY"},
    {"url": "https://www.uni-bamberg.de", "name": "Otto-Friedrich-Universität Bamberg", "state": "BY"},
    {"url": "https://www.uni-bayreuth.de", "name": "Universität Bayreuth", "state": "BY"},
    {"url": "https://www.uni-passau.de", "name": "Universität Passau", "state": "BY"},
    {"url": "https://www.ku.de", "name": "Katholische Universität Eichstätt-Ingolstadt", "state": "BY"},
    # ===== BERLIN =====
    {"url": "https://www.fu-berlin.de", "name": "Freie Universität Berlin", "state": "BE"},
    {"url": "https://www.hu-berlin.de", "name": "Humboldt-Universität zu Berlin", "state": "BE"},
    {"url": "https://www.tu-berlin.de", "name": "Technische Universität Berlin", "state": "BE"},
    {"url": "https://www.charite.de", "name": "Charité Universitätsmedizin Berlin", "state": "BE"},
    # ===== BRANDENBURG =====
    {"url": "https://www.uni-potsdam.de", "name": "Universität Potsdam", "state": "BB"},
    {"url": "https://www.b-tu.de", "name": "Brandenburgische Technische Universität Cottbus-Senftenberg", "state": "BB"},
    {"url": "https://www.europa-uni.de", "name": "Europa-Universität Viadrina Frankfurt (Oder)", "state": "BB"},
    # ===== BREMEN =====
    {"url": "https://www.uni-bremen.de", "name": "Universität Bremen", "state": "HB"},
    {"url": "https://www.jacobs-university.de", "name": "Jacobs University Bremen", "state": "HB"},
    # ===== HAMBURG =====
    {"url": "https://www.uni-hamburg.de", "name": "Universität Hamburg", "state": "HH"},
    {"url": "https://www.tuhh.de", "name": "Technische Universität Hamburg", "state": "HH"},
    {"url": "https://www.hsu-hh.de", "name": "Helmut-Schmidt-Universität Hamburg", "state": "HH"},
    {"url": "https://www.hcu-hamburg.de", "name": "HafenCity Universität Hamburg", "state": "HH"},
    # ===== HESSEN =====
    {"url": "https://www.uni-frankfurt.de", "name": "Goethe-Universität Frankfurt am Main", "state": "HE"},
    {"url": "https://www.tu-darmstadt.de", "name": "Technische Universität Darmstadt", "state": "HE"},
    {"url": "https://www.uni-giessen.de", "name": "Justus-Liebig-Universität Gießen", "state": "HE"},
    {"url": "https://www.uni-marburg.de", "name": "Philipps-Universität Marburg", "state": "HE"},
    {"url": "https://www.uni-kassel.de", "name": "Universität Kassel", "state": "HE"},
    # ===== MECKLENBURG-VORPOMMERN =====
    {"url": "https://www.uni-rostock.de", "name": "Universität Rostock", "state": "MV"},
    {"url": "https://www.uni-greifswald.de", "name": "Universität Greifswald", "state": "MV"},
    # ===== NIEDERSACHSEN =====
    {"url": "https://www.uni-goettingen.de", "name": "Georg-August-Universität Göttingen", "state": "NI"},
    {"url": "https://www.uni-hannover.de", "name": "Leibniz Universität Hannover", "state": "NI"},
    {"url": "https://www.tu-braunschweig.de", "name": "Technische Universität Braunschweig", "state": "NI"},
    {"url": "https://www.tu-clausthal.de", "name": "Technische Universität Clausthal", "state": "NI"},
    {"url": "https://www.uni-oldenburg.de", "name": "Carl von Ossietzky Universität Oldenburg", "state": "NI"},
    {"url": "https://www.uni-osnabrueck.de", "name": "Universität Osnabrück", "state": "NI"},
    {"url": "https://www.uni-hildesheim.de", "name": "Universität Hildesheim", "state": "NI"},
    {"url": "https://www.leuphana.de", "name": "Leuphana Universität Lüneburg", "state": "NI"},
    {"url": "https://www.uni-vechta.de", "name": "Universität Vechta", "state": "NI"},
    {"url": "https://www.mh-hannover.de", "name": "Medizinische Hochschule Hannover", "state": "NI"},
    {"url": "https://www.tiho-hannover.de", "name": "Stiftung Tierärztliche Hochschule Hannover", "state": "NI"},
    # ===== NORDRHEIN-WESTFALEN =====
    {"url": "https://www.uni-koeln.de", "name": "Universität zu Köln", "state": "NW"},
    {"url": "https://www.uni-bonn.de", "name": "Rheinische Friedrich-Wilhelms-Universität Bonn", "state": "NW"},
    {"url": "https://www.uni-muenster.de", "name": "Westfälische Wilhelms-Universität Münster", "state": "NW"},
    {"url": "https://www.rwth-aachen.de", "name": "RWTH Aachen", "state": "NW"},
    {"url": "https://www.tu-dortmund.de", "name": "Technische Universität Dortmund", "state": "NW"},
    {"url": "https://www.ruhr-uni-bochum.de", "name": "Ruhr-Universität Bochum", "state": "NW"},
    {"url": "https://www.uni-due.de", "name": "Universität Duisburg-Essen", "state": "NW"},
    {"url": "https://www.hhu.de", "name": "Heinrich-Heine-Universität Düsseldorf", "state": "NW"},
    {"url": "https://www.uni-bielefeld.de", "name": "Universität Bielefeld", "state": "NW"},
    {"url": "https://www.uni-paderborn.de", "name": "Universität Paderborn", "state": "NW"},
    {"url": "https://www.uni-siegen.de", "name": "Universität Siegen", "state": "NW"},
    {"url": "https://www.uni-wuppertal.de", "name": "Bergische Universität Wuppertal", "state": "NW"},
    {"url": "https://www.fernuni-hagen.de", "name": "FernUniversität in Hagen", "state": "NW"},
    {"url": "https://www.dshs-koeln.de", "name": "Deutsche Sporthochschule Köln", "state": "NW"},
    # ===== RHEINLAND-PFALZ =====
    {"url": "https://www.uni-mainz.de", "name": "Johannes Gutenberg-Universität Mainz", "state": "RP"},
    {"url": "https://www.uni-trier.de", "name": "Universität Trier", "state": "RP"},
    {"url": "https://www.uni-koblenz.de", "name": "Universität Koblenz", "state": "RP"},
    {"url": "https://rptu.de", "name": "RPTU Kaiserslautern-Landau", "state": "RP"},
    # ===== SAARLAND =====
    {"url": "https://www.uni-saarland.de", "name": "Universität des Saarlandes", "state": "SL"},
    # ===== SACHSEN =====
    {"url": "https://www.tu-dresden.de", "name": "Technische Universität Dresden", "state": "SN"},
    {"url": "https://www.uni-leipzig.de", "name": "Universität Leipzig", "state": "SN"},
    {"url": "https://www.tu-chemnitz.de", "name": "Technische Universität Chemnitz", "state": "SN"},
    {"url": "https://tu-freiberg.de", "name": "TU Bergakademie Freiberg", "state": "SN"},
    # ===== SACHSEN-ANHALT =====
    {"url": "https://www.uni-halle.de", "name": "Martin-Luther-Universität Halle-Wittenberg", "state": "ST"},
    {"url": "https://www.ovgu.de", "name": "Otto-von-Guericke-Universität Magdeburg", "state": "ST"},
    # ===== SCHLESWIG-HOLSTEIN =====
    {"url": "https://www.uni-kiel.de", "name": "Christian-Albrechts-Universität zu Kiel", "state": "SH"},
    {"url": "https://www.uni-luebeck.de", "name": "Universität zu Lübeck", "state": "SH"},
    {"url": "https://www.uni-flensburg.de", "name": "Europa-Universität Flensburg", "state": "SH"},
    # ===== THÜRINGEN =====
    {"url": "https://www.uni-jena.de", "name": "Friedrich-Schiller-Universität Jena", "state": "TH"},
    {"url": "https://www.tu-ilmenau.de", "name": "Technische Universität Ilmenau", "state": "TH"},
    {"url": "https://www.uni-weimar.de", "name": "Bauhaus-Universität Weimar", "state": "TH"},
    {"url": "https://www.uni-erfurt.de", "name": "Universität Erfurt", "state": "TH"},
]
# =============================================================================
# FACHHOCHSCHULEN / HOCHSCHULEN FÜR ANGEWANDTE WISSENSCHAFTEN (HAW)
# =============================================================================
# Universities of applied sciences (Fachhochschulen / HAW), grouped by
# federal state. Every entry is a dict with the keys "url", "name" and
# "state" (two-letter state abbreviation), as read by generate_seed().
FACHHOCHSCHULEN = [
    # ===== BADEN-WÜRTTEMBERG =====
    {"url": "https://www.hs-aalen.de", "name": "Hochschule Aalen", "state": "BW"},
    {"url": "https://www.hs-albsig.de", "name": "Hochschule Albstadt-Sigmaringen", "state": "BW"},
    {"url": "https://www.hs-biberach.de", "name": "Hochschule Biberach", "state": "BW"},
    {"url": "https://www.hs-esslingen.de", "name": "Hochschule Esslingen", "state": "BW"},
    {"url": "https://www.hs-furtwangen.de", "name": "Hochschule Furtwangen", "state": "BW"},
    {"url": "https://www.hs-heilbronn.de", "name": "Hochschule Heilbronn", "state": "BW"},
    {"url": "https://www.hka.de", "name": "Hochschule Karlsruhe", "state": "BW"},
    {"url": "https://www.hs-kehl.de", "name": "Hochschule Kehl", "state": "BW"},
    {"url": "https://www.htwg-konstanz.de", "name": "HTWG Konstanz", "state": "BW"},
    {"url": "https://www.hs-ludwigsburg.de", "name": "Hochschule Ludwigsburg", "state": "BW"},
    {"url": "https://www.hs-mannheim.de", "name": "Hochschule Mannheim", "state": "BW"},
    {"url": "https://www.hdm-stuttgart.de", "name": "Hochschule der Medien Stuttgart", "state": "BW"},
    # Fixed: the previous host "hs-nueringen-geislingen.de" was misspelled;
    # the institution's site is served from hfwu.de.
    {"url": "https://www.hfwu.de", "name": "Hochschule für Wirtschaft und Umwelt Nürtingen-Geislingen", "state": "BW"},
    {"url": "https://www.hs-offenburg.de", "name": "Hochschule Offenburg", "state": "BW"},
    {"url": "https://www.hs-pforzheim.de", "name": "Hochschule Pforzheim", "state": "BW"},
    {"url": "https://www.hs-ravensburg-weingarten.de", "name": "RWU Ravensburg-Weingarten", "state": "BW"},
    {"url": "https://www.reutlingen-university.de", "name": "Hochschule Reutlingen", "state": "BW"},
    {"url": "https://www.hs-rottenburg.de", "name": "Hochschule für Forstwirtschaft Rottenburg", "state": "BW"},
    {"url": "https://www.hft-stuttgart.de", "name": "Hochschule für Technik Stuttgart", "state": "BW"},
    {"url": "https://www.hs-ulm.de", "name": "Technische Hochschule Ulm", "state": "BW"},
    {"url": "https://www.dhbw.de", "name": "Duale Hochschule Baden-Württemberg", "state": "BW"},
    # ===== BAYERN =====
    {"url": "https://www.hm.edu", "name": "Hochschule München", "state": "BY"},
    {"url": "https://www.oth-regensburg.de", "name": "OTH Regensburg", "state": "BY"},
    {"url": "https://www.th-nuernberg.de", "name": "Technische Hochschule Nürnberg", "state": "BY"},
    {"url": "https://www.th-deg.de", "name": "Technische Hochschule Deggendorf", "state": "BY"},
    {"url": "https://www.haw-landshut.de", "name": "Hochschule Landshut", "state": "BY"},
    {"url": "https://www.hs-kempten.de", "name": "Hochschule Kempten", "state": "BY"},
    {"url": "https://www.hs-coburg.de", "name": "Hochschule Coburg", "state": "BY"},
    {"url": "https://www.hs-ansbach.de", "name": "Hochschule Ansbach", "state": "BY"},
    {"url": "https://www.hs-augsburg.de", "name": "Hochschule Augsburg", "state": "BY"},
    {"url": "https://www.th-ab.de", "name": "Technische Hochschule Aschaffenburg", "state": "BY"},
    {"url": "https://www.oth-aw.de", "name": "OTH Amberg-Weiden", "state": "BY"},
    {"url": "https://www.hswt.de", "name": "Hochschule Weihenstephan-Triesdorf", "state": "BY"},
    {"url": "https://www.th-rosenheim.de", "name": "Technische Hochschule Rosenheim", "state": "BY"},
    {"url": "https://www.fhws.de", "name": "Hochschule für angewandte Wissenschaften Würzburg-Schweinfurt", "state": "BY"},
    {"url": "https://www.hs-neu-ulm.de", "name": "Hochschule Neu-Ulm", "state": "BY"},
    {"url": "https://www.th-ingolstadt.de", "name": "Technische Hochschule Ingolstadt", "state": "BY"},
    {"url": "https://www.iubh.de", "name": "IU Internationale Hochschule", "state": "BY"},
    # ===== BERLIN =====
    {"url": "https://www.htw-berlin.de", "name": "HTW Berlin", "state": "BE"},
    {"url": "https://www.bht-berlin.de", "name": "Berliner Hochschule für Technik", "state": "BE"},
    {"url": "https://www.hwr-berlin.de", "name": "HWR Berlin", "state": "BE"},
    {"url": "https://www.ash-berlin.eu", "name": "Alice Salomon Hochschule Berlin", "state": "BE"},
    # Fixed: the previous host "khb.hfm-berlin.de" was a subdomain of the
    # Hanns Eisler music academy's domain; the art academy uses kh-berlin.de.
    {"url": "https://www.kh-berlin.de", "name": "Kunsthochschule Berlin-Weißensee", "state": "BE"},
    # NOTE(review): the name refers to NRW while URL and state say Berlin -
    # confirm which institution is meant before relying on this seed.
    {"url": "https://www.hfpv-berlin.de", "name": "HfPV - Hochschule für Polizei und öffentliche Verwaltung NRW", "state": "BE"},
    # ===== BRANDENBURG =====
    {"url": "https://www.th-wildau.de", "name": "Technische Hochschule Wildau", "state": "BB"},
    {"url": "https://www.fh-potsdam.de", "name": "Fachhochschule Potsdam", "state": "BB"},
    {"url": "https://www.th-brandenburg.de", "name": "Technische Hochschule Brandenburg", "state": "BB"},
    {"url": "https://www.hnee.de", "name": "Hochschule für nachhaltige Entwicklung Eberswalde", "state": "BB"},
    # ===== BREMEN =====
    {"url": "https://www.hs-bremen.de", "name": "Hochschule Bremen", "state": "HB"},
    {"url": "https://www.hs-bremerhaven.de", "name": "Hochschule Bremerhaven", "state": "HB"},
    # ===== HAMBURG =====
    {"url": "https://www.haw-hamburg.de", "name": "HAW Hamburg", "state": "HH"},
    # NOTE(review): the town of Wedel presumably belongs to Schleswig-Holstein
    # rather than Hamburg - confirm the intended state code.
    {"url": "https://www.fh-wedel.de", "name": "Fachhochschule Wedel", "state": "HH"},
    # ===== HESSEN =====
    {"url": "https://www.h-da.de", "name": "Hochschule Darmstadt", "state": "HE"},
    {"url": "https://www.thm.de", "name": "Technische Hochschule Mittelhessen", "state": "HE"},
    {"url": "https://www.frankfurt-university.de", "name": "Frankfurt University of Applied Sciences", "state": "HE"},
    {"url": "https://www.hs-fulda.de", "name": "Hochschule Fulda", "state": "HE"},
    {"url": "https://www.hs-rm.de", "name": "Hochschule RheinMain", "state": "HE"},
    # ===== MECKLENBURG-VORPOMMERN =====
    {"url": "https://www.hs-wismar.de", "name": "Hochschule Wismar", "state": "MV"},
    {"url": "https://www.hs-nb.de", "name": "Hochschule Neubrandenburg", "state": "MV"},
    {"url": "https://www.hs-stralsund.de", "name": "Hochschule Stralsund", "state": "MV"},
    # ===== NIEDERSACHSEN =====
    {"url": "https://www.hs-hannover.de", "name": "Hochschule Hannover", "state": "NI"},
    {"url": "https://www.ostfalia.de", "name": "Ostfalia Hochschule", "state": "NI"},
    {"url": "https://www.jade-hs.de", "name": "Jade Hochschule", "state": "NI"},
    {"url": "https://www.hs-osnabrueck.de", "name": "Hochschule Osnabrück", "state": "NI"},
    {"url": "https://www.hawk.de", "name": "HAWK Hildesheim/Holzminden/Göttingen", "state": "NI"},
    {"url": "https://www.hs-emden-leer.de", "name": "Hochschule Emden/Leer", "state": "NI"},
    # ===== NORDRHEIN-WESTFALEN =====
    {"url": "https://www.th-koeln.de", "name": "TH Köln", "state": "NW"},
    {"url": "https://www.fh-dortmund.de", "name": "Fachhochschule Dortmund", "state": "NW"},
    {"url": "https://www.fh-aachen.de", "name": "FH Aachen", "state": "NW"},
    {"url": "https://www.hs-duesseldorf.de", "name": "Hochschule Düsseldorf", "state": "NW"},
    {"url": "https://www.hs-niederrhein.de", "name": "Hochschule Niederrhein", "state": "NW"},
    {"url": "https://www.hs-bochum.de", "name": "Hochschule Bochum", "state": "NW"},
    {"url": "https://www.hs-owl.de", "name": "Technische Hochschule Ostwestfalen-Lippe", "state": "NW"},
    {"url": "https://www.w-hs.de", "name": "Westfälische Hochschule", "state": "NW"},
    # NOTE(review): "FH Bielefeld" and "Hochschule Bielefeld" (hsbi.de, below)
    # look like the same institution under its old and new name - confirm
    # and drop one of the two if so.
    {"url": "https://www.fh-bielefeld.de", "name": "FH Bielefeld", "state": "NW"},
    {"url": "https://www.hs-hamm-lippstadt.de", "name": "Hochschule Hamm-Lippstadt", "state": "NW"},
    {"url": "https://www.fh-swf.de", "name": "Fachhochschule Südwestfalen", "state": "NW"},
    {"url": "https://www.hsbi.de", "name": "Hochschule Bielefeld", "state": "NW"},
    {"url": "https://www.hs-rhein-waal.de", "name": "Hochschule Rhein-Waal", "state": "NW"},
    {"url": "https://www.hs-ruhrwest.de", "name": "Hochschule Ruhr West", "state": "NW"},
    # ===== RHEINLAND-PFALZ =====
    {"url": "https://www.hs-mainz.de", "name": "Hochschule Mainz", "state": "RP"},
    {"url": "https://www.hs-koblenz.de", "name": "Hochschule Koblenz", "state": "RP"},
    {"url": "https://www.hs-worms.de", "name": "Hochschule Worms", "state": "RP"},
    {"url": "https://www.hs-trier.de", "name": "Hochschule Trier", "state": "RP"},
    {"url": "https://www.hs-kaiserslautern.de", "name": "Hochschule Kaiserslautern", "state": "RP"},
    {"url": "https://www.hs-lu.de", "name": "Hochschule für Wirtschaft und Gesellschaft Ludwigshafen", "state": "RP"},
    {"url": "https://www.hs-bingen.de", "name": "Technische Hochschule Bingen", "state": "RP"},
    # ===== SAARLAND =====
    {"url": "https://www.htwsaar.de", "name": "htw saar", "state": "SL"},
    # ===== SACHSEN =====
    {"url": "https://www.htw-dresden.de", "name": "HTW Dresden", "state": "SN"},
    {"url": "https://www.htwk-leipzig.de", "name": "HTWK Leipzig", "state": "SN"},
    {"url": "https://www.hs-mittweida.de", "name": "Hochschule Mittweida", "state": "SN"},
    {"url": "https://www.fh-zwickau.de", "name": "Westsächsische Hochschule Zwickau", "state": "SN"},
    {"url": "https://www.hs-zittau-goerlitz.de", "name": "Hochschule Zittau/Görlitz", "state": "SN"},
    # ===== SACHSEN-ANHALT =====
    {"url": "https://www.hs-magdeburg.de", "name": "Hochschule Magdeburg-Stendal", "state": "ST"},
    {"url": "https://www.hs-harz.de", "name": "Hochschule Harz", "state": "ST"},
    {"url": "https://www.hs-merseburg.de", "name": "Hochschule Merseburg", "state": "ST"},
    {"url": "https://www.hs-anhalt.de", "name": "Hochschule Anhalt", "state": "ST"},
    # ===== SCHLESWIG-HOLSTEIN =====
    {"url": "https://www.fh-kiel.de", "name": "Fachhochschule Kiel", "state": "SH"},
    {"url": "https://www.fh-westkueste.de", "name": "Fachhochschule Westküste", "state": "SH"},
    {"url": "https://www.th-luebeck.de", "name": "Technische Hochschule Lübeck", "state": "SH"},
    {"url": "https://www.fh-flensburg.de", "name": "Hochschule Flensburg", "state": "SH"},
    # ===== THÜRINGEN =====
    {"url": "https://www.fh-erfurt.de", "name": "Fachhochschule Erfurt", "state": "TH"},
    {"url": "https://www.eah-jena.de", "name": "Ernst-Abbe-Hochschule Jena", "state": "TH"},
    {"url": "https://www.hs-schmalkalden.de", "name": "Hochschule Schmalkalden", "state": "TH"},
    {"url": "https://www.hs-nordhausen.de", "name": "Hochschule Nordhausen", "state": "TH"},
]
# =============================================================================
# PÄDAGOGISCHE HOCHSCHULEN (nur Baden-Württemberg)
# =============================================================================
# Universities of education (Pädagogische Hochschulen); all entries carry the
# state code "BW". Every entry is a dict with the keys "url", "name" and
# "state", as read by generate_seed().
PAEDAGOGISCHE_HOCHSCHULEN = [
    {"url": "https://www.ph-freiburg.de", "name": "Pädagogische Hochschule Freiburg", "state": "BW"},
    {"url": "https://www.ph-heidelberg.de", "name": "Pädagogische Hochschule Heidelberg", "state": "BW"},
    {"url": "https://www.ph-karlsruhe.de", "name": "Pädagogische Hochschule Karlsruhe", "state": "BW"},
    {"url": "https://www.ph-ludwigsburg.de", "name": "Pädagogische Hochschule Ludwigsburg", "state": "BW"},
    # Fixed: the previous URL contained raw umlauts in the hostname
    # ("ph-schwäbisch-gmünd.de"); the institution's ASCII domain is used instead.
    {"url": "https://www.ph-gmuend.de", "name": "Pädagogische Hochschule Schwäbisch Gmünd", "state": "BW"},
    {"url": "https://www.ph-weingarten.de", "name": "Pädagogische Hochschule Weingarten", "state": "BW"},
]
# =============================================================================
# KUNST- UND MUSIKHOCHSCHULEN
# =============================================================================
# Art, music and film academies, grouped by federal state. Every entry is a
# dict with the keys "url", "name" and "state" (two-letter state
# abbreviation), as read by generate_seed().
KUNSTHOCHSCHULEN = [
    # ===== BADEN-WÜRTTEMBERG =====
    {"url": "https://www.abk-stuttgart.de", "name": "Staatliche Akademie der Bildenden Künste Stuttgart", "state": "BW"},
    {"url": "https://www.hfg-karlsruhe.de", "name": "Staatliche Hochschule für Gestaltung Karlsruhe", "state": "BW"},
    {"url": "https://www.mh-freiburg.de", "name": "Hochschule für Musik Freiburg", "state": "BW"},
    {"url": "https://www.hmdk-stuttgart.de", "name": "Hochschule für Musik und Darstellende Kunst Stuttgart", "state": "BW"},
    {"url": "https://www.hfm-karlsruhe.de", "name": "Hochschule für Musik Karlsruhe", "state": "BW"},
    {"url": "https://www.hfm-trossingen.de", "name": "Hochschule für Musik Trossingen", "state": "BW"},
    {"url": "https://www.filmakademie.de", "name": "Filmakademie Baden-Württemberg", "state": "BW"},
    {"url": "https://www.popakademie.de", "name": "Popakademie Baden-Württemberg", "state": "BW"},
    # ===== BAYERN =====
    {"url": "https://www.adbk.de", "name": "Akademie der Bildenden Künste München", "state": "BY"},
    {"url": "https://www.adbk-nuernberg.de", "name": "Akademie der Bildenden Künste Nürnberg", "state": "BY"},
    {"url": "https://www.hmtm.de", "name": "Hochschule für Musik und Theater München", "state": "BY"},
    {"url": "https://www.hfm-wuerzburg.de", "name": "Hochschule für Musik Würzburg", "state": "BY"},
    {"url": "https://www.hff-muenchen.de", "name": "Hochschule für Fernsehen und Film München", "state": "BY"},
    # ===== BERLIN =====
    {"url": "https://www.udk-berlin.de", "name": "Universität der Künste Berlin", "state": "BE"},
    {"url": "https://www.hfm-berlin.de", "name": "Hochschule für Musik Hanns Eisler Berlin", "state": "BE"},
    {"url": "https://www.dffb.de", "name": "Deutsche Film- und Fernsehakademie Berlin", "state": "BE"},
    # ===== HAMBURG =====
    {"url": "https://www.hfbk-hamburg.de", "name": "Hochschule für bildende Künste Hamburg", "state": "HH"},
    {"url": "https://www.hfmt-hamburg.de", "name": "Hochschule für Musik und Theater Hamburg", "state": "HH"},
    # ===== HESSEN =====
    {"url": "https://www.hfg-offenbach.de", "name": "Hochschule für Gestaltung Offenbach", "state": "HE"},
    {"url": "https://www.hfmdk-frankfurt.de", "name": "Hochschule für Musik und Darstellende Kunst Frankfurt", "state": "HE"},
    # ===== NORDRHEIN-WESTFALEN =====
    {"url": "https://www.kunstakademie-duesseldorf.de", "name": "Kunstakademie Düsseldorf", "state": "NW"},
    {"url": "https://www.kunstakademie-muenster.de", "name": "Kunstakademie Münster", "state": "NW"},
    {"url": "https://www.hfmt-koeln.de", "name": "Hochschule für Musik und Tanz Köln", "state": "NW"},
    {"url": "https://www.folkwang-uni.de", "name": "Folkwang Universität der Künste", "state": "NW"},
    {"url": "https://www.rsh-duesseldorf.de", "name": "Robert Schumann Hochschule Düsseldorf", "state": "NW"},
    {"url": "https://www.hfm-detmold.de", "name": "Hochschule für Musik Detmold", "state": "NW"},
    # ===== SACHSEN =====
    {"url": "https://www.hfbk-dresden.de", "name": "Hochschule für Bildende Künste Dresden", "state": "SN"},
    {"url": "https://www.hmt-leipzig.de", "name": "Hochschule für Musik und Theater Leipzig", "state": "SN"},
    {"url": "https://www.hfmdd.de", "name": "Hochschule für Musik Carl Maria von Weber Dresden", "state": "SN"},
    {"url": "https://www.palucca.eu", "name": "Palucca Hochschule für Tanz Dresden", "state": "SN"},
    # Other states
    # (the Hochschule für Künste Bremen entry used to be listed twice in this
    # group; the duplicate was removed so each URL is submitted only once)
    {"url": "https://www.hfk-bremen.de", "name": "Hochschule für Künste Bremen", "state": "HB"},
    {"url": "https://www.burg-halle.de", "name": "Burg Giebichenstein Kunsthochschule Halle", "state": "ST"},
    {"url": "https://www.hmtm-hannover.de", "name": "Hochschule für Musik, Theater und Medien Hannover", "state": "NI"},
    {"url": "https://www.muho-mannheim.de", "name": "Hochschule für Musik und Darstellende Kunst Mannheim", "state": "BW"},
    {"url": "https://www.hfm-saar.de", "name": "Hochschule für Musik Saar", "state": "SL"},
    {"url": "https://www.hfm-weimar.de", "name": "Hochschule für Musik Franz Liszt Weimar", "state": "TH"},
    {"url": "https://www.mh-luebeck.de", "name": "Musikhochschule Lübeck", "state": "SH"},
    {"url": "https://www.hmt-rostock.de", "name": "Hochschule für Musik und Theater Rostock", "state": "MV"},
]
# =============================================================================
# PRIVATE HOCHSCHULEN (Auswahl der wichtigsten)
# =============================================================================
# Selection of private higher-education institutions. Every entry is a dict
# with the keys "url", "name" and "state" (two-letter state abbreviation), as
# read by generate_seed().
PRIVATE_HOCHSCHULEN = [
    {"url": "https://www.srh.de", "name": "SRH Hochschule", "state": "BW"},
    {"url": "https://www.escp.eu", "name": "ESCP Business School Berlin", "state": "BE"},
    {"url": "https://www.hertie-school.org", "name": "Hertie School", "state": "BE"},
    {"url": "https://www.steinbeis-hochschule.de", "name": "Steinbeis Hochschule", "state": "BE"},
    {"url": "https://www.code.berlin", "name": "CODE University of Applied Sciences", "state": "BE"},
    {"url": "https://www.whu.edu", "name": "WHU Otto Beisheim School of Management", "state": "RP"},
    {"url": "https://www.ebs.edu", "name": "EBS Universität", "state": "HE"},
    {"url": "https://www.fom.de", "name": "FOM Hochschule", "state": "NW"},
    {"url": "https://www.macromedia-fachhochschule.de", "name": "Hochschule Macromedia", "state": "BY"},
    {"url": "https://www.ism.de", "name": "International School of Management", "state": "NW"},
    {"url": "https://www.hhl.de", "name": "HHL Leipzig Graduate School of Management", "state": "SN"},
    {"url": "https://www.fs.de", "name": "Frankfurt School of Finance & Management", "state": "HE"},
    {"url": "https://www.bits-iserlohn.de", "name": "BiTS Hochschule", "state": "NW"},
    # NOTE(review): the ".at" domain suggests an Austrian institution, yet the
    # entry is tagged with the German state code BY - confirm it belongs here.
    {"url": "https://www.umit.at", "name": "UMIT - Private Universität für Gesundheitswissenschaften", "state": "BY"},
    {"url": "https://www.bucerius-law-school.de", "name": "Bucerius Law School", "state": "HH"},
    {"url": "https://www.akad.de", "name": "AKAD Hochschule Stuttgart", "state": "BW"},
    {"url": "https://www.diploma.de", "name": "DIPLOMA Hochschule", "state": "HE"},
    {"url": "https://www.apollon-hochschule.de", "name": "APOLLON Hochschule", "state": "HB"},
    {"url": "https://www.euro-fh.de", "name": "Euro-FH Hamburg", "state": "HH"},
    {"url": "https://www.wings.de", "name": "WINGS Fernstudium", "state": "MV"},
]
def generate_seed(uni: dict, category: str, source_type: str, trust_boost: float) -> dict:
    """Build one bulk-import seed payload from a university record.

    Args:
        uni: Record with the mandatory keys "url" and "name" and an optional
            "state" key (an empty string is used when it is absent).
        category: Value sent as "category_name" (the key the API expects).
        source_type: Value sent as "source_type".
        trust_boost: Value sent as "trust_boost".

    Returns:
        dict: The seed payload; "scope" is always "STATE".

    Raises:
        KeyError: If ``uni`` has no "url" or no "name" entry.
    """
    site_url = uni["url"]
    display_name = uni["name"]

    seed = {"url": site_url, "name": display_name}
    seed["description"] = f"Deutsche Hochschule - {display_name}"
    seed["category_name"] = category  # API expects category_name, not category
    seed["trust_boost"] = trust_boost
    seed["source_type"] = source_type
    seed["scope"] = "STATE"
    seed["state"] = uni.get("state", "")
    return seed
def get_all_university_seeds() -> list:
    """Return the seed payloads for every institution group, ready for the API.

    The groups are emitted in a fixed order (universities, universities of
    applied sciences, universities of education, art/music academies, private
    institutions). All seeds share the category "universities"; source type
    and trust boost depend on the group.
    """
    # (institutions, source_type, trust_boost) for each group, in output order
    groups = (
        (UNIVERSITAETEN, "UNI", 0.90),
        (FACHHOCHSCHULEN, "FH", 0.85),
        # universities of education - very relevant for education content
        (PAEDAGOGISCHE_HOCHSCHULEN, "PH", 0.92),
        (KUNSTHOCHSCHULEN, "KUNST", 0.80),
        (PRIVATE_HOCHSCHULEN, "PRIVATE", 0.75),
    )
    return [
        generate_seed(institution, "universities", kind, boost)
        for institutions, kind, boost in groups
        for institution in institutions
    ]
async def load_university_seeds():
    """Load all university seeds into the EduSearch API via bulk import.

    Prints a breakdown of the seeds (per institution group and per state),
    probes ``{API_BASE}/health``, posts the seeds in batches of 100 to
    ``/v1/edu-search/seeds/bulk-import`` and finally prints the accumulated
    totals together with the statistics from ``/v1/edu-search/stats``.

    A non-200 health response only produces a warning. A batch that is
    answered with a non-200 status is reported and counted; the remaining
    batches are still attempted, but the overall run is then reported as
    failed (previously such a run still returned True and the script exited
    with status 0).

    Returns:
        bool: True when every batch was accepted; False when the API cannot
        be reached, a request times out, an unexpected error occurs, or at
        least one batch was rejected.
    """
    seeds = get_all_university_seeds()
    print(f"Loading {len(seeds)} university seeds into {API_BASE}...")
    print("\nBreakdown:")
    print(f" - Universitäten: {len(UNIVERSITAETEN)}")
    print(f" - Fachhochschulen: {len(FACHHOCHSCHULEN)}")
    print(f" - Pädagogische Hochschulen: {len(PAEDAGOGISCHE_HOCHSCHULEN)}")
    print(f" - Kunst-/Musikhochschulen: {len(KUNSTHOCHSCHULEN)}")
    print(f" - Private Hochschulen: {len(PRIVATE_HOCHSCHULEN)}")
    print(f" - TOTAL: {len(seeds)}")

    # Count by state
    by_state = {}
    for seed in seeds:
        state = seed.get("state", "unknown")
        by_state[state] = by_state.get(state, 0) + 1
    print("\nBy state:")
    for state, count in sorted(by_state.items()):
        print(f" - {state}: {count}")

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            # Check API health first
            try:
                health_response = await client.get(f"{API_BASE}/health")
                if health_response.status_code != 200:
                    print(f"WARNING: Health check returned {health_response.status_code}")
            except httpx.ConnectError:
                print(f"ERROR: Cannot connect to API at {API_BASE}")
                print("Make sure the backend service is running:")
                print(" docker compose up -d backend")
                return False

            # Import seeds in batches to avoid timeout
            batch_size = 100
            total_imported = 0
            total_skipped = 0
            failed_batches = 0
            all_errors = []
            for i in range(0, len(seeds), batch_size):
                batch = seeds[i:i + batch_size]
                print(f"\nImporting batch {i // batch_size + 1} ({len(batch)} seeds)...")
                response = await client.post(
                    f"{API_BASE}/v1/edu-search/seeds/bulk-import",
                    json={"seeds": batch}
                )
                if response.status_code == 200:
                    result = response.json()
                    imported = result.get('imported', 0)
                    skipped = result.get('skipped', 0)
                    errors = result.get('errors', [])
                    total_imported += imported
                    total_skipped += skipped
                    all_errors.extend(errors)
                    print(f" Imported: {imported}, Skipped: {skipped}")
                else:
                    # Remember the failure so the run is not reported as a
                    # success, but keep going with the remaining batches.
                    failed_batches += 1
                    print(f" ERROR: Batch failed with status {response.status_code}")
                    try:
                        error_detail = response.json()
                        print(f" Detail: {error_detail.get('detail', response.text[:200])}")
                    except Exception:
                        # The body was not JSON (or not a JSON object); show it raw.
                        print(f" Response: {response.text[:200]}")

            print(f"\n{'='*50}")
            print("TOTAL RESULTS:")
            print(f" Imported: {total_imported}")
            print(f" Skipped (duplicates): {total_skipped}")
            if failed_batches:
                print(f" Failed batches: {failed_batches}")
            if all_errors:
                print(f" Errors: {len(all_errors)}")
                for err in all_errors[:10]:
                    print(f" - {err}")
                if len(all_errors) > 10:
                    print(f" ... and {len(all_errors) - 10} more")

            # Get stats
            print("\nFetching statistics...")
            stats_response = await client.get(f"{API_BASE}/v1/edu-search/stats")
            if stats_response.status_code == 200:
                stats = stats_response.json()
                print("\nDatabase Statistics:")
                print(f" Total seeds: {stats.get('total_seeds', 0)}")
                print(f" Enabled seeds: {stats.get('enabled_seeds', 0)}")

            if failed_batches:
                print(f"\nERROR: {failed_batches} batch(es) could not be imported")
                return False
            print("\nDone!")
            return True
    except httpx.ConnectError as e:
        print(f"\nERROR: Connection failed - {e}")
        return False
    except httpx.TimeoutException:
        print("\nERROR: Request timed out")
        return False
    except Exception as e:
        # Top-level boundary of the script: report the error with a traceback
        # and signal failure instead of crashing.
        print(f"\nERROR: Unexpected error - {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    import sys

    # Banner framed by two rules of 60 "=" characters.
    rule = "=" * 60
    for banner_line in (rule, "German University Seeds Loader", rule):
        print(banner_line)

    # Exit status 0 when the loader reports success, 1 otherwise.
    exit_status = 0 if asyncio.run(load_university_seeds()) else 1
    sys.exit(exit_status)

View File

@@ -0,0 +1,314 @@
#!/usr/bin/env python3
"""
Test script for Compliance AI API Endpoints.
Usage:
python scripts/test_compliance_ai_endpoints.py
Environment:
BACKEND_URL: Base URL of the backend (default: http://localhost:8000)
COMPLIANCE_LLM_PROVIDER: Set to "mock" for testing without API keys
"""
import asyncio
import os
import sys
from typing import Dict, Any
import httpx
class ComplianceAITester:
"""Tester for Compliance AI endpoints."""
def __init__(self, base_url: str = "http://localhost:8000"):
self.base_url = base_url.rstrip("/")
self.api_prefix = f"{self.base_url}/api/v1/compliance"
async def test_ai_status(self) -> Dict[str, Any]:
    """Call GET {api_prefix}/ai/status, print the reported fields and return the JSON body.

    An error status is turned into an exception by ``raise_for_status()``;
    a missing "provider", "model", "is_available" or "is_mock" key raises
    KeyError.
    """
    print("\n=== Testing AI Status ===")
    # (label, response key) pairs, printed in this order
    reported_fields = (
        ("Provider", 'provider'),
        ("Model", 'model'),
        ("Available", 'is_available'),
        ("Is Mock", 'is_mock'),
    )
    async with httpx.AsyncClient() as http:
        reply = await http.get(f"{self.api_prefix}/ai/status")
        reply.raise_for_status()
        payload = reply.json()
        for label, key in reported_fields:
            print(f"{label}: {payload[key]}")
        error = payload.get("error")
        if error:
            print(f"Error: {error}")
        return payload
async def test_interpret_requirement(self, requirement_id: str) -> Dict[str, Any]:
    """Call POST {api_prefix}/ai/interpret for one requirement and print the result.

    Args:
        requirement_id: ID sent as "requirement_id"; "force_refresh" is
            always sent as False.

    Returns:
        The decoded JSON body, or an empty dict when the API answers 404.
        Other error statuses are raised by ``raise_for_status()``.
    """
    print("\n=== Testing Requirement Interpretation ===")
    print(f"Requirement ID: {requirement_id}")
    request_body = {"requirement_id": requirement_id, "force_refresh": False}
    async with httpx.AsyncClient(timeout=60.0) as http:
        reply = await http.post(f"{self.api_prefix}/ai/interpret", json=request_body)
        if reply.status_code == 404:
            print(f"ERROR: Requirement {requirement_id} not found")
            return {}
        reply.raise_for_status()
        result = reply.json()

        # Long texts are cut to their first 100 characters for display.
        print(f"\nSummary: {result['summary'][:100]}...")
        print(f"Applicability: {result['applicability'][:100]}...")
        print(f"Risk Level: {result['risk_level']}")
        modules = ', '.join(result['affected_modules'])
        print(f"Affected Modules: {modules}")
        measure_count = len(result['technical_measures'])
        print(f"Technical Measures: {measure_count} measures")
        print(f"Confidence: {result['confidence_score']:.2f}")
        error = result.get("error")
        if error:
            print(f"Error: {error}")
        return result
async def test_suggest_controls(self, requirement_id: str) -> Dict[str, Any]:
"""Test POST /ai/suggest-controls endpoint."""
print(f"\n=== Testing Control Suggestions ===")
print(f"Requirement ID: {requirement_id}")
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
f"{self.api_prefix}/ai/suggest-controls",
json={"requirement_id": requirement_id}
)
if response.status_code == 404:
print(f"ERROR: Requirement {requirement_id} not found")
return {}
response.raise_for_status()
data = response.json()
print(f"\nFound {len(data['suggestions'])} control suggestions:")
for i, ctrl in enumerate(data['suggestions'], 1):
print(f"\n{i}. {ctrl['control_id']}: {ctrl['title']}")
print(f" Domain: {ctrl['domain']}")
print(f" Priority: {ctrl['priority']}")
print(f" Automated: {ctrl['is_automated']}")
if ctrl['automation_tool']:
print(f" Tool: {ctrl['automation_tool']}")
print(f" Confidence: {ctrl['confidence_score']:.2f}")
return data
async def test_assess_module_risk(self, module_id: str) -> Dict[str, Any]:
"""Test POST /ai/assess-risk endpoint."""
print(f"\n=== Testing Module Risk Assessment ===")
print(f"Module ID: {module_id}")
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
f"{self.api_prefix}/ai/assess-risk",
json={"module_id": module_id}
)
if response.status_code == 404:
print(f"ERROR: Module {module_id} not found")
return {}
response.raise_for_status()
data = response.json()
print(f"\nModule: {data['module_name']}")
print(f"Overall Risk: {data['overall_risk']}")
print(f"\nRisk Factors:")
for factor in data['risk_factors']:
print(f" - {factor['factor']}")
print(f" Severity: {factor['severity']}, Likelihood: {factor['likelihood']}")
print(f"\nRecommendations:")
for rec in data['recommendations']:
print(f" - {rec}")
print(f"\nCompliance Gaps:")
for gap in data['compliance_gaps']:
print(f" - {gap}")
print(f"\nConfidence: {data['confidence_score']:.2f}")
return data
async def test_gap_analysis(self, requirement_id: str) -> Dict[str, Any]:
"""Test POST /ai/gap-analysis endpoint."""
print(f"\n=== Testing Gap Analysis ===")
print(f"Requirement ID: {requirement_id}")
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
f"{self.api_prefix}/ai/gap-analysis",
json={"requirement_id": requirement_id}
)
if response.status_code == 404:
print(f"ERROR: Requirement {requirement_id} not found")
return {}
response.raise_for_status()
data = response.json()
print(f"\nRequirement: {data['requirement_title']}")
print(f"Coverage Level: {data['coverage_level']}")
print(f"\nExisting Controls:")
for ctrl in data['existing_controls']:
print(f" - {ctrl}")
print(f"\nMissing Coverage:")
for missing in data['missing_coverage']:
print(f" - {missing}")
print(f"\nSuggested Actions:")
for action in data['suggested_actions']:
print(f" - {action}")
return data
async def test_batch_interpret(self, requirement_ids: list) -> Dict[str, Any]:
"""Test POST /ai/batch-interpret endpoint."""
print(f"\n=== Testing Batch Interpretation ===")
print(f"Requirements: {len(requirement_ids)}")
async with httpx.AsyncClient(timeout=120.0) as client:
response = await client.post(
f"{self.api_prefix}/ai/batch-interpret",
json={
"requirement_ids": requirement_ids,
"rate_limit": 1.0
}
)
response.raise_for_status()
data = response.json()
print(f"\nTotal: {data['total']}")
print(f"Processed: {data['processed']}")
print(f"Success Rate: {data['processed']/data['total']*100:.1f}%")
if data['interpretations']:
print(f"\nFirst interpretation:")
first = data['interpretations'][0]
print(f" ID: {first['requirement_id']}")
print(f" Summary: {first['summary'][:100]}...")
print(f" Risk: {first['risk_level']}")
return data
async def get_sample_requirement_id(self) -> str:
"""Get a sample requirement ID from the database."""
async with httpx.AsyncClient() as client:
# Try to get requirements
response = await client.get(f"{self.api_prefix}/requirements?limit=1")
if response.status_code == 200:
data = response.json()
if data["requirements"]:
return data["requirements"][0]["id"]
return None
async def get_sample_module_id(self) -> str:
"""Get a sample module ID from the database."""
async with httpx.AsyncClient() as client:
# Try to get modules
response = await client.get(f"{self.api_prefix}/modules")
if response.status_code == 200:
data = response.json()
if data["modules"]:
return data["modules"][0]["id"]
return None
async def run_all_tests(self):
"""Run all endpoint tests."""
print("=" * 70)
print("Compliance AI Endpoints Test Suite")
print("=" * 70)
# Test AI status first
try:
status = await self.test_ai_status()
if not status.get("is_available"):
print("\n⚠️ WARNING: AI provider is not available!")
print("Set COMPLIANCE_LLM_PROVIDER=mock for testing without API keys")
return
except Exception as e:
print(f"\n❌ ERROR: Could not connect to backend: {e}")
return
# Get sample IDs
print("\n--- Fetching sample data ---")
requirement_id = await self.get_sample_requirement_id()
module_id = await self.get_sample_module_id()
if not requirement_id:
print("\n⚠️ WARNING: No requirements found in database")
print("Run seed command first: POST /api/v1/compliance/seed")
return
print(f"Sample Requirement ID: {requirement_id}")
if module_id:
print(f"Sample Module ID: {module_id}")
# Run tests
tests = [
("Interpret Requirement", self.test_interpret_requirement(requirement_id)),
("Suggest Controls", self.test_suggest_controls(requirement_id)),
("Gap Analysis", self.test_gap_analysis(requirement_id)),
]
if module_id:
tests.append(("Assess Module Risk", self.test_assess_module_risk(module_id)))
# Execute tests
results = {"passed": 0, "failed": 0}
for test_name, test_coro in tests:
try:
await test_coro
results["passed"] += 1
print(f"\n{test_name} - PASSED")
except Exception as e:
results["failed"] += 1
print(f"\n{test_name} - FAILED: {e}")
# Summary
print("\n" + "=" * 70)
print("Test Summary")
print("=" * 70)
print(f"✅ Passed: {results['passed']}")
print(f"❌ Failed: {results['failed']}")
print(f"Total: {results['passed'] + results['failed']}")
print("=" * 70)
async def main():
    """Run the endpoint test suite against the configured backend.

    Reads ``BACKEND_URL`` (default ``http://localhost:8000``) for the target
    and echoes ``COMPLIANCE_LLM_PROVIDER`` for information only.
    """
    backend_url = os.getenv("BACKEND_URL", "http://localhost:8000")
    print(f"Backend URL: {backend_url}")
    print(f"Provider: {os.getenv('COMPLIANCE_LLM_PROVIDER', 'default')}")
    tester = ComplianceAITester(base_url=backend_url)
    await tester.run_all_tests()


if __name__ == "__main__":
    # Handle Ctrl-C here rather than inside main(): on Python 3.11+
    # asyncio.run() cancels the main task on SIGINT and raises
    # KeyboardInterrupt from asyncio.run() itself, so an
    # ``except KeyboardInterrupt`` inside the coroutine does not fire.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n\nTest interrupted by user")
        sys.exit(1)

141
backend/scripts/verify_sprint4.sh Executable file
View File

@@ -0,0 +1,141 @@
#!/bin/bash
# Verification script for Sprint 4 implementation
# Checks that all required components are present

# Every path checked below is relative to the backend/ directory, while this
# script lives in backend/scripts/.  Change there first so the result does
# not depend on the caller's working directory.
cd "$(dirname "${BASH_SOURCE[0]}")/.." || {
    echo "Cannot change to the backend directory" >&2
    exit 1
}

echo "========================================"
echo "Sprint 4 - KI-Integration Verifikation"
echo "========================================"
echo ""

# Color codes (interpreted by `echo -e`)
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Counters updated by the check functions and reported in the summary
FAILED=0
PASSED=0
# check_file FILE DESCRIPTION
#
# Report whether FILE exists as a regular file and update the global
# PASSED / FAILED counters.  Returns 0 when the file exists, 1 otherwise.
check_file() {
    local file="$1"
    local description="$2"
    if [ -f "$file" ]; then
        # Same markers as the final summary, so a pass and a failure can be
        # told apart even without colour support.
        echo -e "${GREEN}✓${NC} $description"
        echo "$file"
        # Plain assignment instead of ((PASSED++)): the latter yields exit
        # status 1 whenever the old value is 0.
        PASSED=$((PASSED + 1))
        return 0
    fi
    echo -e "${RED}✗${NC} $description"
    echo "$file (NOT FOUND)"
    FAILED=$((FAILED + 1))
    return 1
}
# check_content FILE PATTERN DESCRIPTION
#
# Report whether FILE exists and contains a line matching PATTERN (a grep
# basic regular expression), updating the global PASSED / FAILED counters.
# Returns 0 on a match, 1 when the file is missing or nothing matches.
check_content() {
    local file="$1"
    local pattern="$2"
    local description="$3"
    if [ ! -f "$file" ]; then
        # Name the real cause instead of blaming the pattern.
        echo -e "${RED}✗${NC} $description"
        echo " → File $file not found"
        FAILED=$((FAILED + 1))
        return 1
    fi
    # -e / -- keep a pattern or file name starting with '-' from being
    # parsed as a grep option.
    if grep -q -e "$pattern" -- "$file"; then
        echo -e "${GREEN}✓${NC} $description"
        # Plain assignment instead of ((PASSED++)): the latter yields exit
        # status 1 whenever the old value is 0.
        PASSED=$((PASSED + 1))
        return 0
    fi
    echo -e "${RED}✗${NC} $description"
    echo " → Pattern '$pattern' not found in $file"
    FAILED=$((FAILED + 1))
    return 1
}
# Files that are inspected by more than one section.
provider_src="compliance/services/llm_provider.py"
assistant_src="compliance/services/ai_compliance_assistant.py"
routes_src="compliance/api/routes.py"
schemas_src="compliance/api/schemas.py"
env_template=".env.example"

# section TITLE UNDERLINE - print a section heading and its underline.
section() {
    printf '%s\n%s\n' "$1" "$2"
}

section "1. Core Components" "-------------------"
check_file "$provider_src" "LLM Provider Abstraction"
check_file "$assistant_src" "AI Compliance Assistant"
check_file "$routes_src" "API Routes"
check_file "$schemas_src" "API Schemas"
echo ""

section "2. LLM Provider Classes" "------------------------"
check_content "$provider_src" "class LLMProvider" "Abstrakte LLMProvider Klasse"
for provider_class in AnthropicProvider SelfHostedProvider MockProvider; do
    check_content "$provider_src" "class $provider_class" "$provider_class"
done
check_content "$provider_src" "def get_llm_provider" "get_llm_provider() Factory"
echo ""

section "3. AI Assistant Methods" "------------------------"
for method in \
    interpret_requirement \
    suggest_controls \
    assess_module_risk \
    analyze_gap \
    batch_interpret_requirements
do
    check_content "$assistant_src" "async def $method" "${method}()"
done
echo ""

section "4. API Endpoints" "-----------------"
check_content "$routes_src" "async def get_ai_status" "GET /ai/status"
check_content "$routes_src" "async def interpret_requirement" "POST /ai/interpret"
check_content "$routes_src" "async def suggest_controls" "POST /ai/suggest-controls"
check_content "$routes_src" "async def assess_module_risk" "POST /ai/assess-risk"
check_content "$routes_src" "async def analyze_gap" "POST /ai/gap-analysis"
check_content "$routes_src" "async def batch_interpret_requirements" "POST /ai/batch-interpret"
echo ""

section "5. Pydantic Schemas" "--------------------"
for schema in \
    AIStatusResponse \
    AIInterpretationRequest \
    AIInterpretationResponse \
    AIControlSuggestionRequest \
    AIControlSuggestionResponse \
    AIRiskAssessmentRequest \
    AIRiskAssessmentResponse \
    AIGapAnalysisRequest \
    AIGapAnalysisResponse
do
    check_content "$schemas_src" "class $schema" "$schema"
done
echo ""

section "6. Environment Variables" "-------------------------"
for env_var in \
    COMPLIANCE_LLM_PROVIDER \
    ANTHROPIC_MODEL \
    SELF_HOSTED_LLM_URL \
    COMPLIANCE_LLM_MAX_TOKENS
do
    check_content "$env_template" "$env_var" "$env_var"
done
echo ""

section "7. Documentation" "-----------------"
check_file "docs/compliance_ai_integration.md" "Vollständige Dokumentation"
check_file "compliance/README_AI.md" "Quick-Start Guide"
check_file "compliance/SPRINT_4_SUMMARY.md" "Sprint 4 Zusammenfassung"
echo ""

section "8. Tests" "---------"
check_file "tests/test_compliance_ai.py" "Unit Tests"
check_file "scripts/test_compliance_ai_endpoints.py" "Integration Test Script"
echo ""

section "9. German Prompts" "------------------"
check_content "$assistant_src" "Du bist ein Compliance-Experte" "Deutscher System Prompt"
check_content "$assistant_src" "Breakpilot" "Breakpilot-spezifisch"
check_content "$assistant_src" "EdTech" "EdTech Kontext"
echo ""
# Final report: print both counters, then exit 0 only if nothing failed.
banner="========================================"
printf '%s\n' "$banner" "Zusammenfassung" "$banner"
echo -e "${GREEN}Bestanden: $PASSED${NC}"
echo -e "${RED}Fehlgeschlagen: $FAILED${NC}"
echo ""

# Guard clause for the failure case; the success path follows.
if [ "$FAILED" -ne 0 ]; then
    echo -e "${RED}✗ Es fehlen noch $FAILED Komponenten${NC}"
    exit 1
fi
echo -e "${GREEN}✓ Sprint 4 ist vollständig implementiert!${NC}"
exit 0