Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
All services: admin-v2, studio-v2, website, ai-compliance-sdk, consent-service, klausur-service, voice-service, and infrastructure. Large PDFs and compiled binaries excluded via .gitignore.
219 lines
18 KiB
Python
219 lines
18 KiB
Python
#!/usr/bin/env python3
"""
Load Initial EduSearch Seeds into the Database.

This script uses the bulk import API to load all German education sources
that were provided by the user.
"""

import asyncio
import os
from collections import Counter

import httpx

# Base URL of the API that receives the seeds. Read from the LLM_GATEWAY_URL
# environment variable, falling back to a local instance on port 8000.
# A trailing slash is stripped so that f"{API_BASE}/health" never yields "//".
API_BASE = os.environ.get("LLM_GATEWAY_URL", "http://localhost:8000").rstrip("/")

# All German education seeds organized by category.
#
# Every entry carries the keys "url", "name", "description", "category",
# "trust_boost", "source_type" and "scope"; entries whose scope is "STATE"
# additionally carry a "state" key holding the two-letter state code.
# NOTE(review): the meaning and valid range of "trust_boost" are defined by
# the bulk-import API, not by this file - values used here are 0.75 to 0.95.
INITIAL_SEEDS = [
    # ===== FEDERAL LEVEL (Bundesebene) =====
    {"url": "https://www.kmk.org", "name": "Kultusministerkonferenz (KMK)", "description": "Lehrpläne, Bildungsstandards, Abiturregelungen", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
    {"url": "https://www.bildungsserver.de", "name": "Deutscher Bildungsserver (DIPF)", "description": "Zentrale Meta-Plattform für alle Länder", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
    {"url": "https://www.bpb.de", "name": "Bundeszentrale für politische Bildung", "description": "Unterrichtsmaterialien, Dossiers, Arbeitsblätter", "category": "federal", "trust_boost": 0.90, "source_type": "GOV", "scope": "FEDERAL"},
    {"url": "https://www.bmbf.de", "name": "Bundesministerium für Bildung und Forschung", "description": "Förderprogramme, Bildungsberichte, Initiativen", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
    {"url": "https://www.iqb.hu-berlin.de", "name": "Institut zur Qualitätsentwicklung (IQB)", "description": "Bildungsstandards, Vergleichsarbeiten, Abiturpools", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},

    # ===== BADEN-WÜRTTEMBERG (BW) =====
    {"url": "https://km-bw.de", "name": "BW Kultusministerium", "description": "Baden-Württemberg Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BW"},
    {"url": "https://www.bildungsplaene-bw.de", "name": "BW Bildungspläne", "description": "Bildungspläne Baden-Württemberg", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BW"},
    {"url": "https://zsl.kultus-bw.de", "name": "BW Zentrum für Schulqualität", "description": "ZSL Baden-Württemberg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BW"},
    {"url": "https://lehrerfortbildung-bw.de", "name": "BW Lehrerfortbildung", "description": "Lehrerfortbildung Baden-Württemberg", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BW"},
    {"url": "https://rp.baden-wuerttemberg.de", "name": "BW Regierungspräsidien", "description": "Bildungsaufsicht Baden-Württemberg", "category": "authorities", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BW"},

    # ===== BAVARIA / BAYERN (BY) =====
    {"url": "https://www.km.bayern.de", "name": "Bayern Kultusministerium", "description": "Bayerisches Staatsministerium für Unterricht und Kultus", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BY"},
    {"url": "https://www.isb.bayern.de", "name": "Bayern ISB", "description": "Staatsinstitut für Schulqualität und Bildungsforschung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BY"},
    {"url": "https://www.mebis.bayern.de", "name": "Bayern mebis", "description": "Medien-Bildung-Service Bayern", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BY"},
    {"url": "https://www.bycs.de", "name": "Bayern Cloud Schule", "description": "Bayerische Schulcloud", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BY"},
    {"url": "https://www.schulberatung.bayern.de", "name": "Bayern Schulberatung", "description": "Staatliche Schulberatung Bayern", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BY"},

    # ===== BERLIN (BE) =====
    {"url": "https://www.berlin.de/sen/bjf", "name": "Berlin Senatsverwaltung", "description": "Senatsverwaltung für Bildung, Jugend und Familie", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BE"},
    {"url": "https://bildungsserver.berlin-brandenburg.de", "name": "Berlin-Brandenburg Bildungsserver", "description": "Gemeinsamer Bildungsserver Berlin-Brandenburg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BE"},
    {"url": "https://www.berlin.de/schule", "name": "Berlin Schulportal", "description": "Berliner Schulportal", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BE"},
    {"url": "https://www.berlin.de/landesinstitut-schule-medien", "name": "Berlin LISUM", "description": "Landesinstitut für Schule und Medien", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BE"},

    # ===== BRANDENBURG (BB) =====
    {"url": "https://mbjs.brandenburg.de", "name": "Brandenburg MBJS", "description": "Ministerium für Bildung, Jugend und Sport", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},
    {"url": "https://lisum.berlin-brandenburg.de", "name": "Brandenburg LISUM", "description": "Landesinstitut für Schule und Medien", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},
    {"url": "https://www.schulportal.brandenburg.de", "name": "Brandenburg Schulportal", "description": "Schulportal Brandenburg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BB"},
    {"url": "https://lehrplan.brandenburg.de", "name": "Brandenburg Lehrpläne", "description": "Rahmenlehrpläne Brandenburg", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},

    # ===== BREMEN (HB) =====
    {"url": "https://www.bildung.bremen.de", "name": "Bremen Bildung", "description": "Senatorin für Kinder und Bildung Bremen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HB"},
    {"url": "https://www.lis.bremen.de", "name": "Bremen LIS", "description": "Landesinstitut für Schule Bremen", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HB"},
    {"url": "https://www.bildungsplaene.bremen.de", "name": "Bremen Bildungspläne", "description": "Bildungspläne Bremen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HB"},

    # ===== HAMBURG (HH) =====
    {"url": "https://www.hamburg.de/bsb", "name": "Hamburg BSB", "description": "Behörde für Schule und Berufsbildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},
    {"url": "https://li.hamburg.de", "name": "Hamburg Landesinstitut", "description": "Landesinstitut für Lehrerbildung und Schulentwicklung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},
    {"url": "https://www.bildungsplaene.hamburg.de", "name": "Hamburg Bildungspläne", "description": "Hamburger Bildungspläne", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},

    # ===== HESSE / HESSEN (HE) =====
    {"url": "https://kultusministerium.hessen.de", "name": "Hessen Kultusministerium", "description": "Hessisches Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HE"},
    {"url": "https://lehrplaene.hessen.de", "name": "Hessen Lehrpläne", "description": "Kerncurricula Hessen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HE"},
    {"url": "https://www.schulportal.hessen.de", "name": "Hessen Schulportal", "description": "Hessisches Schulportal", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HE"},
    {"url": "https://la.hessen.de", "name": "Hessen Lehrkräfteakademie", "description": "Hessische Lehrkräfteakademie", "category": "authorities", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HE"},

    # ===== MECKLENBURG-VORPOMMERN (MV) =====
    {"url": "https://www.regierung-mv.de/Landesregierung/bm", "name": "MV Bildungsministerium", "description": "Bildungsministerium Mecklenburg-Vorpommern", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "MV"},
    {"url": "https://www.bildung-mv.de", "name": "MV Bildungsportal", "description": "Bildungsserver Mecklenburg-Vorpommern", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "MV"},

    # ===== LOWER SAXONY / NIEDERSACHSEN (NI) =====
    {"url": "https://www.mk.niedersachsen.de", "name": "Niedersachsen MK", "description": "Niedersächsisches Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NI"},
    {"url": "https://www.nibis.de", "name": "Niedersachsen NiBiS", "description": "Niedersächsischer Bildungsserver", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NI"},

    # ===== NORTH RHINE-WESTPHALIA / NORDRHEIN-WESTFALEN (NW) =====
    {"url": "https://www.msb.nrw", "name": "NRW Schulministerium", "description": "Ministerium für Schule und Bildung NRW", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
    {"url": "https://www.schulentwicklung.nrw.de", "name": "NRW Schulentwicklung", "description": "Schulentwicklung NRW", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
    {"url": "https://www.qua-lis.nrw.de", "name": "NRW QUA-LiS", "description": "Qualitäts- und UnterstützungsAgentur", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
    {"url": "https://www.standardsicherung.schulministerium.nrw.de", "name": "NRW Standardsicherung", "description": "Standardsicherung und Prüfungen NRW", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "NW"},

    # ===== RHINELAND-PALATINATE / RHEINLAND-PFALZ (RP) =====
    {"url": "https://bm.rlp.de", "name": "RLP Bildungsministerium", "description": "Ministerium für Bildung Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},
    {"url": "https://bildung.rlp.de", "name": "RLP Bildungsserver", "description": "Bildungsserver Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},
    {"url": "https://lehrplaene.rlp.de", "name": "RLP Lehrpläne", "description": "Lehrpläne Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},

    # ===== SAARLAND (SL) =====
    {"url": "https://www.saarland.de/mbk", "name": "Saarland MBK", "description": "Ministerium für Bildung und Kultur Saarland", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SL"},
    {"url": "https://www.bildungsserver.saarland.de", "name": "Saarland Bildungsserver", "description": "Bildungsserver Saarland", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "SL"},

    # ===== SAXONY / SACHSEN (SN) =====
    {"url": "https://www.smk.sachsen.de", "name": "Sachsen SMK", "description": "Sächsisches Staatsministerium für Kultus", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SN"},
    {"url": "https://www.schule.sachsen.de", "name": "Sachsen Schulportal", "description": "Sächsisches Schulportal", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SN"},

    # ===== SAXONY-ANHALT / SACHSEN-ANHALT (ST) =====
    {"url": "https://mb.sachsen-anhalt.de", "name": "Sachsen-Anhalt MB", "description": "Ministerium für Bildung Sachsen-Anhalt", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "ST"},
    {"url": "https://lisa.sachsen-anhalt.de", "name": "Sachsen-Anhalt LISA", "description": "Landesinstitut für Schulqualität", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "ST"},
    {"url": "https://www.bildung-lsa.de", "name": "Sachsen-Anhalt Bildungsserver", "description": "Bildungsserver Sachsen-Anhalt", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "ST"},

    # ===== SCHLESWIG-HOLSTEIN (SH) =====
    {"url": "https://www.schleswig-holstein.de/BILDUNG", "name": "SH Bildungsministerium", "description": "Ministerium für Allgemeine und Berufliche Bildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SH"},
    {"url": "https://fachanforderungen.schleswig-holstein.de", "name": "SH Fachanforderungen", "description": "Fachanforderungen Schleswig-Holstein", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SH"},

    # ===== THURINGIA / THÜRINGEN (TH) =====
    {"url": "https://bildung.thueringen.de", "name": "Thüringen Bildungsministerium", "description": "Thüringer Ministerium für Bildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "TH"},
    {"url": "https://www.schulportal-thueringen.de", "name": "Thüringen Schulportal", "description": "Thüringer Schulportal", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "TH"},

    # ===== SCIENCE & STUDIES =====
    {"url": "https://www.bertelsmann-stiftung.de/bildung", "name": "Bertelsmann Stiftung", "description": "Bildungsstudien und Ländermonitor", "category": "science", "trust_boost": 0.85, "source_type": "NGO", "scope": "FEDERAL"},
    {"url": "https://www.oecd.org/pisa", "name": "OECD PISA", "description": "Internationale Schulleistungsstudie PISA", "category": "science", "trust_boost": 0.90, "source_type": "INT", "scope": "INTERNATIONAL"},

    # ===== EDUCATION PORTALS =====
    {"url": "https://www.lehrer-online.de", "name": "Lehrer-Online", "description": "Unterrichtsmaterialien und Fachinformationen", "category": "portals", "trust_boost": 0.80, "source_type": "PORTAL", "scope": "FEDERAL"},
    {"url": "https://www.4teachers.de", "name": "4teachers", "description": "Unterrichtsmaterialien von Lehrern für Lehrer", "category": "portals", "trust_boost": 0.75, "source_type": "PORTAL", "scope": "FEDERAL"},
    {"url": "https://www.zum.de", "name": "ZUM", "description": "Zentrale für Unterrichtsmedien im Internet", "category": "portals", "trust_boost": 0.80, "source_type": "NGO", "scope": "FEDERAL"},
]


def _print_seed_breakdown(seeds):
    """Print how many seeds will be sent and a per-category count."""
    print(f"Loading {len(seeds)} seeds into {API_BASE}...")
    print("Seeds breakdown:")
    # Seeds without a "category" key are counted under "unknown".
    categories = Counter(seed.get("category", "unknown") for seed in seeds)
    for cat, count in sorted(categories.items()):
        print(f" - {cat}: {count}")


def _print_import_result(result):
    """Print the imported/skipped counts and up to five errors of an import."""
    imported = result.get('imported', 0)
    skipped = result.get('skipped', 0)
    errors = result.get('errors', [])

    print("\nResult:")
    print(f" Imported: {imported}")
    print(f" Skipped (duplicates): {skipped}")

    if errors:
        print(f" Errors: {len(errors)}")
        # Only the first five errors are listed to keep the output short.
        for err in errors[:5]:
            print(f" - {err}")
        if len(errors) > 5:
            print(f" ... and {len(errors) - 5} more")


def _print_import_error(response):
    """Print the status and the most specific error text of a failed import."""
    print(f"\nERROR: Import failed with status {response.status_code}")
    try:
        error_detail = response.json()
    except ValueError:
        # Body is not valid JSON; fall back to the raw text below.
        error_detail = None
    if isinstance(error_detail, dict):
        print(f" Detail: {error_detail.get('detail', response.text)}")
    else:
        print(f" Response: {response.text[:500]}")


def _print_stats(stats):
    """Print the totals and the per-category counts of a stats response."""
    print("\nDatabase Statistics:")
    print(f" Total seeds: {stats.get('total_seeds', 0)}")
    print(f" Enabled seeds: {stats.get('enabled_seeds', 0)}")
    print(f" Disabled seeds: {stats.get('disabled_seeds', 0)}")
    # A JSON null average would break the ":.2f" format; treat it as 0.
    avg_trust_boost = stats.get('avg_trust_boost') or 0
    print(f" Avg trust boost: {avg_trust_boost:.2f}")

    per_category = stats.get('seeds_per_category', {})
    if per_category:
        print("\n Seeds per category:")
        for cat, count in sorted(per_category.items()):
            print(f" - {cat}: {count}")


async def load_seeds():
    """
    Load initial seeds via bulk import API.

    Prints a breakdown of INITIAL_SEEDS, checks ``{API_BASE}/health``, posts
    all seeds to ``{API_BASE}/v1/edu-search/seeds/bulk-import`` and finally
    prints the figures returned by ``{API_BASE}/v1/edu-search/stats``.

    Returns:
        bool: True if the import request returned status 200 and the
        follow-up steps ran without an exception, False otherwise.

    No exception is propagated to the caller: connection errors, timeouts
    and any other exception are reported on stdout and turned into a
    False return value.
    """
    _print_seed_breakdown(INITIAL_SEEDS)

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            # Check API health first. A non-200 answer is only a warning;
            # an unreachable server aborts the run.
            try:
                health_response = await client.get(f"{API_BASE}/health")
            except httpx.ConnectError:
                print(f"ERROR: Cannot connect to API at {API_BASE}")
                print("Make sure the backend service is running:")
                print(" docker compose up -d backend")
                return False
            if health_response.status_code != 200:
                print(f"WARNING: Health check returned {health_response.status_code}")

            # Import seeds
            print("\nImporting seeds...")
            response = await client.post(
                f"{API_BASE}/v1/edu-search/seeds/bulk-import",
                json={"seeds": INITIAL_SEEDS},
            )
            if response.status_code != 200:
                _print_import_error(response)
                return False
            _print_import_result(response.json())

            # Get stats
            print("\nFetching statistics...")
            stats_response = await client.get(f"{API_BASE}/v1/edu-search/stats")
            if stats_response.status_code == 200:
                _print_stats(stats_response.json())
            else:
                # Statistics are informational; report instead of skipping
                # silently, but do not fail the already completed import.
                print(f"WARNING: Statistics request returned {stats_response.status_code}")

            print("\nDone!")
            return True

    except httpx.ConnectError as e:
        print(f"\nERROR: Connection failed - {e}")
        print(f"Make sure the API is running at {API_BASE}")
        return False
    except httpx.TimeoutException:
        print("\nERROR: Request timed out")
        print("The server may be overloaded. Try again later.")
        return False
    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print(f"\nERROR: Unexpected error - {e}")
        return False


if __name__ == "__main__":
    # Script entry point: run the import and map its boolean result to the
    # process exit code (0 on success, 1 on failure).
    import sys

    success = asyncio.run(load_seeds())
    sys.exit(0 if success else 1)