#!/usr/bin/env python3 """ Load Initial EduSearch Seeds into the Database. This script uses the bulk import API to load all German education sources that were provided by the user. """ import httpx import asyncio import os API_BASE = os.environ.get("LLM_GATEWAY_URL", "http://localhost:8000") # All German education seeds organized by category INITIAL_SEEDS = [ # ===== BUNDESEBENE (Federal) ===== {"url": "https://www.kmk.org", "name": "Kultusministerkonferenz (KMK)", "description": "Lehrpläne, Bildungsstandards, Abiturregelungen", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"}, {"url": "https://www.bildungsserver.de", "name": "Deutscher Bildungsserver (DIPF)", "description": "Zentrale Meta-Plattform für alle Länder", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"}, {"url": "https://www.bpb.de", "name": "Bundeszentrale für politische Bildung", "description": "Unterrichtsmaterialien, Dossiers, Arbeitsblätter", "category": "federal", "trust_boost": 0.90, "source_type": "GOV", "scope": "FEDERAL"}, {"url": "https://www.bmbf.de", "name": "Bundesministerium für Bildung und Forschung", "description": "Förderprogramme, Bildungsberichte, Initiativen", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"}, {"url": "https://www.iqb.hu-berlin.de", "name": "Institut zur Qualitätsentwicklung (IQB)", "description": "Bildungsstandards, Vergleichsarbeiten, Abiturpools", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"}, # ===== BADEN-WÜRTTEMBERG (BW) ===== {"url": "https://km-bw.de", "name": "BW Kultusministerium", "description": "Baden-Württemberg Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BW"}, {"url": "https://www.bildungsplaene-bw.de", "name": "BW Bildungspläne", "description": "Bildungspläne Baden-Württemberg", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BW"}, {"url": "https://zsl.kultus-bw.de", "name": "BW Zentrum für Schulqualität", "description": "ZSL Baden-Württemberg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BW"}, {"url": "https://lehrerfortbildung-bw.de", "name": "BW Lehrerfortbildung", "description": "Lehrerfortbildung Baden-Württemberg", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BW"}, {"url": "https://rp.baden-wuerttemberg.de", "name": "BW Regierungspräsidien", "description": "Bildungsaufsicht Baden-Württemberg", "category": "authorities", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BW"}, # ===== BAYERN (BY) ===== {"url": "https://www.km.bayern.de", "name": "Bayern Kultusministerium", "description": "Bayerisches Staatsministerium für Unterricht und Kultus", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BY"}, {"url": "https://www.isb.bayern.de", "name": "Bayern ISB", "description": "Staatsinstitut für Schulqualität und Bildungsforschung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BY"}, {"url": "https://www.mebis.bayern.de", "name": "Bayern mebis", "description": "Medien-Bildung-Service Bayern", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BY"}, {"url": "https://www.bycs.de", "name": "Bayern Cloud Schule", "description": "Bayerische Schulcloud", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BY"}, {"url": "https://www.schulberatung.bayern.de", "name": "Bayern Schulberatung", "description": "Staatliche Schulberatung Bayern", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BY"}, # ===== BERLIN (BE) ===== {"url": "https://www.berlin.de/sen/bjf", "name": "Berlin Senatsverwaltung", "description": "Senatsverwaltung für Bildung, Jugend und Familie", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BE"}, {"url": "https://bildungsserver.berlin-brandenburg.de", "name": "Berlin-Brandenburg Bildungsserver", "description": "Gemeinsamer Bildungsserver Berlin-Brandenburg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BE"}, {"url": "https://www.berlin.de/schule", "name": "Berlin Schulportal", "description": "Berliner Schulportal", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BE"}, {"url": "https://www.berlin.de/landesinstitut-schule-medien", "name": "Berlin LISUM", "description": "Landesinstitut für Schule und Medien", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BE"}, # ===== BRANDENBURG (BB) ===== {"url": "https://mbjs.brandenburg.de", "name": "Brandenburg MBJS", "description": "Ministerium für Bildung, Jugend und Sport", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"}, {"url": "https://lisum.berlin-brandenburg.de", "name": "Brandenburg LISUM", "description": "Landesinstitut für Schule und Medien", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"}, {"url": "https://www.schulportal.brandenburg.de", "name": "Brandenburg Schulportal", "description": "Schulportal Brandenburg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BB"}, {"url": "https://lehrplan.brandenburg.de", "name": "Brandenburg Lehrpläne", "description": "Rahmenlehrpläne Brandenburg", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"}, # ===== BREMEN (HB) ===== {"url": "https://www.bildung.bremen.de", "name": "Bremen Bildung", "description": "Senatorin für Kinder und Bildung Bremen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HB"}, {"url": "https://www.lis.bremen.de", "name": "Bremen LIS", "description": "Landesinstitut für Schule Bremen", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HB"}, {"url": "https://www.bildungsplaene.bremen.de", "name": "Bremen Bildungspläne", "description": "Bildungspläne Bremen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HB"}, # ===== HAMBURG (HH) ===== {"url": "https://www.hamburg.de/bsb", "name": "Hamburg BSB", "description": "Behörde für Schule und Berufsbildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"}, {"url": "https://li.hamburg.de", "name": "Hamburg Landesinstitut", "description": "Landesinstitut für Lehrerbildung und Schulentwicklung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"}, {"url": "https://www.bildungsplaene.hamburg.de", "name": "Hamburg Bildungspläne", "description": "Hamburger Bildungspläne", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"}, # ===== HESSEN (HE) ===== {"url": "https://kultusministerium.hessen.de", "name": "Hessen Kultusministerium", "description": "Hessisches Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HE"}, {"url": "https://lehrplaene.hessen.de", "name": "Hessen Lehrpläne", "description": "Kerncurricula Hessen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HE"}, {"url": "https://www.schulportal.hessen.de", "name": "Hessen Schulportal", "description": "Hessisches Schulportal", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HE"}, {"url": "https://la.hessen.de", "name": "Hessen Lehrkräfteakademie", "description": "Hessische Lehrkräfteakademie", "category": "authorities", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HE"}, # ===== MECKLENBURG-VORPOMMERN (MV) ===== {"url": "https://www.regierung-mv.de/Landesregierung/bm", "name": "MV Bildungsministerium", "description": "Bildungsministerium Mecklenburg-Vorpommern", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "MV"}, {"url": "https://www.bildung-mv.de", "name": "MV Bildungsportal", "description": "Bildungsserver Mecklenburg-Vorpommern", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "MV"}, # ===== NIEDERSACHSEN (NI) ===== {"url": "https://www.mk.niedersachsen.de", "name": "Niedersachsen MK", "description": "Niedersächsisches Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NI"}, {"url": "https://www.nibis.de", "name": "Niedersachsen NiBiS", "description": "Niedersächsischer Bildungsserver", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NI"}, # ===== NORDRHEIN-WESTFALEN (NW) ===== {"url": "https://www.msb.nrw", "name": "NRW Schulministerium", "description": "Ministerium für Schule und Bildung NRW", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"}, {"url": "https://www.schulentwicklung.nrw.de", "name": "NRW Schulentwicklung", "description": "Schulentwicklung NRW", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"}, {"url": "https://www.qua-lis.nrw.de", "name": "NRW QUA-LiS", "description": "Qualitäts- und UnterstützungsAgentur", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"}, {"url": "https://www.standardsicherung.schulministerium.nrw.de", "name": "NRW Standardsicherung", "description": "Standardsicherung und Prüfungen NRW", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "NW"}, # ===== RHEINLAND-PFALZ (RP) ===== {"url": "https://bm.rlp.de", "name": "RLP Bildungsministerium", "description": "Ministerium für Bildung Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"}, {"url": "https://bildung.rlp.de", "name": "RLP Bildungsserver", "description": "Bildungsserver Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"}, {"url": "https://lehrplaene.rlp.de", "name": "RLP Lehrpläne", "description": "Lehrpläne Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"}, # ===== SAARLAND (SL) ===== {"url": "https://www.saarland.de/mbk", "name": "Saarland MBK", "description": "Ministerium für Bildung und Kultur Saarland", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SL"}, {"url": "https://www.bildungsserver.saarland.de", "name": "Saarland Bildungsserver", "description": "Bildungsserver Saarland", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "SL"}, # ===== SACHSEN (SN) ===== {"url": "https://www.smk.sachsen.de", "name": "Sachsen SMK", "description": "Sächsisches Staatsministerium für Kultus", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SN"}, {"url": "https://www.schule.sachsen.de", "name": "Sachsen Schulportal", "description": "Sächsisches Schulportal", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SN"}, # ===== SACHSEN-ANHALT (ST) ===== {"url": "https://mb.sachsen-anhalt.de", "name": "Sachsen-Anhalt MB", "description": "Ministerium für Bildung Sachsen-Anhalt", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "ST"}, {"url": "https://lisa.sachsen-anhalt.de", "name": "Sachsen-Anhalt LISA", "description": "Landesinstitut für Schulqualität", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "ST"}, {"url": "https://www.bildung-lsa.de", "name": "Sachsen-Anhalt Bildungsserver", "description": "Bildungsserver Sachsen-Anhalt", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "ST"}, # ===== SCHLESWIG-HOLSTEIN (SH) ===== {"url": "https://www.schleswig-holstein.de/BILDUNG", "name": "SH Bildungsministerium", "description": "Ministerium für Allgemeine und Berufliche Bildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SH"}, {"url": "https://fachanforderungen.schleswig-holstein.de", "name": "SH Fachanforderungen", "description": "Fachanforderungen Schleswig-Holstein", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SH"}, # ===== THÜRINGEN (TH) ===== {"url": "https://bildung.thueringen.de", "name": "Thüringen Bildungsministerium", "description": "Thüringer Ministerium für Bildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "TH"}, {"url": "https://www.schulportal-thueringen.de", "name": "Thüringen Schulportal", "description": "Thüringer Schulportal", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "TH"}, # ===== WISSENSCHAFT & STUDIEN ===== {"url": "https://www.bertelsmann-stiftung.de/bildung", "name": "Bertelsmann Stiftung", "description": "Bildungsstudien und Ländermonitor", "category": "science", "trust_boost": 0.85, "source_type": "NGO", "scope": "FEDERAL"}, {"url": "https://www.oecd.org/pisa", "name": "OECD PISA", "description": "Internationale Schulleistungsstudie PISA", "category": "science", "trust_boost": 0.90, "source_type": "INT", "scope": "INTERNATIONAL"}, # ===== BILDUNGSPORTALE ===== {"url": "https://www.lehrer-online.de", "name": "Lehrer-Online", "description": "Unterrichtsmaterialien und Fachinformationen", "category": "portals", "trust_boost": 0.80, "source_type": "PORTAL", "scope": "FEDERAL"}, {"url": "https://www.4teachers.de", "name": "4teachers", "description": "Unterrichtsmaterialien von Lehrern für Lehrer", "category": "portals", "trust_boost": 0.75, "source_type": "PORTAL", "scope": "FEDERAL"}, {"url": "https://www.zum.de", "name": "ZUM", "description": "Zentrale für Unterrichtsmedien im Internet", "category": "portals", "trust_boost": 0.80, "source_type": "NGO", "scope": "FEDERAL"}, ] async def load_seeds(): """ Load initial seeds via bulk import API. Returns: bool: True if successful, False otherwise Raises: httpx.ConnectError: If API is not reachable httpx.TimeoutException: If request times out """ print(f"Loading {len(INITIAL_SEEDS)} seeds into {API_BASE}...") print(f"Seeds breakdown:") categories = {} for seed in INITIAL_SEEDS: cat = seed.get("category", "unknown") categories[cat] = categories.get(cat, 0) + 1 for cat, count in sorted(categories.items()): print(f" - {cat}: {count}") try: async with httpx.AsyncClient(timeout=60.0) as client: # Check API health first try: health_response = await client.get(f"{API_BASE}/health") if health_response.status_code != 200: print(f"WARNING: Health check returned {health_response.status_code}") except httpx.ConnectError: print(f"ERROR: Cannot connect to API at {API_BASE}") print("Make sure the backend service is running:") print(" docker compose up -d backend") return False # Import seeds print("\nImporting seeds...") response = await client.post( f"{API_BASE}/v1/edu-search/seeds/bulk-import", json={"seeds": INITIAL_SEEDS} ) if response.status_code == 200: result = response.json() imported = result.get('imported', 0) skipped = result.get('skipped', 0) errors = result.get('errors', []) print(f"\nResult:") print(f" Imported: {imported}") print(f" Skipped (duplicates): {skipped}") if errors: print(f" Errors: {len(errors)}") for err in errors[:5]: print(f" - {err}") if len(errors) > 5: print(f" ... and {len(errors) - 5} more") else: print(f"\nERROR: Import failed with status {response.status_code}") try: error_detail = response.json() print(f" Detail: {error_detail.get('detail', response.text)}") except Exception: print(f" Response: {response.text[:500]}") return False # Get stats print("\nFetching statistics...") stats_response = await client.get(f"{API_BASE}/v1/edu-search/stats") if stats_response.status_code == 200: stats = stats_response.json() print(f"\nDatabase Statistics:") print(f" Total seeds: {stats.get('total_seeds', 0)}") print(f" Enabled seeds: {stats.get('enabled_seeds', 0)}") print(f" Disabled seeds: {stats.get('disabled_seeds', 0)}") print(f" Avg trust boost: {stats.get('avg_trust_boost', 0):.2f}") per_category = stats.get('seeds_per_category', {}) if per_category: print(f"\n Seeds per category:") for cat, count in sorted(per_category.items()): print(f" - {cat}: {count}") print("\nDone!") return True except httpx.ConnectError as e: print(f"\nERROR: Connection failed - {e}") print(f"Make sure the API is running at {API_BASE}") return False except httpx.TimeoutException: print(f"\nERROR: Request timed out") print("The server may be overloaded. Try again later.") return False except Exception as e: print(f"\nERROR: Unexpected error - {e}") return False if __name__ == "__main__": import sys success = asyncio.run(load_seeds()) sys.exit(0 if success else 1)