This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/scripts/load_initial_seeds.py
Benjamin Admin 21a844cb8a fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00

219 lines
18 KiB
Python

#!/usr/bin/env python3
"""
Load Initial EduSearch Seeds into the Database.
This script uses the bulk import API to load all German education sources
that were provided by the user.
"""
import httpx
import asyncio
import os
# Base URL of the backend API. It can be overridden with LLM_GATEWAY_URL; an
# unset or empty variable falls back to the local default. Trailing slashes are
# stripped because every endpoint is built as f"{API_BASE}/path", which would
# otherwise produce URLs containing "//".
API_BASE = (os.environ.get("LLM_GATEWAY_URL") or "http://localhost:8000").rstrip("/")
# Seed sources for German education content, grouped as: federal level first,
# then one section per federal state (all 16 are present), then research
# organisations and teaching-material portals.
#
# Every entry is a dict with the keys "url", "name", "description", "category",
# "trust_boost", "source_type" and "scope". Entries whose scope is "STATE"
# additionally carry a "state" key holding a two-letter state code.
# Values that appear in this list:
#   category:    "federal", "states", "authorities", "science", "portals"
#   source_type: "GOV", "NGO", "INT", "PORTAL"
#   scope:       "FEDERAL", "STATE", "INTERNATIONAL"
#   trust_boost: floats from 0.75 to 0.95
# NOTE(review): trust_boost presumably weights a source's ranking on the
# server side (higher = more trusted) - confirm against the bulk-import API.
INITIAL_SEEDS = [
    # ===== FEDERAL LEVEL (Bundesebene) =====
    {"url": "https://www.kmk.org", "name": "Kultusministerkonferenz (KMK)", "description": "Lehrpläne, Bildungsstandards, Abiturregelungen", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
    {"url": "https://www.bildungsserver.de", "name": "Deutscher Bildungsserver (DIPF)", "description": "Zentrale Meta-Plattform für alle Länder", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
    {"url": "https://www.bpb.de", "name": "Bundeszentrale für politische Bildung", "description": "Unterrichtsmaterialien, Dossiers, Arbeitsblätter", "category": "federal", "trust_boost": 0.90, "source_type": "GOV", "scope": "FEDERAL"},
    {"url": "https://www.bmbf.de", "name": "Bundesministerium für Bildung und Forschung", "description": "Förderprogramme, Bildungsberichte, Initiativen", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
    {"url": "https://www.iqb.hu-berlin.de", "name": "Institut zur Qualitätsentwicklung (IQB)", "description": "Bildungsstandards, Vergleichsarbeiten, Abiturpools", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
    # ===== BADEN-WÜRTTEMBERG (BW) =====
    {"url": "https://km-bw.de", "name": "BW Kultusministerium", "description": "Baden-Württemberg Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BW"},
    {"url": "https://www.bildungsplaene-bw.de", "name": "BW Bildungspläne", "description": "Bildungspläne Baden-Württemberg", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BW"},
    {"url": "https://zsl.kultus-bw.de", "name": "BW Zentrum für Schulqualität", "description": "ZSL Baden-Württemberg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BW"},
    {"url": "https://lehrerfortbildung-bw.de", "name": "BW Lehrerfortbildung", "description": "Lehrerfortbildung Baden-Württemberg", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BW"},
    {"url": "https://rp.baden-wuerttemberg.de", "name": "BW Regierungspräsidien", "description": "Bildungsaufsicht Baden-Württemberg", "category": "authorities", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BW"},
    # ===== BAYERN (BY) =====
    {"url": "https://www.km.bayern.de", "name": "Bayern Kultusministerium", "description": "Bayerisches Staatsministerium für Unterricht und Kultus", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BY"},
    {"url": "https://www.isb.bayern.de", "name": "Bayern ISB", "description": "Staatsinstitut für Schulqualität und Bildungsforschung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BY"},
    {"url": "https://www.mebis.bayern.de", "name": "Bayern mebis", "description": "Medien-Bildung-Service Bayern", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BY"},
    {"url": "https://www.bycs.de", "name": "Bayern Cloud Schule", "description": "Bayerische Schulcloud", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BY"},
    {"url": "https://www.schulberatung.bayern.de", "name": "Bayern Schulberatung", "description": "Staatliche Schulberatung Bayern", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BY"},
    # ===== BERLIN (BE) =====
    {"url": "https://www.berlin.de/sen/bjf", "name": "Berlin Senatsverwaltung", "description": "Senatsverwaltung für Bildung, Jugend und Familie", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BE"},
    {"url": "https://bildungsserver.berlin-brandenburg.de", "name": "Berlin-Brandenburg Bildungsserver", "description": "Gemeinsamer Bildungsserver Berlin-Brandenburg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BE"},
    {"url": "https://www.berlin.de/schule", "name": "Berlin Schulportal", "description": "Berliner Schulportal", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BE"},
    {"url": "https://www.berlin.de/landesinstitut-schule-medien", "name": "Berlin LISUM", "description": "Landesinstitut für Schule und Medien", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BE"},
    # ===== BRANDENBURG (BB) =====
    {"url": "https://mbjs.brandenburg.de", "name": "Brandenburg MBJS", "description": "Ministerium für Bildung, Jugend und Sport", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},
    {"url": "https://lisum.berlin-brandenburg.de", "name": "Brandenburg LISUM", "description": "Landesinstitut für Schule und Medien", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},
    {"url": "https://www.schulportal.brandenburg.de", "name": "Brandenburg Schulportal", "description": "Schulportal Brandenburg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BB"},
    {"url": "https://lehrplan.brandenburg.de", "name": "Brandenburg Lehrpläne", "description": "Rahmenlehrpläne Brandenburg", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},
    # ===== BREMEN (HB) =====
    {"url": "https://www.bildung.bremen.de", "name": "Bremen Bildung", "description": "Senatorin für Kinder und Bildung Bremen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HB"},
    {"url": "https://www.lis.bremen.de", "name": "Bremen LIS", "description": "Landesinstitut für Schule Bremen", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HB"},
    {"url": "https://www.bildungsplaene.bremen.de", "name": "Bremen Bildungspläne", "description": "Bildungspläne Bremen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HB"},
    # ===== HAMBURG (HH) =====
    {"url": "https://www.hamburg.de/bsb", "name": "Hamburg BSB", "description": "Behörde für Schule und Berufsbildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},
    {"url": "https://li.hamburg.de", "name": "Hamburg Landesinstitut", "description": "Landesinstitut für Lehrerbildung und Schulentwicklung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},
    {"url": "https://www.bildungsplaene.hamburg.de", "name": "Hamburg Bildungspläne", "description": "Hamburger Bildungspläne", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},
    # ===== HESSEN (HE) =====
    {"url": "https://kultusministerium.hessen.de", "name": "Hessen Kultusministerium", "description": "Hessisches Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HE"},
    {"url": "https://lehrplaene.hessen.de", "name": "Hessen Lehrpläne", "description": "Kerncurricula Hessen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HE"},
    {"url": "https://www.schulportal.hessen.de", "name": "Hessen Schulportal", "description": "Hessisches Schulportal", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HE"},
    {"url": "https://la.hessen.de", "name": "Hessen Lehrkräfteakademie", "description": "Hessische Lehrkräfteakademie", "category": "authorities", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HE"},
    # ===== MECKLENBURG-VORPOMMERN (MV) =====
    {"url": "https://www.regierung-mv.de/Landesregierung/bm", "name": "MV Bildungsministerium", "description": "Bildungsministerium Mecklenburg-Vorpommern", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "MV"},
    {"url": "https://www.bildung-mv.de", "name": "MV Bildungsportal", "description": "Bildungsserver Mecklenburg-Vorpommern", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "MV"},
    # ===== NIEDERSACHSEN (NI) =====
    {"url": "https://www.mk.niedersachsen.de", "name": "Niedersachsen MK", "description": "Niedersächsisches Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NI"},
    {"url": "https://www.nibis.de", "name": "Niedersachsen NiBiS", "description": "Niedersächsischer Bildungsserver", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NI"},
    # ===== NORDRHEIN-WESTFALEN (NW) =====
    {"url": "https://www.msb.nrw", "name": "NRW Schulministerium", "description": "Ministerium für Schule und Bildung NRW", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
    {"url": "https://www.schulentwicklung.nrw.de", "name": "NRW Schulentwicklung", "description": "Schulentwicklung NRW", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
    {"url": "https://www.qua-lis.nrw.de", "name": "NRW QUA-LiS", "description": "Qualitäts- und UnterstützungsAgentur", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
    {"url": "https://www.standardsicherung.schulministerium.nrw.de", "name": "NRW Standardsicherung", "description": "Standardsicherung und Prüfungen NRW", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "NW"},
    # ===== RHEINLAND-PFALZ (RP) =====
    {"url": "https://bm.rlp.de", "name": "RLP Bildungsministerium", "description": "Ministerium für Bildung Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},
    {"url": "https://bildung.rlp.de", "name": "RLP Bildungsserver", "description": "Bildungsserver Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},
    {"url": "https://lehrplaene.rlp.de", "name": "RLP Lehrpläne", "description": "Lehrpläne Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},
    # ===== SAARLAND (SL) =====
    {"url": "https://www.saarland.de/mbk", "name": "Saarland MBK", "description": "Ministerium für Bildung und Kultur Saarland", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SL"},
    {"url": "https://www.bildungsserver.saarland.de", "name": "Saarland Bildungsserver", "description": "Bildungsserver Saarland", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "SL"},
    # ===== SACHSEN (SN) =====
    {"url": "https://www.smk.sachsen.de", "name": "Sachsen SMK", "description": "Sächsisches Staatsministerium für Kultus", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SN"},
    {"url": "https://www.schule.sachsen.de", "name": "Sachsen Schulportal", "description": "Sächsisches Schulportal", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SN"},
    # ===== SACHSEN-ANHALT (ST) =====
    {"url": "https://mb.sachsen-anhalt.de", "name": "Sachsen-Anhalt MB", "description": "Ministerium für Bildung Sachsen-Anhalt", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "ST"},
    {"url": "https://lisa.sachsen-anhalt.de", "name": "Sachsen-Anhalt LISA", "description": "Landesinstitut für Schulqualität", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "ST"},
    {"url": "https://www.bildung-lsa.de", "name": "Sachsen-Anhalt Bildungsserver", "description": "Bildungsserver Sachsen-Anhalt", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "ST"},
    # ===== SCHLESWIG-HOLSTEIN (SH) =====
    {"url": "https://www.schleswig-holstein.de/BILDUNG", "name": "SH Bildungsministerium", "description": "Ministerium für Allgemeine und Berufliche Bildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SH"},
    {"url": "https://fachanforderungen.schleswig-holstein.de", "name": "SH Fachanforderungen", "description": "Fachanforderungen Schleswig-Holstein", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SH"},
    # ===== THÜRINGEN (TH) =====
    {"url": "https://bildung.thueringen.de", "name": "Thüringen Bildungsministerium", "description": "Thüringer Ministerium für Bildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "TH"},
    {"url": "https://www.schulportal-thueringen.de", "name": "Thüringen Schulportal", "description": "Thüringer Schulportal", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "TH"},
    # ===== SCIENCE & STUDIES =====
    {"url": "https://www.bertelsmann-stiftung.de/bildung", "name": "Bertelsmann Stiftung", "description": "Bildungsstudien und Ländermonitor", "category": "science", "trust_boost": 0.85, "source_type": "NGO", "scope": "FEDERAL"},
    {"url": "https://www.oecd.org/pisa", "name": "OECD PISA", "description": "Internationale Schulleistungsstudie PISA", "category": "science", "trust_boost": 0.90, "source_type": "INT", "scope": "INTERNATIONAL"},
    # ===== EDUCATION PORTALS =====
    {"url": "https://www.lehrer-online.de", "name": "Lehrer-Online", "description": "Unterrichtsmaterialien und Fachinformationen", "category": "portals", "trust_boost": 0.80, "source_type": "PORTAL", "scope": "FEDERAL"},
    {"url": "https://www.4teachers.de", "name": "4teachers", "description": "Unterrichtsmaterialien von Lehrern für Lehrer", "category": "portals", "trust_boost": 0.75, "source_type": "PORTAL", "scope": "FEDERAL"},
    {"url": "https://www.zum.de", "name": "ZUM", "description": "Zentrale für Unterrichtsmedien im Internet", "category": "portals", "trust_boost": 0.80, "source_type": "NGO", "scope": "FEDERAL"},
]
def _print_category_counts(counts):
    """Print one ``- category: count`` line per entry, sorted by category name."""
    for cat, count in sorted(counts.items()):
        print(f" - {cat}: {count}")


async def _print_stats(client):
    """Fetch and print database statistics on a best-effort basis.

    The statistics are purely informational, so any failure here (network
    error, non-200 status, malformed payload) is reported as a warning and
    never propagates to the caller.
    """
    print("\nFetching statistics...")
    try:
        stats_response = await client.get(f"{API_BASE}/v1/edu-search/stats")
        if stats_response.status_code != 200:
            print(f"WARNING: Statistics request returned {stats_response.status_code}")
            return
        stats = stats_response.json()
        # The average may be null/None when the server has nothing to average;
        # treat that as 0 so the ":.2f" format below cannot raise TypeError.
        avg_trust_boost = stats.get('avg_trust_boost') or 0
        print("\nDatabase Statistics:")
        print(f" Total seeds: {stats.get('total_seeds', 0)}")
        print(f" Enabled seeds: {stats.get('enabled_seeds', 0)}")
        print(f" Disabled seeds: {stats.get('disabled_seeds', 0)}")
        print(f" Avg trust boost: {avg_trust_boost:.2f}")
        per_category = stats.get('seeds_per_category', {})
        if per_category:
            print("\n Seeds per category:")
            _print_category_counts(per_category)
    except (httpx.HTTPError, ValueError, TypeError, AttributeError) as e:
        # httpx.HTTPError covers connect/timeout errors; the rest cover an
        # undecodable or unexpectedly shaped JSON payload.
        print(f"WARNING: Could not fetch statistics - {e}")


async def load_seeds():
    """
    Load the initial seeds via the bulk import API.

    Prints a per-category breakdown of INITIAL_SEEDS, checks the API's
    /health endpoint, posts all seeds to the bulk-import endpoint in a single
    request, reports the imported/skipped/error counts from the response and
    finally prints database statistics on a best-effort basis.

    Returns:
        bool: True if the bulk import request returned HTTP 200, False if the
        API was unreachable, the request timed out, the import returned a
        non-200 status or an unexpected error occurred. A 200 response that
        lists per-seed errors still counts as success. A failure while
        fetching the statistics does not affect the result.

    This function does not raise: connection errors, timeouts and any other
    exception are caught, reported on stdout and turned into a False return.
    """
    print(f"Loading {len(INITIAL_SEEDS)} seeds into {API_BASE}...")
    print("Seeds breakdown:")
    categories = {}
    for seed in INITIAL_SEEDS:
        cat = seed.get("category", "unknown")
        categories[cat] = categories.get(cat, 0) + 1
    _print_category_counts(categories)

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            # Check API health first so an unreachable backend produces a
            # helpful hint instead of a bare connection error.
            try:
                health_response = await client.get(f"{API_BASE}/health")
            except httpx.ConnectError:
                print(f"ERROR: Cannot connect to API at {API_BASE}")
                print("Make sure the backend service is running:")
                print(" docker compose up -d backend")
                return False
            # A non-200 health status is only a warning; the import is still attempted.
            if health_response.status_code != 200:
                print(f"WARNING: Health check returned {health_response.status_code}")

            # Import seeds
            print("\nImporting seeds...")
            response = await client.post(
                f"{API_BASE}/v1/edu-search/seeds/bulk-import",
                json={"seeds": INITIAL_SEEDS},
            )
            if response.status_code != 200:
                print(f"\nERROR: Import failed with status {response.status_code}")
                try:
                    error_detail = response.json()
                    print(f" Detail: {error_detail.get('detail', response.text)}")
                except Exception:
                    # Body is not JSON (or not a JSON object): show raw text, truncated.
                    print(f" Response: {response.text[:500]}")
                return False

            result = response.json()
            imported = result.get('imported', 0)
            skipped = result.get('skipped', 0)
            errors = result.get('errors', [])
            print("\nResult:")
            print(f" Imported: {imported}")
            print(f" Skipped (duplicates): {skipped}")
            if errors:
                print(f" Errors: {len(errors)}")
                # Show only the first few errors to keep the output readable.
                for err in errors[:5]:
                    print(f" - {err}")
                if len(errors) > 5:
                    print(f" ... and {len(errors) - 5} more")

            # Get stats (informational only; cannot fail the import).
            await _print_stats(client)

            print("\nDone!")
            return True
    except httpx.ConnectError as e:
        print(f"\nERROR: Connection failed - {e}")
        print(f"Make sure the API is running at {API_BASE}")
        return False
    except httpx.TimeoutException:
        print("\nERROR: Request timed out")
        print("The server may be overloaded. Try again later.")
        return False
    except Exception as e:
        # Top-level boundary of the script: report and signal failure via the
        # return value so the caller can set the process exit code.
        print(f"\nERROR: Unexpected error - {e}")
        return False
if __name__ == "__main__":
    import sys

    # Run the async loader to completion and mirror its outcome in the
    # process exit status: 0 when it returned a truthy value, 1 otherwise.
    exit_code = 0 if asyncio.run(load_seeds()) else 1
    sys.exit(exit_code)