This repository has been archived on 2026-02-15. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
breakpilot-pwa/backend/scripts/load_initial_seeds.py
BreakPilot Dev 19855efacc
Some checks failed
Tests / Go Tests (push) Has been cancelled
Tests / Python Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / Go Lint (push) Has been cancelled
Tests / Python Lint (push) Has been cancelled
Tests / Security Scan (push) Has been cancelled
Tests / All Checks Passed (push) Has been cancelled
Security Scanning / Secret Scanning (push) Has been cancelled
Security Scanning / Dependency Vulnerability Scan (push) Has been cancelled
Security Scanning / Go Security Scan (push) Has been cancelled
Security Scanning / Python Security Scan (push) Has been cancelled
Security Scanning / Node.js Security Scan (push) Has been cancelled
Security Scanning / Docker Image Security (push) Has been cancelled
Security Scanning / Security Summary (push) Has been cancelled
CI/CD Pipeline / Go Tests (push) Has been cancelled
CI/CD Pipeline / Python Tests (push) Has been cancelled
CI/CD Pipeline / Website Tests (push) Has been cancelled
CI/CD Pipeline / Linting (push) Has been cancelled
CI/CD Pipeline / Security Scan (push) Has been cancelled
CI/CD Pipeline / Docker Build & Push (push) Has been cancelled
CI/CD Pipeline / Integration Tests (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / CI Summary (push) Has been cancelled
ci/woodpecker/manual/build-ci-image Pipeline was successful
ci/woodpecker/manual/main Pipeline failed
feat: BreakPilot PWA - Full codebase (clean push without large binaries)
All services: admin-v2, studio-v2, website, ai-compliance-sdk,
consent-service, klausur-service, voice-service, and infrastructure.
Large PDFs and compiled binaries excluded via .gitignore.
2026-02-11 13:25:58 +01:00

219 lines
18 KiB
Python

#!/usr/bin/env python3
"""
Load Initial EduSearch Seeds into the Database.
This script uses the bulk import API to load all German education sources
that were provided by the user.
"""
import asyncio
import os
from collections import Counter

import httpx
# Base URL of the backend API. Taken from LLM_GATEWAY_URL when set; trailing
# slashes are stripped so that f"{API_BASE}/health" never yields "//health",
# and an empty (or slash-only) value falls back to the local default.
API_BASE = os.environ.get("LLM_GATEWAY_URL", "").rstrip("/") or "http://localhost:8000"
# Seed sources for German education content, grouped into sections: federal
# bodies, one section per federal state, research/studies, and teaching portals.
# load_seeds() sends this list unchanged as the "seeds" field of the
# bulk-import request body.
#
# Every entry is a flat dict with these keys:
#   url          - address of the source
#   name         - short display name
#   description  - one-line summary (written in German)
#   category     - "federal", "states", "authorities", "science" or "portals"
#   trust_boost  - float; the values used here range from 0.75 to 0.95
#                  (presumably a ranking weight on the server side - TODO confirm)
#   source_type  - "GOV", "NGO", "INT" or "PORTAL"
#   scope        - "FEDERAL", "STATE" or "INTERNATIONAL"
#   state        - two-letter state code; present only on scope == "STATE" entries
INITIAL_SEEDS = [
# ===== FEDERAL LEVEL (Bundesebene) =====
{"url": "https://www.kmk.org", "name": "Kultusministerkonferenz (KMK)", "description": "Lehrpläne, Bildungsstandards, Abiturregelungen", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
{"url": "https://www.bildungsserver.de", "name": "Deutscher Bildungsserver (DIPF)", "description": "Zentrale Meta-Plattform für alle Länder", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
{"url": "https://www.bpb.de", "name": "Bundeszentrale für politische Bildung", "description": "Unterrichtsmaterialien, Dossiers, Arbeitsblätter", "category": "federal", "trust_boost": 0.90, "source_type": "GOV", "scope": "FEDERAL"},
{"url": "https://www.bmbf.de", "name": "Bundesministerium für Bildung und Forschung", "description": "Förderprogramme, Bildungsberichte, Initiativen", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
{"url": "https://www.iqb.hu-berlin.de", "name": "Institut zur Qualitätsentwicklung (IQB)", "description": "Bildungsstandards, Vergleichsarbeiten, Abiturpools", "category": "federal", "trust_boost": 0.95, "source_type": "GOV", "scope": "FEDERAL"},
# ===== BADEN-WÜRTTEMBERG (BW) =====
{"url": "https://km-bw.de", "name": "BW Kultusministerium", "description": "Baden-Württemberg Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BW"},
{"url": "https://www.bildungsplaene-bw.de", "name": "BW Bildungspläne", "description": "Bildungspläne Baden-Württemberg", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BW"},
{"url": "https://zsl.kultus-bw.de", "name": "BW Zentrum für Schulqualität", "description": "ZSL Baden-Württemberg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BW"},
{"url": "https://lehrerfortbildung-bw.de", "name": "BW Lehrerfortbildung", "description": "Lehrerfortbildung Baden-Württemberg", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BW"},
{"url": "https://rp.baden-wuerttemberg.de", "name": "BW Regierungspräsidien", "description": "Bildungsaufsicht Baden-Württemberg", "category": "authorities", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BW"},
# ===== BAVARIA / BAYERN (BY) =====
{"url": "https://www.km.bayern.de", "name": "Bayern Kultusministerium", "description": "Bayerisches Staatsministerium für Unterricht und Kultus", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BY"},
{"url": "https://www.isb.bayern.de", "name": "Bayern ISB", "description": "Staatsinstitut für Schulqualität und Bildungsforschung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BY"},
{"url": "https://www.mebis.bayern.de", "name": "Bayern mebis", "description": "Medien-Bildung-Service Bayern", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BY"},
{"url": "https://www.bycs.de", "name": "Bayern Cloud Schule", "description": "Bayerische Schulcloud", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BY"},
{"url": "https://www.schulberatung.bayern.de", "name": "Bayern Schulberatung", "description": "Staatliche Schulberatung Bayern", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BY"},
# ===== BERLIN (BE) =====
{"url": "https://www.berlin.de/sen/bjf", "name": "Berlin Senatsverwaltung", "description": "Senatsverwaltung für Bildung, Jugend und Familie", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BE"},
{"url": "https://bildungsserver.berlin-brandenburg.de", "name": "Berlin-Brandenburg Bildungsserver", "description": "Gemeinsamer Bildungsserver Berlin-Brandenburg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BE"},
{"url": "https://www.berlin.de/schule", "name": "Berlin Schulportal", "description": "Berliner Schulportal", "category": "states", "trust_boost": 0.85, "source_type": "GOV", "scope": "STATE", "state": "BE"},
{"url": "https://www.berlin.de/landesinstitut-schule-medien", "name": "Berlin LISUM", "description": "Landesinstitut für Schule und Medien", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BE"},
# ===== BRANDENBURG (BB) =====
{"url": "https://mbjs.brandenburg.de", "name": "Brandenburg MBJS", "description": "Ministerium für Bildung, Jugend und Sport", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},
{"url": "https://lisum.berlin-brandenburg.de", "name": "Brandenburg LISUM", "description": "Landesinstitut für Schule und Medien", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},
{"url": "https://www.schulportal.brandenburg.de", "name": "Brandenburg Schulportal", "description": "Schulportal Brandenburg", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "BB"},
{"url": "https://lehrplan.brandenburg.de", "name": "Brandenburg Lehrpläne", "description": "Rahmenlehrpläne Brandenburg", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "BB"},
# ===== BREMEN (HB) =====
{"url": "https://www.bildung.bremen.de", "name": "Bremen Bildung", "description": "Senatorin für Kinder und Bildung Bremen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HB"},
{"url": "https://www.lis.bremen.de", "name": "Bremen LIS", "description": "Landesinstitut für Schule Bremen", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HB"},
{"url": "https://www.bildungsplaene.bremen.de", "name": "Bremen Bildungspläne", "description": "Bildungspläne Bremen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HB"},
# ===== HAMBURG (HH) =====
{"url": "https://www.hamburg.de/bsb", "name": "Hamburg BSB", "description": "Behörde für Schule und Berufsbildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},
{"url": "https://li.hamburg.de", "name": "Hamburg Landesinstitut", "description": "Landesinstitut für Lehrerbildung und Schulentwicklung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},
{"url": "https://www.bildungsplaene.hamburg.de", "name": "Hamburg Bildungspläne", "description": "Hamburger Bildungspläne", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HH"},
# ===== HESSE / HESSEN (HE) =====
{"url": "https://kultusministerium.hessen.de", "name": "Hessen Kultusministerium", "description": "Hessisches Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HE"},
{"url": "https://lehrplaene.hessen.de", "name": "Hessen Lehrpläne", "description": "Kerncurricula Hessen", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "HE"},
{"url": "https://www.schulportal.hessen.de", "name": "Hessen Schulportal", "description": "Hessisches Schulportal", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HE"},
{"url": "https://la.hessen.de", "name": "Hessen Lehrkräfteakademie", "description": "Hessische Lehrkräfteakademie", "category": "authorities", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "HE"},
# ===== MECKLENBURG-VORPOMMERN (MV) =====
{"url": "https://www.regierung-mv.de/Landesregierung/bm", "name": "MV Bildungsministerium", "description": "Bildungsministerium Mecklenburg-Vorpommern", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "MV"},
{"url": "https://www.bildung-mv.de", "name": "MV Bildungsportal", "description": "Bildungsserver Mecklenburg-Vorpommern", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "MV"},
# ===== LOWER SAXONY / NIEDERSACHSEN (NI) =====
{"url": "https://www.mk.niedersachsen.de", "name": "Niedersachsen MK", "description": "Niedersächsisches Kultusministerium", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NI"},
{"url": "https://www.nibis.de", "name": "Niedersachsen NiBiS", "description": "Niedersächsischer Bildungsserver", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NI"},
# ===== NORTH RHINE-WESTPHALIA / NORDRHEIN-WESTFALEN (NW) =====
{"url": "https://www.msb.nrw", "name": "NRW Schulministerium", "description": "Ministerium für Schule und Bildung NRW", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
{"url": "https://www.schulentwicklung.nrw.de", "name": "NRW Schulentwicklung", "description": "Schulentwicklung NRW", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
{"url": "https://www.qua-lis.nrw.de", "name": "NRW QUA-LiS", "description": "Qualitäts- und UnterstützungsAgentur", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "NW"},
{"url": "https://www.standardsicherung.schulministerium.nrw.de", "name": "NRW Standardsicherung", "description": "Standardsicherung und Prüfungen NRW", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "NW"},
# ===== RHINELAND-PALATINATE / RHEINLAND-PFALZ (RP) =====
{"url": "https://bm.rlp.de", "name": "RLP Bildungsministerium", "description": "Ministerium für Bildung Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},
{"url": "https://bildung.rlp.de", "name": "RLP Bildungsserver", "description": "Bildungsserver Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},
{"url": "https://lehrplaene.rlp.de", "name": "RLP Lehrpläne", "description": "Lehrpläne Rheinland-Pfalz", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "RP"},
# ===== SAARLAND (SL) =====
{"url": "https://www.saarland.de/mbk", "name": "Saarland MBK", "description": "Ministerium für Bildung und Kultur Saarland", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SL"},
{"url": "https://www.bildungsserver.saarland.de", "name": "Saarland Bildungsserver", "description": "Bildungsserver Saarland", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "SL"},
# ===== SAXONY / SACHSEN (SN) =====
{"url": "https://www.smk.sachsen.de", "name": "Sachsen SMK", "description": "Sächsisches Staatsministerium für Kultus", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SN"},
{"url": "https://www.schule.sachsen.de", "name": "Sachsen Schulportal", "description": "Sächsisches Schulportal", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SN"},
# ===== SAXONY-ANHALT / SACHSEN-ANHALT (ST) =====
{"url": "https://mb.sachsen-anhalt.de", "name": "Sachsen-Anhalt MB", "description": "Ministerium für Bildung Sachsen-Anhalt", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "ST"},
{"url": "https://lisa.sachsen-anhalt.de", "name": "Sachsen-Anhalt LISA", "description": "Landesinstitut für Schulqualität", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "ST"},
{"url": "https://www.bildung-lsa.de", "name": "Sachsen-Anhalt Bildungsserver", "description": "Bildungsserver Sachsen-Anhalt", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "ST"},
# ===== SCHLESWIG-HOLSTEIN (SH) =====
{"url": "https://www.schleswig-holstein.de/BILDUNG", "name": "SH Bildungsministerium", "description": "Ministerium für Allgemeine und Berufliche Bildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SH"},
{"url": "https://fachanforderungen.schleswig-holstein.de", "name": "SH Fachanforderungen", "description": "Fachanforderungen Schleswig-Holstein", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "SH"},
# ===== THURINGIA / THÜRINGEN (TH) =====
{"url": "https://bildung.thueringen.de", "name": "Thüringen Bildungsministerium", "description": "Thüringer Ministerium für Bildung", "category": "states", "trust_boost": 0.95, "source_type": "GOV", "scope": "STATE", "state": "TH"},
{"url": "https://www.schulportal-thueringen.de", "name": "Thüringen Schulportal", "description": "Thüringer Schulportal", "category": "states", "trust_boost": 0.90, "source_type": "GOV", "scope": "STATE", "state": "TH"},
# ===== SCIENCE & STUDIES =====
{"url": "https://www.bertelsmann-stiftung.de/bildung", "name": "Bertelsmann Stiftung", "description": "Bildungsstudien und Ländermonitor", "category": "science", "trust_boost": 0.85, "source_type": "NGO", "scope": "FEDERAL"},
{"url": "https://www.oecd.org/pisa", "name": "OECD PISA", "description": "Internationale Schulleistungsstudie PISA", "category": "science", "trust_boost": 0.90, "source_type": "INT", "scope": "INTERNATIONAL"},
# ===== EDUCATION PORTALS =====
{"url": "https://www.lehrer-online.de", "name": "Lehrer-Online", "description": "Unterrichtsmaterialien und Fachinformationen", "category": "portals", "trust_boost": 0.80, "source_type": "PORTAL", "scope": "FEDERAL"},
{"url": "https://www.4teachers.de", "name": "4teachers", "description": "Unterrichtsmaterialien von Lehrern für Lehrer", "category": "portals", "trust_boost": 0.75, "source_type": "PORTAL", "scope": "FEDERAL"},
{"url": "https://www.zum.de", "name": "ZUM", "description": "Zentrale für Unterrichtsmedien im Internet", "category": "portals", "trust_boost": 0.80, "source_type": "NGO", "scope": "FEDERAL"},
]
def _print_seed_breakdown(seeds):
    """Print the number of seeds per category, ordered by category name."""
    print("Seeds breakdown:")
    per_category = Counter(seed.get("category", "unknown") for seed in seeds)
    for cat, count in sorted(per_category.items()):
        print(f" - {cat}: {count}")


def _print_import_result(result):
    """Print imported/skipped counts and at most five errors from the import reply."""
    imported = result.get('imported', 0)
    skipped = result.get('skipped', 0)
    errors = result.get('errors', [])
    print("\nResult:")
    print(f" Imported: {imported}")
    print(f" Skipped (duplicates): {skipped}")
    if errors:
        print(f" Errors: {len(errors)}")
        for err in errors[:5]:
            print(f" - {err}")
        if len(errors) > 5:
            print(f" ... and {len(errors) - 5} more")


def _print_import_failure(response):
    """Print the status code and the best available detail of a failed import."""
    print(f"\nERROR: Import failed with status {response.status_code}")
    try:
        error_detail = response.json()
        print(f" Detail: {error_detail.get('detail', response.text)}")
    except (ValueError, AttributeError):
        # Body is not JSON, or is JSON but not an object: show the raw text.
        print(f" Response: {response.text[:500]}")


def _print_stats(stats):
    """Print the database statistics returned by the stats endpoint."""
    if not isinstance(stats, dict):
        print(f"WARNING: Unexpected statistics payload: {stats!r}")
        return
    # The average may be reported as null; treat that as 0 so that the
    # ':.2f' format below cannot raise on None.
    avg_trust_boost = stats.get('avg_trust_boost') or 0
    print("\nDatabase Statistics:")
    print(f" Total seeds: {stats.get('total_seeds', 0)}")
    print(f" Enabled seeds: {stats.get('enabled_seeds', 0)}")
    print(f" Disabled seeds: {stats.get('disabled_seeds', 0)}")
    print(f" Avg trust boost: {avg_trust_boost:.2f}")
    per_category = stats.get('seeds_per_category', {})
    if per_category:
        print("\n Seeds per category:")
        for cat, count in sorted(per_category.items()):
            print(f" - {cat}: {count}")


async def load_seeds() -> bool:
    """
    Load initial seeds via bulk import API.

    Steps: print a per-category breakdown of INITIAL_SEEDS, probe
    ``{API_BASE}/health``, POST the seeds to the bulk-import endpoint and
    finally print the database statistics.

    The statistics step is informational only: once the import has been
    accepted, a failure to fetch or display statistics produces a warning
    and does not change the return value.

    Connection errors, timeouts and any other exception are reported on
    stdout and turned into a ``False`` return; they are not propagated to
    the caller.

    Returns:
        bool: True if the bulk import succeeded, False otherwise
    """
    print(f"Loading {len(INITIAL_SEEDS)} seeds into {API_BASE}...")
    _print_seed_breakdown(INITIAL_SEEDS)

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            # Check API health first. Only an unreachable server aborts here;
            # a non-200 health status is reported but the import is still tried.
            try:
                health_response = await client.get(f"{API_BASE}/health")
            except httpx.ConnectError:
                print(f"ERROR: Cannot connect to API at {API_BASE}")
                print("Make sure the backend service is running:")
                print(" docker compose up -d backend")
                return False
            if health_response.status_code != 200:
                print(f"WARNING: Health check returned {health_response.status_code}")

            # Import seeds
            print("\nImporting seeds...")
            response = await client.post(
                f"{API_BASE}/v1/edu-search/seeds/bulk-import",
                json={"seeds": INITIAL_SEEDS},
            )
            if response.status_code != 200:
                _print_import_failure(response)
                return False
            _print_import_result(response.json())

            # Get stats (best effort: the import above has already succeeded,
            # so problems here must not turn the overall result into a failure).
            print("\nFetching statistics...")
            try:
                stats_response = await client.get(f"{API_BASE}/v1/edu-search/stats")
                if stats_response.status_code == 200:
                    _print_stats(stats_response.json())
                else:
                    print(f"WARNING: Statistics request returned {stats_response.status_code}")
            except (httpx.HTTPError, ValueError) as e:
                # httpx.HTTPError covers connection and timeout errors;
                # ValueError covers a response body that is not valid JSON.
                print(f"WARNING: Could not fetch statistics - {e}")

            print("\nDone!")
            return True
    except httpx.ConnectError as e:
        print(f"\nERROR: Connection failed - {e}")
        print(f"Make sure the API is running at {API_BASE}")
        return False
    except httpx.TimeoutException:
        print("\nERROR: Request timed out")
        print("The server may be overloaded. Try again later.")
        return False
    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print(f"\nERROR: Unexpected error - {e}")
        return False
if __name__ == "__main__":
    import sys

    # Run the loader to completion and mirror its boolean result in the
    # process exit status: 0 when the seeds were loaded, 1 otherwise.
    success = asyncio.run(load_seeds())
    if success:
        sys.exit(0)
    sys.exit(1)