All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefuegt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
148 lines
4.7 KiB
Python
148 lines
4.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Seed German Universities directly into the edu-search-service universities table.
|
|
|
|
This script imports the same university data as load_university_seeds.py
|
|
but writes directly to the PostgreSQL universities table used by the crawler.
|
|
"""
|
|
|
|
import psycopg2
|
|
import os
|
|
import sys
|
|
|
|
# Add the backend scripts path to import university data
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../backend/scripts'))
|
|
from load_university_seeds import (
|
|
UNIVERSITAETEN, FACHHOCHSCHULEN, PAEDAGOGISCHE_HOCHSCHULEN,
|
|
KUNSTHOCHSCHULEN, PRIVATE_HOCHSCHULEN
|
|
)
|
|
|
|
# Database connection from environment or defaults
|
|
DATABASE_URL = os.environ.get(
|
|
'DATABASE_URL',
|
|
'postgresql://breakpilot:breakpilot@localhost:5432/breakpilot_db'
|
|
)
|
|
|
|
|
|
def get_uni_type(original_type: str) -> str:
|
|
"""Map the type from seed data to database uni_type."""
|
|
type_map = {
|
|
'UNI': 'UNI',
|
|
'FH': 'HAW', # Fachhochschule -> HAW (Hochschule für Angewandte Wissenschaften)
|
|
'PH': 'PH', # Pädagogische Hochschule
|
|
'KUNST': 'KUNST',
|
|
'PRIVATE': 'PRIVATE',
|
|
}
|
|
return type_map.get(original_type, 'UNI')
|
|
|
|
|
|
def seed_universities():
|
|
"""Load all universities into the database."""
|
|
# Collect all universities with their types
|
|
all_unis = []
|
|
|
|
for uni in UNIVERSITAETEN:
|
|
all_unis.append({**uni, 'uni_type': 'UNI'})
|
|
|
|
for uni in FACHHOCHSCHULEN:
|
|
all_unis.append({**uni, 'uni_type': 'HAW'})
|
|
|
|
for uni in PAEDAGOGISCHE_HOCHSCHULEN:
|
|
all_unis.append({**uni, 'uni_type': 'PH'})
|
|
|
|
for uni in KUNSTHOCHSCHULEN:
|
|
all_unis.append({**uni, 'uni_type': 'KUNST'})
|
|
|
|
for uni in PRIVATE_HOCHSCHULEN:
|
|
all_unis.append({**uni, 'uni_type': 'PRIVATE'})
|
|
|
|
print(f"Total universities to seed: {len(all_unis)}")
|
|
print(f" - Universitäten: {len(UNIVERSITAETEN)}")
|
|
print(f" - Fachhochschulen: {len(FACHHOCHSCHULEN)}")
|
|
print(f" - Pädagogische Hochschulen: {len(PAEDAGOGISCHE_HOCHSCHULEN)}")
|
|
print(f" - Kunst-/Musikhochschulen: {len(KUNSTHOCHSCHULEN)}")
|
|
print(f" - Private Hochschulen: {len(PRIVATE_HOCHSCHULEN)}")
|
|
|
|
try:
|
|
conn = psycopg2.connect(DATABASE_URL)
|
|
cur = conn.cursor()
|
|
|
|
inserted = 0
|
|
skipped = 0
|
|
errors = []
|
|
|
|
for uni in all_unis:
|
|
try:
|
|
# Generate a short name from the full name
|
|
name = uni['name']
|
|
short_name = None
|
|
|
|
# Try to extract common abbreviations
|
|
if 'KIT' in name:
|
|
short_name = 'KIT'
|
|
elif 'TUM' in name or name == 'Technische Universität München':
|
|
short_name = 'TUM'
|
|
elif 'LMU' in name or 'Ludwig-Maximilians' in name:
|
|
short_name = 'LMU'
|
|
elif 'RWTH' in name:
|
|
short_name = 'RWTH'
|
|
elif 'FAU' in name or 'Friedrich-Alexander' in name:
|
|
short_name = 'FAU'
|
|
elif name.startswith('Universität '):
|
|
short_name = 'Uni ' + name.replace('Universität ', '')[:15]
|
|
elif name.startswith('Technische Universität '):
|
|
short_name = 'TU ' + name.replace('Technische Universität ', '')[:12]
|
|
elif name.startswith('Hochschule '):
|
|
short_name = 'HS ' + name.replace('Hochschule ', '')[:15]
|
|
|
|
cur.execute("""
|
|
INSERT INTO universities (name, short_name, url, state, uni_type)
|
|
VALUES (%s, %s, %s, %s, %s)
|
|
ON CONFLICT (url) DO NOTHING
|
|
RETURNING id
|
|
""", (
|
|
uni['name'],
|
|
short_name,
|
|
uni['url'],
|
|
uni.get('state'),
|
|
uni['uni_type']
|
|
))
|
|
|
|
result = cur.fetchone()
|
|
if result:
|
|
inserted += 1
|
|
else:
|
|
skipped += 1
|
|
|
|
except Exception as e:
|
|
errors.append(f"{uni['name']}: {str(e)}")
|
|
|
|
conn.commit()
|
|
cur.close()
|
|
conn.close()
|
|
|
|
print(f"\nResults:")
|
|
print(f" Inserted: {inserted}")
|
|
print(f" Skipped (duplicates): {skipped}")
|
|
|
|
if errors:
|
|
print(f" Errors: {len(errors)}")
|
|
for err in errors[:5]:
|
|
print(f" - {err}")
|
|
|
|
print(f"\nDone! Total universities in database: {inserted + skipped}")
|
|
return True
|
|
|
|
except psycopg2.Error as e:
|
|
print(f"Database error: {e}")
|
|
return False
|
|
|
|
|
|
if __name__ == "__main__":
|
|
print("=" * 60)
|
|
print("Seeding Universities into edu-search-service database")
|
|
print("=" * 60)
|
|
|
|
success = seed_universities()
|
|
sys.exit(0 if success else 1)
|