feat: edu-search-service migriert, voice-service/geo-service entfernt
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefuegt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
147
edu-search-service/scripts/seed_universities.py
Normal file
147
edu-search-service/scripts/seed_universities.py
Normal file
@@ -0,0 +1,147 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Seed German Universities directly into the edu-search-service universities table.
|
||||
|
||||
This script imports the same university data as load_university_seeds.py
|
||||
but writes directly to the PostgreSQL universities table used by the crawler.
|
||||
"""
|
||||
|
||||
import psycopg2
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add the backend scripts path to import university data
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../backend/scripts'))
|
||||
from load_university_seeds import (
|
||||
UNIVERSITAETEN, FACHHOCHSCHULEN, PAEDAGOGISCHE_HOCHSCHULEN,
|
||||
KUNSTHOCHSCHULEN, PRIVATE_HOCHSCHULEN
|
||||
)
|
||||
|
||||
# Database connection from environment or defaults
|
||||
DATABASE_URL = os.environ.get(
|
||||
'DATABASE_URL',
|
||||
'postgresql://breakpilot:breakpilot@localhost:5432/breakpilot_db'
|
||||
)
|
||||
|
||||
|
||||
def get_uni_type(original_type: str) -> str:
|
||||
"""Map the type from seed data to database uni_type."""
|
||||
type_map = {
|
||||
'UNI': 'UNI',
|
||||
'FH': 'HAW', # Fachhochschule -> HAW (Hochschule für Angewandte Wissenschaften)
|
||||
'PH': 'PH', # Pädagogische Hochschule
|
||||
'KUNST': 'KUNST',
|
||||
'PRIVATE': 'PRIVATE',
|
||||
}
|
||||
return type_map.get(original_type, 'UNI')
|
||||
|
||||
|
||||
def seed_universities():
|
||||
"""Load all universities into the database."""
|
||||
# Collect all universities with their types
|
||||
all_unis = []
|
||||
|
||||
for uni in UNIVERSITAETEN:
|
||||
all_unis.append({**uni, 'uni_type': 'UNI'})
|
||||
|
||||
for uni in FACHHOCHSCHULEN:
|
||||
all_unis.append({**uni, 'uni_type': 'HAW'})
|
||||
|
||||
for uni in PAEDAGOGISCHE_HOCHSCHULEN:
|
||||
all_unis.append({**uni, 'uni_type': 'PH'})
|
||||
|
||||
for uni in KUNSTHOCHSCHULEN:
|
||||
all_unis.append({**uni, 'uni_type': 'KUNST'})
|
||||
|
||||
for uni in PRIVATE_HOCHSCHULEN:
|
||||
all_unis.append({**uni, 'uni_type': 'PRIVATE'})
|
||||
|
||||
print(f"Total universities to seed: {len(all_unis)}")
|
||||
print(f" - Universitäten: {len(UNIVERSITAETEN)}")
|
||||
print(f" - Fachhochschulen: {len(FACHHOCHSCHULEN)}")
|
||||
print(f" - Pädagogische Hochschulen: {len(PAEDAGOGISCHE_HOCHSCHULEN)}")
|
||||
print(f" - Kunst-/Musikhochschulen: {len(KUNSTHOCHSCHULEN)}")
|
||||
print(f" - Private Hochschulen: {len(PRIVATE_HOCHSCHULEN)}")
|
||||
|
||||
try:
|
||||
conn = psycopg2.connect(DATABASE_URL)
|
||||
cur = conn.cursor()
|
||||
|
||||
inserted = 0
|
||||
skipped = 0
|
||||
errors = []
|
||||
|
||||
for uni in all_unis:
|
||||
try:
|
||||
# Generate a short name from the full name
|
||||
name = uni['name']
|
||||
short_name = None
|
||||
|
||||
# Try to extract common abbreviations
|
||||
if 'KIT' in name:
|
||||
short_name = 'KIT'
|
||||
elif 'TUM' in name or name == 'Technische Universität München':
|
||||
short_name = 'TUM'
|
||||
elif 'LMU' in name or 'Ludwig-Maximilians' in name:
|
||||
short_name = 'LMU'
|
||||
elif 'RWTH' in name:
|
||||
short_name = 'RWTH'
|
||||
elif 'FAU' in name or 'Friedrich-Alexander' in name:
|
||||
short_name = 'FAU'
|
||||
elif name.startswith('Universität '):
|
||||
short_name = 'Uni ' + name.replace('Universität ', '')[:15]
|
||||
elif name.startswith('Technische Universität '):
|
||||
short_name = 'TU ' + name.replace('Technische Universität ', '')[:12]
|
||||
elif name.startswith('Hochschule '):
|
||||
short_name = 'HS ' + name.replace('Hochschule ', '')[:15]
|
||||
|
||||
cur.execute("""
|
||||
INSERT INTO universities (name, short_name, url, state, uni_type)
|
||||
VALUES (%s, %s, %s, %s, %s)
|
||||
ON CONFLICT (url) DO NOTHING
|
||||
RETURNING id
|
||||
""", (
|
||||
uni['name'],
|
||||
short_name,
|
||||
uni['url'],
|
||||
uni.get('state'),
|
||||
uni['uni_type']
|
||||
))
|
||||
|
||||
result = cur.fetchone()
|
||||
if result:
|
||||
inserted += 1
|
||||
else:
|
||||
skipped += 1
|
||||
|
||||
except Exception as e:
|
||||
errors.append(f"{uni['name']}: {str(e)}")
|
||||
|
||||
conn.commit()
|
||||
cur.close()
|
||||
conn.close()
|
||||
|
||||
print(f"\nResults:")
|
||||
print(f" Inserted: {inserted}")
|
||||
print(f" Skipped (duplicates): {skipped}")
|
||||
|
||||
if errors:
|
||||
print(f" Errors: {len(errors)}")
|
||||
for err in errors[:5]:
|
||||
print(f" - {err}")
|
||||
|
||||
print(f"\nDone! Total universities in database: {inserted + skipped}")
|
||||
return True
|
||||
|
||||
except psycopg2.Error as e:
|
||||
print(f"Database error: {e}")
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("=" * 60)
|
||||
print("Seeding Universities into edu-search-service database")
|
||||
print("=" * 60)
|
||||
|
||||
success = seed_universities()
|
||||
sys.exit(0 if success else 1)
|
||||
Reference in New Issue
Block a user