feat: edu-search-service migriert, voice-service/geo-service entfernt
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 28s
CI / test-go-edu-search (push) Successful in 27s
CI / test-python-klausur (push) Successful in 1m45s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 21s
- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor) - opensearch + edu-search-service in docker-compose.yml hinzugefuegt - voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core) - geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt) - CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
282
edu-search-service/scripts/add_german_universities.py
Normal file
282
edu-search-service/scripts/add_german_universities.py
Normal file
@@ -0,0 +1,282 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Add all major German universities to the edu-search-service database.
|
||||
Based on HRK (Hochschulrektorenkonferenz) list.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
import sys
|
||||
|
||||
API_BASE = "https://macmini:8089/api/v1"
|
||||
|
||||
# German Universities - categorized
|
||||
GERMAN_UNIVERSITIES = [
|
||||
# === UNIVERSITIES (Universitäten) ===
|
||||
# Already in DB (skip or update)
|
||||
# {"name": "TUM", "url": "https://www.tum.de", "type": "university"},
|
||||
# {"name": "LMU München", "url": "https://www.lmu.de", "type": "university"},
|
||||
# {"name": "UOL", "url": "https://uol.de", "type": "university"},
|
||||
# {"name": "KIT Karlsruhe", "url": "https://www.kit.edu", "type": "university"},
|
||||
|
||||
# TU9 Universities
|
||||
{"name": "TU Dresden", "url": "https://tu-dresden.de", "type": "university"},
|
||||
{"name": "TU Braunschweig", "url": "https://www.tu-braunschweig.de", "type": "university"},
|
||||
{"name": "TU Darmstadt", "url": "https://www.tu-darmstadt.de", "type": "university"},
|
||||
{"name": "Leibniz Universität Hannover", "url": "https://www.uni-hannover.de", "type": "university"},
|
||||
{"name": "Universität Stuttgart", "url": "https://www.uni-stuttgart.de", "type": "university"},
|
||||
|
||||
# Excellence Universities
|
||||
{"name": "Universität Bonn", "url": "https://www.uni-bonn.de", "type": "university"},
|
||||
{"name": "Universität Konstanz", "url": "https://www.uni-konstanz.de", "type": "university"},
|
||||
{"name": "Universität Tübingen", "url": "https://uni-tuebingen.de", "type": "university"},
|
||||
{"name": "Universität Freiburg", "url": "https://www.uni-freiburg.de", "type": "university"},
|
||||
|
||||
# Large State Universities
|
||||
{"name": "Universität Münster", "url": "https://www.uni-muenster.de", "type": "university"},
|
||||
{"name": "Universität Frankfurt", "url": "https://www.uni-frankfurt.de", "type": "university"},
|
||||
{"name": "Universität Mainz", "url": "https://www.uni-mainz.de", "type": "university"},
|
||||
{"name": "Universität Würzburg", "url": "https://www.uni-wuerzburg.de", "type": "university"},
|
||||
{"name": "Universität Erlangen-Nürnberg", "url": "https://www.fau.de", "type": "university"},
|
||||
{"name": "Universität Leipzig", "url": "https://www.uni-leipzig.de", "type": "university"},
|
||||
{"name": "Universität Jena", "url": "https://www.uni-jena.de", "type": "university"},
|
||||
{"name": "Universität Halle", "url": "https://www.uni-halle.de", "type": "university"},
|
||||
{"name": "Universität Rostock", "url": "https://www.uni-rostock.de", "type": "university"},
|
||||
{"name": "Universität Greifswald", "url": "https://www.uni-greifswald.de", "type": "university"},
|
||||
{"name": "Universität Kiel", "url": "https://www.uni-kiel.de", "type": "university"},
|
||||
{"name": "Universität Bremen", "url": "https://www.uni-bremen.de", "type": "university"},
|
||||
{"name": "Universität Bielefeld", "url": "https://www.uni-bielefeld.de", "type": "university"},
|
||||
{"name": "Universität Duisburg-Essen", "url": "https://www.uni-due.de", "type": "university"},
|
||||
{"name": "Universität Dortmund", "url": "https://www.tu-dortmund.de", "type": "university"},
|
||||
{"name": "Universität Bochum", "url": "https://www.ruhr-uni-bochum.de", "type": "university"},
|
||||
{"name": "Universität Düsseldorf", "url": "https://www.hhu.de", "type": "university"},
|
||||
{"name": "Universität Wuppertal", "url": "https://www.uni-wuppertal.de", "type": "university"},
|
||||
{"name": "Universität Siegen", "url": "https://www.uni-siegen.de", "type": "university"},
|
||||
{"name": "Universität Paderborn", "url": "https://www.uni-paderborn.de", "type": "university"},
|
||||
{"name": "Universität Kassel", "url": "https://www.uni-kassel.de", "type": "university"},
|
||||
{"name": "Universität Marburg", "url": "https://www.uni-marburg.de", "type": "university"},
|
||||
{"name": "Universität Gießen", "url": "https://www.uni-giessen.de", "type": "university"},
|
||||
{"name": "Universität Saarbrücken", "url": "https://www.uni-saarland.de", "type": "university"},
|
||||
{"name": "Universität Trier", "url": "https://www.uni-trier.de", "type": "university"},
|
||||
{"name": "Universität Koblenz", "url": "https://www.uni-koblenz.de", "type": "university"},
|
||||
{"name": "Universität Landau", "url": "https://rptu.de", "type": "university"},
|
||||
{"name": "Universität Mannheim", "url": "https://www.uni-mannheim.de", "type": "university"},
|
||||
{"name": "Universität Ulm", "url": "https://www.uni-ulm.de", "type": "university"},
|
||||
{"name": "Universität Hohenheim", "url": "https://www.uni-hohenheim.de", "type": "university"},
|
||||
{"name": "Universität Regensburg", "url": "https://www.uni-regensburg.de", "type": "university"},
|
||||
{"name": "Universität Passau", "url": "https://www.uni-passau.de", "type": "university"},
|
||||
{"name": "Universität Bayreuth", "url": "https://www.uni-bayreuth.de", "type": "university"},
|
||||
{"name": "Universität Bamberg", "url": "https://www.uni-bamberg.de", "type": "university"},
|
||||
{"name": "Universität Augsburg", "url": "https://www.uni-augsburg.de", "type": "university"},
|
||||
{"name": "Universität Potsdam", "url": "https://www.uni-potsdam.de", "type": "university"},
|
||||
{"name": "Universität Magdeburg", "url": "https://www.ovgu.de", "type": "university"},
|
||||
{"name": "TU Chemnitz", "url": "https://www.tu-chemnitz.de", "type": "university"},
|
||||
{"name": "TU Ilmenau", "url": "https://www.tu-ilmenau.de", "type": "university"},
|
||||
{"name": "TU Freiberg", "url": "https://tu-freiberg.de", "type": "university"},
|
||||
{"name": "TU Clausthal", "url": "https://www.tu-clausthal.de", "type": "university"},
|
||||
{"name": "TU Kaiserslautern", "url": "https://rptu.de", "type": "university"},
|
||||
{"name": "BTU Cottbus-Senftenberg", "url": "https://www.b-tu.de", "type": "university"},
|
||||
{"name": "Universität der Bundeswehr München", "url": "https://www.unibw.de", "type": "university"},
|
||||
{"name": "Universität der Bundeswehr Hamburg", "url": "https://www.hsu-hh.de", "type": "university"},
|
||||
|
||||
# === FACHHOCHSCHULEN / HAW ===
|
||||
{"name": "HAW Hamburg", "url": "https://www.haw-hamburg.de", "type": "haw"},
|
||||
{"name": "HTW Berlin", "url": "https://www.htw-berlin.de", "type": "haw"},
|
||||
{"name": "Beuth Hochschule Berlin", "url": "https://www.bht-berlin.de", "type": "haw"},
|
||||
{"name": "FH Aachen", "url": "https://www.fh-aachen.de", "type": "haw"},
|
||||
{"name": "TH Köln", "url": "https://www.th-koeln.de", "type": "haw"},
|
||||
{"name": "Hochschule Düsseldorf", "url": "https://www.hs-duesseldorf.de", "type": "haw"},
|
||||
{"name": "FH Dortmund", "url": "https://www.fh-dortmund.de", "type": "haw"},
|
||||
{"name": "Hochschule Bochum", "url": "https://www.hochschule-bochum.de", "type": "haw"},
|
||||
{"name": "Westfälische Hochschule", "url": "https://www.w-hs.de", "type": "haw"},
|
||||
{"name": "FH Bielefeld", "url": "https://www.fh-bielefeld.de", "type": "haw"},
|
||||
{"name": "FH Münster", "url": "https://www.fh-muenster.de", "type": "haw"},
|
||||
{"name": "Hochschule Osnabrück", "url": "https://www.hs-osnabrueck.de", "type": "haw"},
|
||||
{"name": "Hochschule Bremen", "url": "https://www.hs-bremen.de", "type": "haw"},
|
||||
{"name": "Hochschule Hannover", "url": "https://www.hs-hannover.de", "type": "haw"},
|
||||
{"name": "Ostfalia Hochschule", "url": "https://www.ostfalia.de", "type": "haw"},
|
||||
{"name": "Hochschule Emden/Leer", "url": "https://www.hs-emden-leer.de", "type": "haw"},
|
||||
{"name": "HAWK Hildesheim", "url": "https://www.hawk.de", "type": "haw"},
|
||||
{"name": "Hochschule Fulda", "url": "https://www.hs-fulda.de", "type": "haw"},
|
||||
{"name": "Frankfurt UAS", "url": "https://www.frankfurt-university.de", "type": "haw"},
|
||||
{"name": "Hochschule Darmstadt", "url": "https://www.h-da.de", "type": "haw"},
|
||||
{"name": "Hochschule RheinMain", "url": "https://www.hs-rm.de", "type": "haw"},
|
||||
{"name": "Hochschule Mainz", "url": "https://www.hs-mainz.de", "type": "haw"},
|
||||
{"name": "Hochschule Trier", "url": "https://www.hochschule-trier.de", "type": "haw"},
|
||||
{"name": "Hochschule Koblenz", "url": "https://www.hs-koblenz.de", "type": "haw"},
|
||||
{"name": "Hochschule Karlsruhe", "url": "https://www.h-ka.de", "type": "haw"},
|
||||
{"name": "Hochschule Mannheim", "url": "https://www.hs-mannheim.de", "type": "haw"},
|
||||
{"name": "Hochschule Heilbronn", "url": "https://www.hs-heilbronn.de", "type": "haw"},
|
||||
{"name": "Hochschule Esslingen", "url": "https://www.hs-esslingen.de", "type": "haw"},
|
||||
{"name": "Hochschule Reutlingen", "url": "https://www.reutlingen-university.de", "type": "haw"},
|
||||
{"name": "Hochschule Konstanz", "url": "https://www.htwg-konstanz.de", "type": "haw"},
|
||||
{"name": "Hochschule Offenburg", "url": "https://www.hs-offenburg.de", "type": "haw"},
|
||||
{"name": "Hochschule Pforzheim", "url": "https://www.hs-pforzheim.de", "type": "haw"},
|
||||
{"name": "Hochschule Albstadt-Sigmaringen", "url": "https://www.hs-albsig.de", "type": "haw"},
|
||||
{"name": "Hochschule München", "url": "https://www.hm.edu", "type": "haw"},
|
||||
{"name": "TH Nürnberg", "url": "https://www.th-nuernberg.de", "type": "haw"},
|
||||
{"name": "TH Ingolstadt", "url": "https://www.thi.de", "type": "haw"},
|
||||
{"name": "Hochschule Augsburg", "url": "https://www.hs-augsburg.de", "type": "haw"},
|
||||
{"name": "Hochschule Rosenheim", "url": "https://www.th-rosenheim.de", "type": "haw"},
|
||||
{"name": "Hochschule Regensburg", "url": "https://www.oth-regensburg.de", "type": "haw"},
|
||||
{"name": "Hochschule Landshut", "url": "https://www.haw-landshut.de", "type": "haw"},
|
||||
{"name": "Hochschule Coburg", "url": "https://www.hs-coburg.de", "type": "haw"},
|
||||
{"name": "Hochschule Hof", "url": "https://www.hof-university.de", "type": "haw"},
|
||||
{"name": "Hochschule Würzburg-Schweinfurt", "url": "https://www.thws.de", "type": "haw"},
|
||||
{"name": "Hochschule Aschaffenburg", "url": "https://www.th-ab.de", "type": "haw"},
|
||||
{"name": "Hochschule Ansbach", "url": "https://www.hs-ansbach.de", "type": "haw"},
|
||||
{"name": "OTH Amberg-Weiden", "url": "https://www.oth-aw.de", "type": "haw"},
|
||||
{"name": "Hochschule Deggendorf", "url": "https://www.th-deg.de", "type": "haw"},
|
||||
{"name": "Hochschule Kempten", "url": "https://www.hs-kempten.de", "type": "haw"},
|
||||
{"name": "Hochschule Neu-Ulm", "url": "https://www.hnu.de", "type": "haw"},
|
||||
{"name": "HTW Dresden", "url": "https://www.htw-dresden.de", "type": "haw"},
|
||||
{"name": "HTWK Leipzig", "url": "https://www.htwk-leipzig.de", "type": "haw"},
|
||||
{"name": "Hochschule Mittweida", "url": "https://www.hs-mittweida.de", "type": "haw"},
|
||||
{"name": "Hochschule Zittau/Görlitz", "url": "https://www.hszg.de", "type": "haw"},
|
||||
{"name": "Westsächsische Hochschule Zwickau", "url": "https://www.fh-zwickau.de", "type": "haw"},
|
||||
{"name": "Hochschule Merseburg", "url": "https://www.hs-merseburg.de", "type": "haw"},
|
||||
{"name": "Hochschule Anhalt", "url": "https://www.hs-anhalt.de", "type": "haw"},
|
||||
{"name": "Hochschule Magdeburg-Stendal", "url": "https://www.h2.de", "type": "haw"},
|
||||
{"name": "Hochschule Harz", "url": "https://www.hs-harz.de", "type": "haw"},
|
||||
{"name": "Ernst-Abbe-Hochschule Jena", "url": "https://www.eah-jena.de", "type": "haw"},
|
||||
{"name": "FH Erfurt", "url": "https://www.fh-erfurt.de", "type": "haw"},
|
||||
{"name": "Hochschule Nordhausen", "url": "https://www.hs-nordhausen.de", "type": "haw"},
|
||||
{"name": "Hochschule Schmalkalden", "url": "https://www.hs-schmalkalden.de", "type": "haw"},
|
||||
{"name": "TH Brandenburg", "url": "https://www.th-brandenburg.de", "type": "haw"},
|
||||
{"name": "FH Potsdam", "url": "https://www.fh-potsdam.de", "type": "haw"},
|
||||
{"name": "TH Wildau", "url": "https://www.th-wildau.de", "type": "haw"},
|
||||
{"name": "Hochschule Neubrandenburg", "url": "https://www.hs-nb.de", "type": "haw"},
|
||||
{"name": "Hochschule Stralsund", "url": "https://www.hochschule-stralsund.de", "type": "haw"},
|
||||
{"name": "Hochschule Wismar", "url": "https://www.hs-wismar.de", "type": "haw"},
|
||||
{"name": "FH Kiel", "url": "https://www.fh-kiel.de", "type": "haw"},
|
||||
{"name": "FH Westküste", "url": "https://www.fh-westkueste.de", "type": "haw"},
|
||||
{"name": "TH Lübeck", "url": "https://www.th-luebeck.de", "type": "haw"},
|
||||
{"name": "FH Flensburg", "url": "https://hs-flensburg.de", "type": "haw"},
|
||||
{"name": "Hochschule Bremerhaven", "url": "https://www.hs-bremerhaven.de", "type": "haw"},
|
||||
|
||||
# === PRIVATE HOCHSCHULEN ===
|
||||
{"name": "WHU Vallendar", "url": "https://www.whu.edu", "type": "private"},
|
||||
{"name": "HHL Leipzig", "url": "https://www.hhl.de", "type": "private"},
|
||||
{"name": "EBS Universität", "url": "https://www.ebs.edu", "type": "private"},
|
||||
{"name": "Frankfurt School", "url": "https://www.frankfurt-school.de", "type": "private"},
|
||||
{"name": "ESMT Berlin", "url": "https://esmt.berlin", "type": "private"},
|
||||
{"name": "Jacobs University Bremen", "url": "https://www.jacobs-university.de", "type": "private"},
|
||||
{"name": "Zeppelin Universität", "url": "https://www.zu.de", "type": "private"},
|
||||
{"name": "Bucerius Law School", "url": "https://www.law-school.de", "type": "private"},
|
||||
{"name": "Universität Witten/Herdecke", "url": "https://www.uni-wh.de", "type": "private"},
|
||||
{"name": "IUBH", "url": "https://www.iu.de", "type": "private"},
|
||||
{"name": "SRH Hochschule Heidelberg", "url": "https://www.srh-hochschule-heidelberg.de", "type": "private"},
|
||||
{"name": "FOM Hochschule", "url": "https://www.fom.de", "type": "private"},
|
||||
|
||||
# === FRAUNHOFER INSTITUTE ===
|
||||
{"name": "Fraunhofer IIS", "url": "https://www.iis.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer IAIS", "url": "https://www.iais.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer IML", "url": "https://www.iml.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer ISI", "url": "https://www.isi.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer IPA", "url": "https://www.ipa.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer IAO", "url": "https://www.iao.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer IWS", "url": "https://www.iws.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer IPT", "url": "https://www.ipt.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer FOKUS", "url": "https://www.fokus.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer HHI", "url": "https://www.hhi.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer IESE", "url": "https://www.iese.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer IOSB", "url": "https://www.iosb.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer IDMT", "url": "https://www.idmt.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer IKTS", "url": "https://www.ikts.fraunhofer.de", "type": "research"},
|
||||
{"name": "Fraunhofer IGD", "url": "https://www.igd.fraunhofer.de", "type": "research"},
|
||||
|
||||
# === MAX-PLANCK-INSTITUTE ===
|
||||
{"name": "MPI für Informatik", "url": "https://www.mpi-inf.mpg.de", "type": "research"},
|
||||
{"name": "MPI für Software Systeme", "url": "https://www.mpi-sws.org", "type": "research"},
|
||||
{"name": "MPI für intelligente Systeme", "url": "https://is.mpg.de", "type": "research"},
|
||||
{"name": "MPI für Mathematik", "url": "https://www.mpim-bonn.mpg.de", "type": "research"},
|
||||
{"name": "MPI für Physik", "url": "https://www.mpp.mpg.de", "type": "research"},
|
||||
{"name": "MPI für Quantenoptik", "url": "https://www.mpq.mpg.de", "type": "research"},
|
||||
{"name": "MPI für Biophysik", "url": "https://www.biophys.mpg.de", "type": "research"},
|
||||
{"name": "MPI für Biochemie", "url": "https://www.biochem.mpg.de", "type": "research"},
|
||||
{"name": "MPI für Neurobiologie", "url": "https://www.neuro.mpg.de", "type": "research"},
|
||||
{"name": "MPI für Hirnforschung", "url": "https://brain.mpg.de", "type": "research"},
|
||||
|
||||
# === HELMHOLTZ-ZENTREN ===
|
||||
{"name": "DESY Hamburg", "url": "https://www.desy.de", "type": "research"},
|
||||
{"name": "FZ Jülich", "url": "https://www.fz-juelich.de", "type": "research"},
|
||||
{"name": "GSI Darmstadt", "url": "https://www.gsi.de", "type": "research"},
|
||||
{"name": "DKFZ Heidelberg", "url": "https://www.dkfz.de", "type": "research"},
|
||||
{"name": "DLR", "url": "https://www.dlr.de", "type": "research"},
|
||||
{"name": "AWI Bremerhaven", "url": "https://www.awi.de", "type": "research"},
|
||||
{"name": "GFZ Potsdam", "url": "https://www.gfz-potsdam.de", "type": "research"},
|
||||
{"name": "UFZ Leipzig", "url": "https://www.ufz.de", "type": "research"},
|
||||
{"name": "GEOMAR Kiel", "url": "https://www.geomar.de", "type": "research"},
|
||||
]
|
||||
|
||||
def get_existing_universities():
    """Fetch the universities already known to the API.

    Returns:
        dict: mapping of normalized URL (trailing slash stripped, lowercased)
        to the full university record. Empty dict on any error or non-200
        response, so callers can treat "nothing known" and "API down" alike.
    """
    try:
        # verify=False: the service runs with a self-signed certificate.
        response = requests.get(f"{API_BASE}/universities", verify=False, timeout=10)
        if response.status_code == 200:
            data = response.json()
            # Key by normalized URL so duplicate detection is case- and
            # trailing-slash-insensitive (main() normalizes the same way).
            return {u['url'].rstrip('/').lower(): u for u in data.get('universities', [])}
        # Fix: the original silently returned {} on non-200; report it so a
        # misconfigured endpoint doesn't look like an empty database.
        print(f"Unexpected status fetching universities: {response.status_code}")
    except Exception as e:
        # Best-effort: log and fall through to the empty result.
        print(f"Error fetching existing universities: {e}")
    return {}
|
||||
|
||||
def add_university(uni):
    """POST a single university record to the API.

    Args:
        uni: dict with "name", "url" and optional "type"
             (defaults to "university").

    Returns:
        bool: True when the API answers 200 or 201, False otherwise
        (including network errors, which are printed).
    """
    payload = {
        "name": uni["name"],
        "url": uni["url"],
        "type": uni.get("type", "university"),
        "country": "DE"  # this script seeds German institutions only
    }

    try:
        # verify=False: the service runs with a self-signed certificate.
        response = requests.post(
            f"{API_BASE}/universities",
            json=payload,
            verify=False,
            timeout=10
        )
        # Idiom fix: membership test instead of `== 201 or == 200`.
        # Some deployments answer 200, others 201 on create.
        return response.status_code in (200, 201)
    except Exception as e:
        # Best-effort: report the failure and let the caller count it.
        print(f"Error adding {uni['name']}: {e}")
        return False
|
||||
|
||||
def main():
    """Seed GERMAN_UNIVERSITIES into the API, skipping already-known URLs.

    Prints a per-entry ADD/SKIP line and a final summary with added,
    skipped, and failed counts.
    """
    print("Fetching existing universities...")
    existing = get_existing_universities()
    print(f"Found {len(existing)} existing universities")

    added = 0
    skipped = 0
    failed = 0

    for uni in GERMAN_UNIVERSITIES:
        # Normalize exactly like get_existing_universities() keys its result,
        # so duplicate detection is trailing-slash- and case-insensitive.
        url_key = uni["url"].rstrip('/').lower()

        if url_key in existing:
            print(f"SKIP: {uni['name']} (already exists)")
            skipped += 1
            continue

        print(f"ADD: {uni['name']} ({uni['url']})")
        if add_university(uni):
            added += 1
        else:
            failed += 1

        # Rate limiting: pause between POSTs to be gentle with the API.
        time.sleep(0.2)

    # Fix: dropped the pointless f-prefix (f-string without placeholders).
    print("\n=== SUMMARY ===")
    print(f"Added: {added}")
    print(f"Skipped: {skipped}")
    print(f"Failed: {failed}")
    print(f"Total: {len(GERMAN_UNIVERSITIES)}")
|
||||
|
||||
if __name__ == "__main__":
    # Disable SSL warnings for self-signed cert
    # (every request in this script uses verify=False).
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    main()
|
||||
125
edu-search-service/scripts/fix_university_types.py
Normal file
125
edu-search-service/scripts/fix_university_types.py
Normal file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fix university types in the database.
|
||||
This script updates uni_type based on university names.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import sys
|
||||
|
||||
API_BASE = "https://macmini:8089/api/v1"
|
||||
|
||||
# Classification rules based on name patterns.
#
# NOTE: classify_university() scans this dict in insertion order and the
# FIRST matching pattern wins. The generic FH pattern "Hochschule" is a
# substring of "Pädagogische Hochschule", "Kunsthochschule", etc., so the
# specific categories MUST come before FH or they can never match.
# UNI stays first so "... Universität" names keep their classification.
UNI_TYPE_RULES = {
    "UNI": [
        "Universität", "University", "TU ", "TUM", "LMU", "RWTH",
        "Humboldt", "FU Berlin", "HU Berlin", "TH ", "KIT"
    ],
    "KUNST": [
        "Kunsthochschule", "Musikhochschule", "Filmhochschule",
        "Kunstakademie", "HfK", "HfM", "HfG", "UdK", "Bauhaus"
    ],
    "PH": [
        "Pädagogische Hochschule", "PH "
    ],
    "RESEARCH": [
        "Fraunhofer", "Max-Planck", "Helmholtz", "DLR", "DESY",
        "DKFZ", "FZ Jülich", "AWI", "GFZ", "GSI", "Leibniz"
    ],
    "PRIVATE": [
        "EBS", "ESMT", "Bucerius", "WHU", "HHL", "FOM", "IUBH",
        "SRH", "International School", "Business School"
    ],
    # FH last: its "Hochschule" pattern is a catch-all.
    "FH": [
        "Hochschule", "Fachhochschule", "FH ", "HAW ", "HS ",
        "University of Applied", "Beuth", "HTW"
    ],
}

def classify_university(name):
    """Classify a university by case-insensitive name-pattern matching.

    Args:
        name: institution name, e.g. "Pädagogische Hochschule Freiburg".

    Returns:
        str: a UNI_TYPE_RULES key; falls back to "UNI" for names containing
        "universität"/"university" and "FH" for everything else.
    """
    name_lower = name.lower()

    # First matching pattern wins; categories are ordered specific-first
    # (see the note on UNI_TYPE_RULES).
    for uni_type, patterns in UNI_TYPE_RULES.items():
        for pattern in patterns:
            if pattern.lower() in name_lower:
                return uni_type

    # Default to UNI if "universität" in name, else FH
    if "universität" in name_lower or "university" in name_lower:
        return "UNI"

    return "FH"  # Default
|
||||
|
||||
def get_all_universities():
    """Return every university record from the API, or [] on any failure."""
    try:
        resp = requests.get(f"{API_BASE}/universities", verify=False, timeout=30)
        # Anything but 200 is treated the same as an empty result.
        if resp.status_code != 200:
            return []
        return resp.json().get('universities', [])
    except Exception as exc:
        print(f"Error fetching universities: {exc}")
        return []
|
||||
|
||||
def update_university_type(uni_id, uni_type, uni_state=None):
    """Update university type via direct database or API.

    NOTE(review): placeholder — the API exposes no update endpoint, so
    main() emits SQL statements instead and this function is currently
    unused. It simply echoes ``uni_type`` back; ``uni_id`` and
    ``uni_state`` are ignored.
    """
    # The API doesn't have an update endpoint, so we'll print SQL statements
    return uni_type
|
||||
|
||||
def main():
    """Classify every university and write corrective SQL to /tmp.

    Fetches all universities, reclassifies each name, prints the entries
    whose type changes, and writes an UPDATE script (wrapped in a
    transaction) for manual application against PostgreSQL.
    """
    print("=== University Type Fixer ===\n")

    # Disable SSL warnings (self-signed cert; all requests use verify=False).
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    universities = get_all_universities()
    if not universities:
        print("ERROR: No universities found!")
        return

    print(f"Found {len(universities)} universities\n")

    # Classify and generate SQL
    sql_statements = []
    type_counts = {}

    for uni in universities:
        uni_id = uni['id']
        uni_name = uni['name']
        current_type = uni.get('uni_type', 'unknown')

        new_type = classify_university(uni_name)
        type_counts[new_type] = type_counts.get(new_type, 0) + 1

        # Fix: escape single quotes so an id containing ' cannot break
        # (or inject into) the generated SQL. new_type comes from a fixed
        # internal set and needs no escaping.
        safe_id = str(uni_id).replace("'", "''")
        sql = f"UPDATE universities SET uni_type = '{new_type}' WHERE id = '{safe_id}';"
        sql_statements.append(sql)

        # Only show entries whose classification actually changes.
        if current_type != new_type:
            print(f" {uni_name[:50]:<50} -> {new_type}")

    # Fix: dropped the pointless f-prefix (f-string without placeholders).
    print("\n=== Summary ===")
    for t, c in sorted(type_counts.items()):
        print(f" {t}: {c}")

    # Write SQL file wrapped in BEGIN/COMMIT so it applies all-or-nothing.
    sql_file = "/tmp/fix_uni_types.sql"
    with open(sql_file, 'w') as f:
        f.write("-- Fix university types\n")
        f.write("BEGIN;\n\n")
        for sql in sql_statements:
            f.write(sql + "\n")
        f.write("\nCOMMIT;\n")

    print(f"\nSQL written to: {sql_file}")
    print(f"Run: cat {sql_file} | docker exec -i breakpilot-pwa-postgres psql -U <user> -d edu_search")
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: classify all universities and emit /tmp/fix_uni_types.sql.
    main()
|
||||
147
edu-search-service/scripts/seed_universities.py
Normal file
147
edu-search-service/scripts/seed_universities.py
Normal file
@@ -0,0 +1,147 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Seed German Universities directly into the edu-search-service universities table.
|
||||
|
||||
This script imports the same university data as load_university_seeds.py
|
||||
but writes directly to the PostgreSQL universities table used by the crawler.
|
||||
"""
|
||||
|
||||
import psycopg2
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add the backend scripts path to import university data
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../backend/scripts'))
|
||||
from load_university_seeds import (
|
||||
UNIVERSITAETEN, FACHHOCHSCHULEN, PAEDAGOGISCHE_HOCHSCHULEN,
|
||||
KUNSTHOCHSCHULEN, PRIVATE_HOCHSCHULEN
|
||||
)
|
||||
|
||||
# Database connection from environment or defaults
|
||||
DATABASE_URL = os.environ.get(
|
||||
'DATABASE_URL',
|
||||
'postgresql://breakpilot:breakpilot@localhost:5432/breakpilot_db'
|
||||
)
|
||||
|
||||
|
||||
def get_uni_type(original_type: str) -> str:
    """Map the type from seed data to database uni_type."""
    # Only FH is renamed (Fachhochschule -> HAW, Hochschule für
    # Angewandte Wissenschaften); the other known types map to
    # themselves and anything unrecognized defaults to UNI.
    if original_type == 'FH':
        return 'HAW'
    if original_type in ('UNI', 'PH', 'KUNST', 'PRIVATE'):
        return original_type
    return 'UNI'
|
||||
|
||||
|
||||
def _derive_short_name(name):
    """Best-effort short label for a university name; None if no rule applies."""
    # Well-known abbreviations first (containment/equality kept exactly as
    # in the original rules).
    if 'KIT' in name:
        return 'KIT'
    if 'TUM' in name or name == 'Technische Universität München':
        return 'TUM'
    if 'LMU' in name or 'Ludwig-Maximilians' in name:
        return 'LMU'
    if 'RWTH' in name:
        return 'RWTH'
    if 'FAU' in name or 'Friedrich-Alexander' in name:
        return 'FAU'
    # Generic prefix rules, truncated to keep the label short.
    if name.startswith('Universität '):
        return 'Uni ' + name.replace('Universität ', '')[:15]
    if name.startswith('Technische Universität '):
        return 'TU ' + name.replace('Technische Universität ', '')[:12]
    if name.startswith('Hochschule '):
        return 'HS ' + name.replace('Hochschule ', '')[:15]
    return None


def seed_universities():
    """Load all universities into the database.

    Collects the imported seed lists, tags each entry with its uni_type,
    and inserts them with ON CONFLICT (url) DO NOTHING so re-runs are
    idempotent.

    Returns:
        bool: True on success, False on a database-level error.
    """
    # Collect all universities with their types.
    all_unis = []
    for source, uni_type in (
        (UNIVERSITAETEN, 'UNI'),
        (FACHHOCHSCHULEN, 'HAW'),        # Fachhochschule -> HAW
        (PAEDAGOGISCHE_HOCHSCHULEN, 'PH'),
        (KUNSTHOCHSCHULEN, 'KUNST'),
        (PRIVATE_HOCHSCHULEN, 'PRIVATE'),
    ):
        for uni in source:
            all_unis.append({**uni, 'uni_type': uni_type})

    print(f"Total universities to seed: {len(all_unis)}")
    print(f" - Universitäten: {len(UNIVERSITAETEN)}")
    print(f" - Fachhochschulen: {len(FACHHOCHSCHULEN)}")
    print(f" - Pädagogische Hochschulen: {len(PAEDAGOGISCHE_HOCHSCHULEN)}")
    print(f" - Kunst-/Musikhochschulen: {len(KUNSTHOCHSCHULEN)}")
    print(f" - Private Hochschulen: {len(PRIVATE_HOCHSCHULEN)}")

    try:
        conn = psycopg2.connect(DATABASE_URL)
    except psycopg2.Error as e:
        print(f"Database error: {e}")
        return False

    inserted = 0
    skipped = 0
    errors = []

    try:
        cur = conn.cursor()

        for uni in all_unis:
            try:
                cur.execute("""
                    INSERT INTO universities (name, short_name, url, state, uni_type)
                    VALUES (%s, %s, %s, %s, %s)
                    ON CONFLICT (url) DO NOTHING
                    RETURNING id
                """, (
                    uni['name'],
                    _derive_short_name(uni['name']),
                    uni['url'],
                    uni.get('state'),
                    uni['uni_type']
                ))

                # RETURNING id yields a row only when the insert happened;
                # a conflict (duplicate url) yields no row.
                if cur.fetchone():
                    inserted += 1
                else:
                    skipped += 1

            except Exception as e:
                # Collect per-row failures; keep seeding the rest.
                errors.append(f"{uni['name']}: {str(e)}")

        conn.commit()
        cur.close()
    except psycopg2.Error as e:
        print(f"Database error: {e}")
        return False
    finally:
        # Fix: the original leaked the connection when an error occurred
        # before conn.close() was reached.
        conn.close()

    # Fix: dropped the pointless f-prefix (f-string without placeholders).
    print("\nResults:")
    print(f" Inserted: {inserted}")
    print(f" Skipped (duplicates): {skipped}")

    if errors:
        print(f" Errors: {len(errors)}")
        for err in errors[:5]:
            print(f" - {err}")

    print(f"\nDone! Total universities in database: {inserted + skipped}")
    return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: seed and report, exiting non-zero on failure
    # so callers (CI, shell scripts) can detect errors.
    print("=" * 60)
    print("Seeding Universities into edu-search-service database")
    print("=" * 60)

    success = seed_universities()
    sys.exit(0 if success else 1)
|
||||
320
edu-search-service/scripts/vast_ai_extractor.py
Normal file
320
edu-search-service/scripts/vast_ai_extractor.py
Normal file
@@ -0,0 +1,320 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
vast.ai Profile Extractor Script
|
||||
Dieses Skript läuft auf vast.ai und extrahiert Profildaten von Universitäts-Webseiten.
|
||||
|
||||
Verwendung auf vast.ai:
|
||||
1. Lade dieses Skript auf deine vast.ai Instanz
|
||||
2. Installiere Abhängigkeiten: pip install requests beautifulsoup4 openai
|
||||
3. Setze Umgebungsvariablen:
|
||||
- BREAKPILOT_API_URL=http://deine-ip:8086
|
||||
- BREAKPILOT_API_KEY=dev-key
|
||||
- OPENAI_API_KEY=sk-...
|
||||
4. Starte: python vast_ai_extractor.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
# Logging Setup
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Configuration
|
||||
API_URL = os.environ.get('BREAKPILOT_API_URL', 'http://localhost:8086')
|
||||
API_KEY = os.environ.get('BREAKPILOT_API_KEY', 'dev-key')
|
||||
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')
|
||||
BATCH_SIZE = 10
|
||||
SLEEP_BETWEEN_REQUESTS = 1 # Sekunden zwischen Requests (respektiere rate limits)
|
||||
|
||||
|
||||
def fetch_pending_profiles(limit: int = 50) -> List[Dict]:
    """Fetch the profiles that still need extraction (empty list on error)."""
    endpoint = f"{API_URL}/api/v1/ai/extraction/pending"
    auth_headers = {"Authorization": f"Bearer {API_KEY}"}
    try:
        response = requests.get(
            endpoint,
            params={"limit": limit},
            headers=auth_headers,
            timeout=30,
        )
        response.raise_for_status()
        return response.json().get("tasks", [])
    except Exception as e:
        logger.error(f"Fehler beim Abrufen der Profile: {e}")
        return []
|
||||
|
||||
|
||||
def fetch_profile_page(url: str) -> Optional[str]:
    """Download the HTML content of a profile page.

    Sends a polite crawler User-Agent and German/English Accept-Language
    headers. Returns the page body as text, or None on any error.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (compatible; BreakPilot-Crawler/1.0; +https://breakpilot.de)',
        'Accept': 'text/html,application/xhtml+xml',
        'Accept-Language': 'de-DE,de;q=0.9,en;q=0.8',
    }
    try:
        resp = requests.get(url, headers=request_headers, timeout=30)
        resp.raise_for_status()
    except Exception as e:
        logger.error(f"Fehler beim Laden von {url}: {e}")
        return None
    return resp.text
|
||||
|
||||
def extract_with_beautifulsoup(html: str, url: str) -> Dict[str, Any]:
    """Extract basic profile information with BeautifulSoup (no AI).

    Pulls contact details and well-known researcher links straight from
    the markup: email (mailto:), phone (tel:), ORCID, Google Scholar ID,
    ResearchGate URL, LinkedIn URL, plus the first institute/department
    link found (used for hierarchy detection).
    """
    soup = BeautifulSoup(html, 'html.parser')
    result: Dict[str, Any] = {}

    def first_href(predicate):
        # Return the href of the first <a> matching the predicate, or None.
        matches = soup.find_all('a', href=predicate)
        return matches[0]['href'] if matches else None

    # Email
    mailto = first_href(lambda x: x and x.startswith('mailto:'))
    if mailto:
        result['email'] = mailto.replace('mailto:', '').split('?')[0]

    # Phone
    tel = first_href(lambda x: x and x.startswith('tel:'))
    if tel:
        result['phone'] = tel.replace('tel:', '')

    # ORCID — keep only the trailing ID segment of the URL.
    orcid_href = first_href(lambda x: x and 'orcid.org' in x)
    if orcid_href and '/' in orcid_href:
        result['orcid'] = orcid_href.rsplit('/', 1)[-1]

    # Google Scholar — the user id lives in the "user=" query parameter.
    scholar_href = first_href(lambda x: x and 'scholar.google' in x)
    if scholar_href and 'user=' in scholar_href:
        result['google_scholar_id'] = scholar_href.split('user=')[1].split('&')[0]

    # ResearchGate
    rg_href = first_href(lambda x: x and 'researchgate.net' in x)
    if rg_href:
        result['researchgate_url'] = rg_href

    # LinkedIn
    li_href = first_href(lambda x: x and 'linkedin.com' in x)
    if li_href:
        result['linkedin_url'] = li_href

    # Institute/department links: scan all anchors whose visible text hints
    # at a faculty/institute and keep the first absolute (or root-relative,
    # resolved against the page's scheme+host) link found.
    base_domain = '/'.join(url.split('/')[:3])
    keywords = ('institut', 'fakultät', 'fachbereich', 'abteilung', 'lehrstuhl')
    for anchor in soup.find_all('a', href=True):
        target = anchor['href']
        label = anchor.get_text(strip=True)
        if not any(kw in label.lower() for kw in keywords):
            continue
        if target.startswith('/'):
            target = base_domain + target
        if target.startswith('http'):
            result['department_url'] = target
            result['department_name'] = label
            break  # only the first matching department link is kept

    return result
||||
|
||||
|
||||
def extract_with_ai(html: str, url: str, full_name: str) -> Dict[str, Any]:
    """Extract structured profile data with OpenAI GPT.

    Falls back to the pure-BeautifulSoup extractor when no OPENAI_API_KEY
    is configured or when any step of the AI path fails. On success, the
    AI result is merged with BeautifulSoup-derived link fields (ORCID,
    Scholar, ResearchGate, LinkedIn), which take precedence because they
    come directly from the markup.
    """
    if not OPENAI_API_KEY:
        logger.warning("Kein OPENAI_API_KEY gesetzt - nutze nur BeautifulSoup")
        return extract_with_beautifulsoup(html, url)

    try:
        # Imported lazily so the script still runs without the openai
        # package when only the BeautifulSoup path is used.
        import openai
        client = openai.OpenAI(api_key=OPENAI_API_KEY)

        # Reduce the HTML to relevant text before prompting.
        soup = BeautifulSoup(html, 'html.parser')

        # Remove scripts, styles, navigation chrome etc.
        for tag in soup(['script', 'style', 'nav', 'footer', 'header']):
            tag.decompose()

        # Extract plain text.
        text = soup.get_text(separator='\n', strip=True)
        # Limit to 8000 characters for the API call.
        text = text[:8000]

        # The prompt (and expected JSON keys) are in German on purpose —
        # the profile pages are German university sites.
        prompt = f"""Analysiere diese Universitäts-Profilseite für {full_name} und extrahiere folgende Informationen im JSON-Format:

{{
  "email": "email@uni.de oder null",
  "phone": "Telefonnummer oder null",
  "office": "Raum/Büro oder null",
  "position": "Position/Titel (z.B. Wissenschaftlicher Mitarbeiter, Professorin) oder null",
  "department_name": "Name des Instituts/der Abteilung oder null",
  "research_interests": ["Liste", "der", "Forschungsthemen"] oder [],
  "teaching_topics": ["Liste", "der", "Lehrveranstaltungen/Fächer"] oder [],
  "supervisor_name": "Name des Vorgesetzten/Lehrstuhlinhabers falls erkennbar oder null"
}}

Profilseite von {url}:

{text}

Antworte NUR mit dem JSON-Objekt, keine Erklärungen."""

        response = client.chat.completions.create(
            model="gpt-4o-mini",  # cheap and fast
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
            max_tokens=500
        )

        result_text = response.choices[0].message.content.strip()

        # Parse JSON (strip possible Markdown code fences like ```json).
        if result_text.startswith('```'):
            result_text = result_text.split('```')[1]
            if result_text.startswith('json'):
                result_text = result_text[4:]

        ai_data = json.loads(result_text)

        # Combine with BeautifulSoup results (for links like ORCID).
        bs_data = extract_with_beautifulsoup(html, url)

        # AI data has priority, but BS data wins for specific link fields,
        # since those are read directly from the page markup.
        for key in ['orcid', 'google_scholar_id', 'researchgate_url', 'linkedin_url']:
            if key in bs_data and bs_data[key]:
                ai_data[key] = bs_data[key]

        return ai_data

    except Exception as e:
        # Any failure (import, API, JSON parse) degrades to the BS-only path.
        logger.error(f"AI-Extraktion fehlgeschlagen: {e}")
        return extract_with_beautifulsoup(html, url)
||||
|
||||
|
||||
def submit_extracted_data(staff_id: str, data: Dict[str, Any]) -> bool:
    """Send extracted profile fields back to BreakPilot.

    None-valued fields are dropped before the POST. Returns True on a
    2xx response, False on any error (logged, not raised).
    """
    merged = {"staff_id": staff_id, **data}
    # Strip None values so the API only receives populated fields.
    payload = {key: value for key, value in merged.items() if value is not None}
    try:
        resp = requests.post(
            f"{API_URL}/api/v1/ai/extraction/submit",
            json=payload,
            headers={
                "Authorization": f"Bearer {API_KEY}",
                "Content-Type": "application/json",
            },
            timeout=30,
        )
        resp.raise_for_status()
    except Exception as e:
        logger.error(f"Fehler beim Senden der Daten für {staff_id}: {e}")
        return False
    return True
||||
|
||||
|
||||
def process_profiles():
    """Main loop: fetch pending profiles, extract data, submit results.

    Runs forever: when the queue is empty it sleeps 60 seconds and polls
    again; between individual profiles it sleeps SLEEP_BETWEEN_REQUESTS
    seconds to respect rate limits. Failures are counted, never raised.
    """
    logger.info(f"Starte Extraktion - API: {API_URL}")

    succeeded = 0
    failed = 0

    while True:
        # Fetch the next batch of pending profiles.
        batch = fetch_pending_profiles(limit=BATCH_SIZE)

        if not batch:
            logger.info("Keine weiteren Profile zum Verarbeiten. Warte 60 Sekunden...")
            time.sleep(60)
            continue

        logger.info(f"Verarbeite {len(batch)} Profile...")

        for task in batch:
            staff_id = task['staff_id']
            page_url = task['profile_url']
            full_name = task.get('full_name', 'Unbekannt')

            logger.info(f"Verarbeite: {full_name} - {page_url}")

            # Download the profile page; skip on failure.
            page_html = fetch_profile_page(page_url)
            if not page_html:
                failed += 1
                continue

            # Extract data (AI with BeautifulSoup fallback).
            extracted = extract_with_ai(page_html, page_url, full_name)

            # Submit only non-empty extraction results.
            if extracted and submit_extracted_data(staff_id, extracted):
                succeeded += 1
                logger.info(f"Erfolgreich: {full_name} - Email: {extracted.get('email', 'N/A')}")
            else:
                failed += 1

            # Rate limiting between profiles.
            time.sleep(SLEEP_BETWEEN_REQUESTS)

        logger.info(f"Batch abgeschlossen. Gesamt: {succeeded} erfolgreich, {failed} Fehler")
||||
|
||||
|
||||
def main():
    """Entry point: validate configuration, verify API reachability, run loop.

    Exits with status 1 when the API key is empty, the API cannot be
    reached, or an unexpected error escapes the processing loop.
    """
    logger.info("=" * 60)
    logger.info("BreakPilot vast.ai Profile Extractor")
    logger.info("=" * 60)

    # Check configuration. NOTE(review): API_KEY defaults to 'dev-key',
    # so this only fires if BREAKPILOT_API_KEY is set to an empty string.
    if not API_KEY:
        logger.error("BREAKPILOT_API_KEY nicht gesetzt!")
        sys.exit(1)

    if not OPENAI_API_KEY:
        logger.warning("OPENAI_API_KEY nicht gesetzt - nutze nur BeautifulSoup-Extraktion")

    # Connectivity check. Bugfix: this previously hit '/v1/health' while
    # every other call in this script uses the '/api/v1/...' prefix; align
    # it so the health check targets the actual API path.
    try:
        response = requests.get(
            f"{API_URL}/api/v1/health",
            headers={"Authorization": f"Bearer {API_KEY}"},
            timeout=10
        )
        logger.info(f"API-Verbindung OK: {response.status_code}")
    except Exception as e:
        logger.error(f"Kann API nicht erreichen: {e}")
        logger.error(f"Stelle sicher dass {API_URL} erreichbar ist!")
        sys.exit(1)

    # Start processing; runs until interrupted.
    try:
        process_profiles()
    except KeyboardInterrupt:
        logger.info("Beendet durch Benutzer")
    except Exception as e:
        logger.error(f"Unerwarteter Fehler: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user