feat: edu-search-service migriert, voice-service/geo-service entfernt

- edu-search-service von breakpilot-pwa nach breakpilot-lehrer kopiert (ohne vendor)
- opensearch + edu-search-service in docker-compose.yml hinzugefuegt
- voice-service aus docker-compose.yml entfernt (jetzt in breakpilot-core)
- geo-service aus docker-compose.yml entfernt (nicht mehr benoetigt)
- CI/CD: edu-search-service zu Gitea Actions und Woodpecker hinzugefuegt (Go lint, test mit go mod download, build, SBOM)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 18:36:38 +01:00
parent d4e1d6bab6
commit 414e0f5ec0
73 changed files with 23938 additions and 92 deletions
@@ -0,0 +1,282 @@
+#!/usr/bin/env python3
+"""
+Add all major German universities to the edu-search-service database.
+Based on HRK (Hochschulrektorenkonferenz) list.
+"""
+
+import requests
+import json
+import time
+import sys
+
+# Base URL of the edu-search-service REST API; the helpers in this script
+# append "/universities" to it.
+# NOTE(review): "macmini" looks like a developer-machine hostname — confirm
+# before running this script from another host.
+API_BASE = "https://macmini:8089/api/v1"
+
+# German Universities - categorized
+# Seed list of institutions to register through the API. Each entry carries
+# the display "name", the homepage "url" (main() uses the normalised URL as
+# its "already exists" key) and a coarse "type" tag: "university", "haw",
+# "private" or "research".
+GERMAN_UNIVERSITIES = [
+    # === UNIVERSITIES (Universitäten) ===
+    # Already in DB (skip or update)
+    # {"name": "TUM", "url": "https://www.tum.de", "type": "university"},
+    # {"name": "LMU München", "url": "https://www.lmu.de", "type": "university"},
+    # {"name": "UOL", "url": "https://uol.de", "type": "university"},
+    # {"name": "KIT Karlsruhe", "url": "https://www.kit.edu", "type": "university"},
+
+    # TU9 Universities
+    {"name": "TU Dresden", "url": "https://tu-dresden.de", "type": "university"},
+    {"name": "TU Braunschweig", "url": "https://www.tu-braunschweig.de", "type": "university"},
+    {"name": "TU Darmstadt", "url": "https://www.tu-darmstadt.de", "type": "university"},
+    {"name": "Leibniz Universität Hannover", "url": "https://www.uni-hannover.de", "type": "university"},
+    {"name": "Universität Stuttgart", "url": "https://www.uni-stuttgart.de", "type": "university"},
+
+    # Excellence Universities
+    {"name": "Universität Bonn", "url": "https://www.uni-bonn.de", "type": "university"},
+    {"name": "Universität Konstanz", "url": "https://www.uni-konstanz.de", "type": "university"},
+    {"name": "Universität Tübingen", "url": "https://uni-tuebingen.de", "type": "university"},
+    {"name": "Universität Freiburg", "url": "https://www.uni-freiburg.de", "type": "university"},
+
+    # Large State Universities
+    {"name": "Universität Münster", "url": "https://www.uni-muenster.de", "type": "university"},
+    {"name": "Universität Frankfurt", "url": "https://www.uni-frankfurt.de", "type": "university"},
+    {"name": "Universität Mainz", "url": "https://www.uni-mainz.de", "type": "university"},
+    {"name": "Universität Würzburg", "url": "https://www.uni-wuerzburg.de", "type": "university"},
+    {"name": "Universität Erlangen-Nürnberg", "url": "https://www.fau.de", "type": "university"},
+    {"name": "Universität Leipzig", "url": "https://www.uni-leipzig.de", "type": "university"},
+    {"name": "Universität Jena", "url": "https://www.uni-jena.de", "type": "university"},
+    {"name": "Universität Halle", "url": "https://www.uni-halle.de", "type": "university"},
+    {"name": "Universität Rostock", "url": "https://www.uni-rostock.de", "type": "university"},
+    {"name": "Universität Greifswald", "url": "https://www.uni-greifswald.de", "type": "university"},
+    {"name": "Universität Kiel", "url": "https://www.uni-kiel.de", "type": "university"},
+    {"name": "Universität Bremen", "url": "https://www.uni-bremen.de", "type": "university"},
+    {"name": "Universität Bielefeld", "url": "https://www.uni-bielefeld.de", "type": "university"},
+    {"name": "Universität Duisburg-Essen", "url": "https://www.uni-due.de", "type": "university"},
+    {"name": "Universität Dortmund", "url": "https://www.tu-dortmund.de", "type": "university"},
+    {"name": "Universität Bochum", "url": "https://www.ruhr-uni-bochum.de", "type": "university"},
+    {"name": "Universität Düsseldorf", "url": "https://www.hhu.de", "type": "university"},
+    {"name": "Universität Wuppertal", "url": "https://www.uni-wuppertal.de", "type": "university"},
+    {"name": "Universität Siegen", "url": "https://www.uni-siegen.de", "type": "university"},
+    {"name": "Universität Paderborn", "url": "https://www.uni-paderborn.de", "type": "university"},
+    {"name": "Universität Kassel", "url": "https://www.uni-kassel.de", "type": "university"},
+    {"name": "Universität Marburg", "url": "https://www.uni-marburg.de", "type": "university"},
+    {"name": "Universität Gießen", "url": "https://www.uni-giessen.de", "type": "university"},
+    {"name": "Universität Saarbrücken", "url": "https://www.uni-saarland.de", "type": "university"},
+    {"name": "Universität Trier", "url": "https://www.uni-trier.de", "type": "university"},
+    {"name": "Universität Koblenz", "url": "https://www.uni-koblenz.de", "type": "university"},
+    # NOTE(review): this entry and "TU Kaiserslautern" further down both use
+    # https://rptu.de, so they collide on the URL key — confirm whether one
+    # combined entry is intended.
+    {"name": "Universität Landau", "url": "https://rptu.de", "type": "university"},
+    {"name": "Universität Mannheim", "url": "https://www.uni-mannheim.de", "type": "university"},
+    {"name": "Universität Ulm", "url": "https://www.uni-ulm.de", "type": "university"},
+    {"name": "Universität Hohenheim", "url": "https://www.uni-hohenheim.de", "type": "university"},
+    {"name": "Universität Regensburg", "url": "https://www.uni-regensburg.de", "type": "university"},
+    {"name": "Universität Passau", "url": "https://www.uni-passau.de", "type": "university"},
+    {"name": "Universität Bayreuth", "url": "https://www.uni-bayreuth.de", "type": "university"},
+    {"name": "Universität Bamberg", "url": "https://www.uni-bamberg.de", "type": "university"},
+    {"name": "Universität Augsburg", "url": "https://www.uni-augsburg.de", "type": "university"},
+    {"name": "Universität Potsdam", "url": "https://www.uni-potsdam.de", "type": "university"},
+    {"name": "Universität Magdeburg", "url": "https://www.ovgu.de", "type": "university"},
+    {"name": "TU Chemnitz", "url": "https://www.tu-chemnitz.de", "type": "university"},
+    {"name": "TU Ilmenau", "url": "https://www.tu-ilmenau.de", "type": "university"},
+    {"name": "TU Freiberg", "url": "https://tu-freiberg.de", "type": "university"},
+    {"name": "TU Clausthal", "url": "https://www.tu-clausthal.de", "type": "university"},
+    {"name": "TU Kaiserslautern", "url": "https://rptu.de", "type": "university"},
+    {"name": "BTU Cottbus-Senftenberg", "url": "https://www.b-tu.de", "type": "university"},
+    {"name": "Universität der Bundeswehr München", "url": "https://www.unibw.de", "type": "university"},
+    {"name": "Universität der Bundeswehr Hamburg", "url": "https://www.hsu-hh.de", "type": "university"},
+
+    # === FACHHOCHSCHULEN / HAW ===
+    {"name": "HAW Hamburg", "url": "https://www.haw-hamburg.de", "type": "haw"},
+    {"name": "HTW Berlin", "url": "https://www.htw-berlin.de", "type": "haw"},
+    {"name": "Beuth Hochschule Berlin", "url": "https://www.bht-berlin.de", "type": "haw"},
+    {"name": "FH Aachen", "url": "https://www.fh-aachen.de", "type": "haw"},
+    {"name": "TH Köln", "url": "https://www.th-koeln.de", "type": "haw"},
+    {"name": "Hochschule Düsseldorf", "url": "https://www.hs-duesseldorf.de", "type": "haw"},
+    {"name": "FH Dortmund", "url": "https://www.fh-dortmund.de", "type": "haw"},
+    {"name": "Hochschule Bochum", "url": "https://www.hochschule-bochum.de", "type": "haw"},
+    {"name": "Westfälische Hochschule", "url": "https://www.w-hs.de", "type": "haw"},
+    {"name": "FH Bielefeld", "url": "https://www.fh-bielefeld.de", "type": "haw"},
+    {"name": "FH Münster", "url": "https://www.fh-muenster.de", "type": "haw"},
+    {"name": "Hochschule Osnabrück", "url": "https://www.hs-osnabrueck.de", "type": "haw"},
+    {"name": "Hochschule Bremen", "url": "https://www.hs-bremen.de", "type": "haw"},
+    {"name": "Hochschule Hannover", "url": "https://www.hs-hannover.de", "type": "haw"},
+    {"name": "Ostfalia Hochschule", "url": "https://www.ostfalia.de", "type": "haw"},
+    {"name": "Hochschule Emden/Leer", "url": "https://www.hs-emden-leer.de", "type": "haw"},
+    {"name": "HAWK Hildesheim", "url": "https://www.hawk.de", "type": "haw"},
+    {"name": "Hochschule Fulda", "url": "https://www.hs-fulda.de", "type": "haw"},
+    {"name": "Frankfurt UAS", "url": "https://www.frankfurt-university.de", "type": "haw"},
+    {"name": "Hochschule Darmstadt", "url": "https://www.h-da.de", "type": "haw"},
+    {"name": "Hochschule RheinMain", "url": "https://www.hs-rm.de", "type": "haw"},
+    {"name": "Hochschule Mainz", "url": "https://www.hs-mainz.de", "type": "haw"},
+    {"name": "Hochschule Trier", "url": "https://www.hochschule-trier.de", "type": "haw"},
+    {"name": "Hochschule Koblenz", "url": "https://www.hs-koblenz.de", "type": "haw"},
+    {"name": "Hochschule Karlsruhe", "url": "https://www.h-ka.de", "type": "haw"},
+    {"name": "Hochschule Mannheim", "url": "https://www.hs-mannheim.de", "type": "haw"},
+    {"name": "Hochschule Heilbronn", "url": "https://www.hs-heilbronn.de", "type": "haw"},
+    {"name": "Hochschule Esslingen", "url": "https://www.hs-esslingen.de", "type": "haw"},
+    {"name": "Hochschule Reutlingen", "url": "https://www.reutlingen-university.de", "type": "haw"},
+    {"name": "Hochschule Konstanz", "url": "https://www.htwg-konstanz.de", "type": "haw"},
+    {"name": "Hochschule Offenburg", "url": "https://www.hs-offenburg.de", "type": "haw"},
+    {"name": "Hochschule Pforzheim", "url": "https://www.hs-pforzheim.de", "type": "haw"},
+    {"name": "Hochschule Albstadt-Sigmaringen", "url": "https://www.hs-albsig.de", "type": "haw"},
+    {"name": "Hochschule München", "url": "https://www.hm.edu", "type": "haw"},
+    {"name": "TH Nürnberg", "url": "https://www.th-nuernberg.de", "type": "haw"},
+    {"name": "TH Ingolstadt", "url": "https://www.thi.de", "type": "haw"},
+    {"name": "Hochschule Augsburg", "url": "https://www.hs-augsburg.de", "type": "haw"},
+    {"name": "Hochschule Rosenheim", "url": "https://www.th-rosenheim.de", "type": "haw"},
+    {"name": "Hochschule Regensburg", "url": "https://www.oth-regensburg.de", "type": "haw"},
+    {"name": "Hochschule Landshut", "url": "https://www.haw-landshut.de", "type": "haw"},
+    {"name": "Hochschule Coburg", "url": "https://www.hs-coburg.de", "type": "haw"},
+    {"name": "Hochschule Hof", "url": "https://www.hof-university.de", "type": "haw"},
+    {"name": "Hochschule Würzburg-Schweinfurt", "url": "https://www.thws.de", "type": "haw"},
+    {"name": "Hochschule Aschaffenburg", "url": "https://www.th-ab.de", "type": "haw"},
+    {"name": "Hochschule Ansbach", "url": "https://www.hs-ansbach.de", "type": "haw"},
+    {"name": "OTH Amberg-Weiden", "url": "https://www.oth-aw.de", "type": "haw"},
+    {"name": "Hochschule Deggendorf", "url": "https://www.th-deg.de", "type": "haw"},
+    {"name": "Hochschule Kempten", "url": "https://www.hs-kempten.de", "type": "haw"},
+    {"name": "Hochschule Neu-Ulm", "url": "https://www.hnu.de", "type": "haw"},
+    {"name": "HTW Dresden", "url": "https://www.htw-dresden.de", "type": "haw"},
+    {"name": "HTWK Leipzig", "url": "https://www.htwk-leipzig.de", "type": "haw"},
+    {"name": "Hochschule Mittweida", "url": "https://www.hs-mittweida.de", "type": "haw"},
+    {"name": "Hochschule Zittau/Görlitz", "url": "https://www.hszg.de", "type": "haw"},
+    {"name": "Westsächsische Hochschule Zwickau", "url": "https://www.fh-zwickau.de", "type": "haw"},
+    {"name": "Hochschule Merseburg", "url": "https://www.hs-merseburg.de", "type": "haw"},
+    {"name": "Hochschule Anhalt", "url": "https://www.hs-anhalt.de", "type": "haw"},
+    {"name": "Hochschule Magdeburg-Stendal", "url": "https://www.h2.de", "type": "haw"},
+    {"name": "Hochschule Harz", "url": "https://www.hs-harz.de", "type": "haw"},
+    {"name": "Ernst-Abbe-Hochschule Jena", "url": "https://www.eah-jena.de", "type": "haw"},
+    {"name": "FH Erfurt", "url": "https://www.fh-erfurt.de", "type": "haw"},
+    {"name": "Hochschule Nordhausen", "url": "https://www.hs-nordhausen.de", "type": "haw"},
+    {"name": "Hochschule Schmalkalden", "url": "https://www.hs-schmalkalden.de", "type": "haw"},
+    {"name": "TH Brandenburg", "url": "https://www.th-brandenburg.de", "type": "haw"},
+    {"name": "FH Potsdam", "url": "https://www.fh-potsdam.de", "type": "haw"},
+    {"name": "TH Wildau", "url": "https://www.th-wildau.de", "type": "haw"},
+    {"name": "Hochschule Neubrandenburg", "url": "https://www.hs-nb.de", "type": "haw"},
+    {"name": "Hochschule Stralsund", "url": "https://www.hochschule-stralsund.de", "type": "haw"},
+    {"name": "Hochschule Wismar", "url": "https://www.hs-wismar.de", "type": "haw"},
+    {"name": "FH Kiel", "url": "https://www.fh-kiel.de", "type": "haw"},
+    {"name": "FH Westküste", "url": "https://www.fh-westkueste.de", "type": "haw"},
+    {"name": "TH Lübeck", "url": "https://www.th-luebeck.de", "type": "haw"},
+    {"name": "FH Flensburg", "url": "https://hs-flensburg.de", "type": "haw"},
+    {"name": "Hochschule Bremerhaven", "url": "https://www.hs-bremerhaven.de", "type": "haw"},
+
+    # === PRIVATE HOCHSCHULEN ===
+    {"name": "WHU Vallendar", "url": "https://www.whu.edu", "type": "private"},
+    {"name": "HHL Leipzig", "url": "https://www.hhl.de", "type": "private"},
+    {"name": "EBS Universität", "url": "https://www.ebs.edu", "type": "private"},
+    {"name": "Frankfurt School", "url": "https://www.frankfurt-school.de", "type": "private"},
+    {"name": "ESMT Berlin", "url": "https://esmt.berlin", "type": "private"},
+    {"name": "Jacobs University Bremen", "url": "https://www.jacobs-university.de", "type": "private"},
+    {"name": "Zeppelin Universität", "url": "https://www.zu.de", "type": "private"},
+    {"name": "Bucerius Law School", "url": "https://www.law-school.de", "type": "private"},
+    {"name": "Universität Witten/Herdecke", "url": "https://www.uni-wh.de", "type": "private"},
+    {"name": "IUBH", "url": "https://www.iu.de", "type": "private"},
+    {"name": "SRH Hochschule Heidelberg", "url": "https://www.srh-hochschule-heidelberg.de", "type": "private"},
+    {"name": "FOM Hochschule", "url": "https://www.fom.de", "type": "private"},
+
+    # === FRAUNHOFER INSTITUTE ===
+    {"name": "Fraunhofer IIS", "url": "https://www.iis.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer IAIS", "url": "https://www.iais.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer IML", "url": "https://www.iml.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer ISI", "url": "https://www.isi.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer IPA", "url": "https://www.ipa.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer IAO", "url": "https://www.iao.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer IWS", "url": "https://www.iws.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer IPT", "url": "https://www.ipt.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer FOKUS", "url": "https://www.fokus.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer HHI", "url": "https://www.hhi.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer IESE", "url": "https://www.iese.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer IOSB", "url": "https://www.iosb.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer IDMT", "url": "https://www.idmt.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer IKTS", "url": "https://www.ikts.fraunhofer.de", "type": "research"},
+    {"name": "Fraunhofer IGD", "url": "https://www.igd.fraunhofer.de", "type": "research"},
+
+    # === MAX-PLANCK-INSTITUTE ===
+    {"name": "MPI für Informatik", "url": "https://www.mpi-inf.mpg.de", "type": "research"},
+    {"name": "MPI für Software Systeme", "url": "https://www.mpi-sws.org", "type": "research"},
+    {"name": "MPI für intelligente Systeme", "url": "https://is.mpg.de", "type": "research"},
+    {"name": "MPI für Mathematik", "url": "https://www.mpim-bonn.mpg.de", "type": "research"},
+    {"name": "MPI für Physik", "url": "https://www.mpp.mpg.de", "type": "research"},
+    {"name": "MPI für Quantenoptik", "url": "https://www.mpq.mpg.de", "type": "research"},
+    {"name": "MPI für Biophysik", "url": "https://www.biophys.mpg.de", "type": "research"},
+    {"name": "MPI für Biochemie", "url": "https://www.biochem.mpg.de", "type": "research"},
+    {"name": "MPI für Neurobiologie", "url": "https://www.neuro.mpg.de", "type": "research"},
+    {"name": "MPI für Hirnforschung", "url": "https://brain.mpg.de", "type": "research"},
+
+    # === HELMHOLTZ-ZENTREN ===
+    {"name": "DESY Hamburg", "url": "https://www.desy.de", "type": "research"},
+    {"name": "FZ Jülich", "url": "https://www.fz-juelich.de", "type": "research"},
+    {"name": "GSI Darmstadt", "url": "https://www.gsi.de", "type": "research"},
+    {"name": "DKFZ Heidelberg", "url": "https://www.dkfz.de", "type": "research"},
+    {"name": "DLR", "url": "https://www.dlr.de", "type": "research"},
+    {"name": "AWI Bremerhaven", "url": "https://www.awi.de", "type": "research"},
+    {"name": "GFZ Potsdam", "url": "https://www.gfz-potsdam.de", "type": "research"},
+    {"name": "UFZ Leipzig", "url": "https://www.ufz.de", "type": "research"},
+    {"name": "GEOMAR Kiel", "url": "https://www.geomar.de", "type": "research"},
+]
+
+def get_existing_universities():
+    """Fetch the universities already known to the API, keyed by URL.
+
+    Returns:
+        dict mapping each normalised URL (trailing "/" stripped, lower-cased)
+        to the record returned by the API. Records without a usable "url"
+        are skipped instead of discarding the whole result. An empty dict
+        is returned when the request fails, the status is not 200, or the
+        response cannot be processed.
+    """
+    try:
+        # verify=False: certificate verification is switched off for this call.
+        response = requests.get(f"{API_BASE}/universities", verify=False, timeout=10)
+        if response.status_code != 200:
+            # Report the failure: an empty result makes the caller treat
+            # every seed entry as new.
+            print(f"Error fetching existing universities: HTTP {response.status_code}")
+            return {}
+
+        data = response.json()
+        existing = {}
+        for u in data.get('universities') or []:
+            url = u.get('url') if isinstance(u, dict) else None
+            if not url:
+                # One malformed record must not wipe out the whole lookup.
+                continue
+            existing[url.rstrip('/').lower()] = u
+        return existing
+    except Exception as e:
+        print(f"Error fetching existing universities: {e}")
+    return {}
+
+def add_university(uni):
+    """POST one institution to the API.
+
+    Args:
+        uni: dict with "name" and "url"; "type" is optional and defaults
+            to "university". The country is always sent as "DE".
+
+    Returns:
+        True when the API answers with status 200 or 201, False on any
+        other status or when the request raises.
+    """
+    body = dict(
+        name=uni["name"],
+        url=uni["url"],
+        type=uni.get("type", "university"),
+        country="DE",
+    )
+
+    try:
+        reply = requests.post(
+            f"{API_BASE}/universities",
+            json=body,
+            verify=False,
+            timeout=10,
+        )
+    except Exception as e:
+        print(f"Error adding {uni['name']}: {e}")
+        return False
+
+    return reply.status_code in (201, 200)
+
+def main():
+    """Register every GERMAN_UNIVERSITIES entry the API does not know yet.
+
+    Entries whose normalised URL (trailing "/" stripped, lower-cased) is
+    already present — either in the API or added earlier in this run — are
+    skipped. A summary of added/skipped/failed counts is printed at the end.
+    """
+    print("Fetching existing universities...")
+    existing = get_existing_universities()
+    print(f"Found {len(existing)} existing universities")
+
+    # URLs known to the API plus the ones added during this run, so that two
+    # seed entries sharing a URL are not posted twice.
+    known_urls = set(existing)
+
+    added = 0
+    skipped = 0
+    failed = 0
+
+    for uni in GERMAN_UNIVERSITIES:
+        url_key = uni["url"].rstrip('/').lower()
+
+        if url_key in known_urls:
+            print(f"SKIP: {uni['name']} (already exists)")
+            skipped += 1
+            continue
+
+        print(f"ADD:  {uni['name']} ({uni['url']})")
+        if add_university(uni):
+            added += 1
+            known_urls.add(url_key)
+        else:
+            failed += 1
+
+        # Rate limiting
+        time.sleep(0.2)
+
+    print("\n=== SUMMARY ===")
+    print(f"Added:   {added}")
+    print(f"Skipped: {skipped}")
+    print(f"Failed:  {failed}")
+    print(f"Total:   {len(GERMAN_UNIVERSITIES)}")
+
+if __name__ == "__main__":
+    # Disable SSL warnings for self-signed cert
+    import urllib3
+    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+    main()
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+"""
+Fix university types in the database.
+This script updates uni_type based on university names.
+"""
+
+import requests
+import json
+import sys
+
+# Base URL of the edu-search-service REST API; get_all_universities()
+# appends "/universities" to it.
+# NOTE(review): "macmini" looks like a developer-machine hostname — confirm
+# before running this script from another host.
+API_BASE = "https://macmini:8089/api/v1"
+
+# Classification rules based on name patterns.
+# Maps each uni_type code to the substrings that select it. Matching is
+# case-insensitive; patterns with a trailing space (e.g. "TU ") only match
+# when the abbreviation is followed by a space.
+UNI_TYPE_RULES = {
+    "UNI": [
+        "Universität", "University", "TU ", "TUM", "LMU", "RWTH",
+        "Humboldt", "FU Berlin", "HU Berlin", "TH ", "KIT"
+    ],
+    "FH": [
+        "Hochschule", "Fachhochschule", "FH ", "HAW ", "HS ",
+        "University of Applied", "Beuth", "HTW"
+    ],
+    "RESEARCH": [
+        "Fraunhofer", "Max-Planck", "Helmholtz", "DLR", "DESY",
+        "DKFZ", "FZ Jülich", "AWI", "GFZ", "GSI", "Leibniz"
+    ],
+    "PRIVATE": [
+        "EBS", "ESMT", "Bucerius", "WHU", "HHL", "FOM", "IUBH",
+        "SRH", "International School", "Business School"
+    ],
+    "KUNST": [
+        "Kunsthochschule", "Musikhochschule", "Filmhochschule",
+        "Kunstakademie", "HfK", "HfM", "HfG", "UdK", "Bauhaus"
+    ],
+    "PH": [
+        "Pädagogische Hochschule", "PH "
+    ]
+}
+
+def classify_university(name):
+    """Classify a university by the name patterns in UNI_TYPE_RULES.
+
+    The longest matching pattern decides the category, so a specific
+    pattern such as "Pädagogische Hochschule" or "University of Applied"
+    is not shadowed by a shorter one ("Hochschule", "University") that
+    happens to sit in an earlier category. When two matching patterns have
+    the same length, the category listed first in UNI_TYPE_RULES wins.
+
+    Args:
+        name: university name; None or an empty string is treated as
+            "no pattern matches".
+
+    Returns:
+        The uni_type code of the best match, or "FH" when nothing matches.
+    """
+    name_lower = (name or "").lower()
+
+    best_type = None
+    best_len = 0
+    for uni_type, patterns in UNI_TYPE_RULES.items():
+        for pattern in patterns:
+            # Strictly-longer comparison keeps the earlier category on ties.
+            if len(pattern) > best_len and pattern.lower() in name_lower:
+                best_type = uni_type
+                best_len = len(pattern)
+
+    if best_type is not None:
+        return best_type
+
+    return "FH"  # Default
+
+def get_all_universities():
+    """Return the "universities" list from the API.
+
+    An empty list is returned when the status is not 200, when the payload
+    has no "universities" key, or when the request raises (the error is
+    printed in that case).
+    """
+    try:
+        reply = requests.get(f"{API_BASE}/universities", verify=False, timeout=30)
+        if reply.status_code != 200:
+            return []
+        payload = reply.json()
+        return payload.get('universities', [])
+    except Exception as e:
+        print(f"Error fetching universities: {e}")
+        return []
+
+def update_university_type(uni_id, uni_type, uni_state=None):
+    """Placeholder for updating a university's type; performs no update.
+
+    The function currently only hands back ``uni_type`` unchanged;
+    ``uni_id`` and ``uni_state`` are accepted but not used.
+    """
+    # No update endpoint is available on the API, so the script emits SQL
+    # statements instead of calling this function to persist anything.
+    return uni_type
+
+def _sql_literal(value):
+    """Render *value* as a single-quoted SQL string literal.
+
+    Embedded single quotes are doubled so a value coming from the API
+    cannot terminate the literal early in the generated script.
+    """
+    return "'" + str(value).replace("'", "''") + "'"
+
+def main():
+    """Classify all universities from the API and write an SQL fix-up script.
+
+    For every university an UPDATE statement setting ``uni_type`` is
+    generated; names whose type would change are printed. The statements
+    are written, wrapped in BEGIN/COMMIT, to /tmp/fix_uni_types.sql and the
+    command to apply them is printed. Nothing is written to the database
+    by this function itself.
+    """
+    print("=== University Type Fixer ===\n")
+
+    # Disable SSL warnings
+    import urllib3
+    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+    universities = get_all_universities()
+    if not universities:
+        print("ERROR: No universities found!")
+        return
+
+    print(f"Found {len(universities)} universities\n")
+
+    # Classify and generate SQL
+    sql_statements = []
+    type_counts = {}
+
+    for uni in universities:
+        uni_id = uni['id']
+        uni_name = uni['name']
+        current_type = uni.get('uni_type', 'unknown')
+
+        # Classify
+        new_type = classify_university(uni_name)
+
+        # Count
+        type_counts[new_type] = type_counts.get(new_type, 0) + 1
+
+        # Generate SQL; both values go through _sql_literal because the id
+        # originates from the API response, not from this script.
+        sql = (
+            f"UPDATE universities SET uni_type = {_sql_literal(new_type)} "
+            f"WHERE id = {_sql_literal(uni_id)};"
+        )
+        sql_statements.append(sql)
+
+        if current_type != new_type:
+            print(f"  {uni_name[:50]:<50} -> {new_type}")
+
+    print("\n=== Summary ===")
+    for t, c in sorted(type_counts.items()):
+        print(f"  {t}: {c}")
+
+    # Write SQL file (explicit UTF-8 so the output does not depend on the
+    # platform's default encoding)
+    sql_file = "/tmp/fix_uni_types.sql"
+    with open(sql_file, 'w', encoding='utf-8') as f:
+        f.write("-- Fix university types\n")
+        f.write("BEGIN;\n\n")
+        for sql in sql_statements:
+            f.write(sql + "\n")
+        f.write("\nCOMMIT;\n")
+
+    print(f"\nSQL written to: {sql_file}")
+    print(f"Run: cat {sql_file} | docker exec -i breakpilot-pwa-postgres psql -U <user> -d edu_search")
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+"""
+Seed German Universities directly into the edu-search-service universities table.
+
+This script imports the same university data as load_university_seeds.py
+but writes directly to the PostgreSQL universities table used by the crawler.
+"""
+
+import psycopg2
+import os
+import sys
+
+# Add the backend scripts path to import university data
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../backend/scripts'))
+from load_university_seeds import (
+    UNIVERSITAETEN, FACHHOCHSCHULEN, PAEDAGOGISCHE_HOCHSCHULEN,
+    KUNSTHOCHSCHULEN, PRIVATE_HOCHSCHULEN
+)
+
+# Database connection from environment or defaults.
+# The DATABASE_URL environment variable takes precedence; the fallback
+# below embeds a user name and password for a database on localhost.
+# NOTE(review): presumably a development-only default — confirm it is never
+# relied on outside local setups.
+DATABASE_URL = os.environ.get(
+    'DATABASE_URL',
+    'postgresql://breakpilot:breakpilot@localhost:5432/breakpilot_db'
+)
+
+
+def get_uni_type(original_type: str) -> str:
+    """Translate a seed-data type code into the database ``uni_type``.
+
+    'FH' becomes 'HAW'; 'UNI', 'PH', 'KUNST' and 'PRIVATE' map to
+    themselves; every other code falls back to 'UNI'.
+    """
+    unchanged_codes = ('UNI', 'PH', 'KUNST', 'PRIVATE')
+    lookup = {code: code for code in unchanged_codes}
+    # Fachhochschule -> HAW (Hochschule für Angewandte Wissenschaften)
+    lookup['FH'] = 'HAW'
+    return lookup.get(original_type, 'UNI')
+
+
+def _derive_short_name(name):
+    """Return a short display name for *name*, or None when no rule applies."""
+    # Well-known abbreviations take precedence over the generic prefix rules.
+    if 'KIT' in name:
+        return 'KIT'
+    if 'TUM' in name or name == 'Technische Universität München':
+        return 'TUM'
+    if 'LMU' in name or 'Ludwig-Maximilians' in name:
+        return 'LMU'
+    if 'RWTH' in name:
+        return 'RWTH'
+    if 'FAU' in name or 'Friedrich-Alexander' in name:
+        return 'FAU'
+
+    # Generic rules: swap the leading institution word for an abbreviation
+    # and cap the remainder. Only the leading prefix is removed; later
+    # occurrences of the same word inside the name are kept.
+    prefix_rules = (
+        ('Universität ', 'Uni ', 15),
+        ('Technische Universität ', 'TU ', 12),
+        ('Hochschule ', 'HS ', 15),
+    )
+    for prefix, abbreviation, max_len in prefix_rules:
+        if name.startswith(prefix):
+            return abbreviation + name[len(prefix):][:max_len]
+    return None
+
+
+def seed_universities():
+    """Load all universities into the database.
+
+    Every entry of the imported seed lists is inserted into the
+    ``universities`` table with the ``uni_type`` of its list; rows whose
+    URL already exists are skipped via ``ON CONFLICT (url) DO NOTHING``.
+    Each row runs inside its own savepoint so that one failing row neither
+    blocks the following rows nor discards the ones already inserted.
+
+    Returns:
+        True when the run completed and was committed (individual row
+        errors are reported but do not change the result), False when a
+        database error stopped the run.
+    """
+    # Collect all universities with their types
+    all_unis = []
+    for uni_type, seeds in (
+        ('UNI', UNIVERSITAETEN),
+        ('HAW', FACHHOCHSCHULEN),
+        ('PH', PAEDAGOGISCHE_HOCHSCHULEN),
+        ('KUNST', KUNSTHOCHSCHULEN),
+        ('PRIVATE', PRIVATE_HOCHSCHULEN),
+    ):
+        all_unis.extend({**uni, 'uni_type': uni_type} for uni in seeds)
+
+    print(f"Total universities to seed: {len(all_unis)}")
+    print(f"  - Universitäten: {len(UNIVERSITAETEN)}")
+    print(f"  - Fachhochschulen: {len(FACHHOCHSCHULEN)}")
+    print(f"  - Pädagogische Hochschulen: {len(PAEDAGOGISCHE_HOCHSCHULEN)}")
+    print(f"  - Kunst-/Musikhochschulen: {len(KUNSTHOCHSCHULEN)}")
+    print(f"  - Private Hochschulen: {len(PRIVATE_HOCHSCHULEN)}")
+
+    conn = None
+    try:
+        conn = psycopg2.connect(DATABASE_URL)
+        cur = conn.cursor()
+
+        inserted = 0
+        skipped = 0
+        errors = []
+
+        for uni in all_unis:
+            # A failed statement aborts the surrounding PostgreSQL
+            # transaction; the savepoint confines the damage to this row.
+            cur.execute("SAVEPOINT seed_row")
+            try:
+                cur.execute("""
+                    INSERT INTO universities (name, short_name, url, state, uni_type)
+                    VALUES (%s, %s, %s, %s, %s)
+                    ON CONFLICT (url) DO NOTHING
+                    RETURNING id
+                """, (
+                    uni['name'],
+                    _derive_short_name(uni['name']),
+                    uni['url'],
+                    uni.get('state'),
+                    uni['uni_type']
+                ))
+                # RETURNING yields a row only when the insert took place.
+                result = cur.fetchone()
+                cur.execute("RELEASE SAVEPOINT seed_row")
+            except Exception as e:
+                cur.execute("ROLLBACK TO SAVEPOINT seed_row")
+                cur.execute("RELEASE SAVEPOINT seed_row")
+                errors.append(f"{uni.get('name', '<unnamed>')}: {str(e)}")
+                continue
+
+            if result:
+                inserted += 1
+            else:
+                skipped += 1
+
+        # Ask the database for the real total instead of inferring it from
+        # the counters, which ignore failed rows and pre-existing data.
+        cur.execute("SELECT COUNT(*) FROM universities")
+        total_in_db = cur.fetchone()[0]
+
+        conn.commit()
+        cur.close()
+
+        print("\nResults:")
+        print(f"  Inserted: {inserted}")
+        print(f"  Skipped (duplicates): {skipped}")
+
+        if errors:
+            print(f"  Errors: {len(errors)}")
+            for err in errors[:5]:
+                print(f"    - {err}")
+
+        print(f"\nDone! Total universities in database: {total_in_db}")
+        return True
+
+    except psycopg2.Error as e:
+        print(f"Database error: {e}")
+        return False
+
+    finally:
+        # Always release the connection; closing without a commit discards
+        # any uncommitted work.
+        if conn is not None:
+            conn.close()
+
+
+# Script entry point: print a banner, run the seeding, and exit with
+# status 0 when seed_universities() returned True, otherwise 1.
+if __name__ == "__main__":
+    print("=" * 60)
+    print("Seeding Universities into edu-search-service database")
+    print("=" * 60)
+
+    success = seed_universities()
+    sys.exit(0 if success else 1)
@@ -0,0 +1,320 @@
+#!/usr/bin/env python3
+"""
+vast.ai Profile Extractor Script
+Dieses Skript läuft auf vast.ai und extrahiert Profildaten von Universitäts-Webseiten.
+
+Verwendung auf vast.ai:
+1. Lade dieses Skript auf deine vast.ai Instanz
+2. Installiere Abhängigkeiten: pip install requests beautifulsoup4 openai
+3. Setze Umgebungsvariablen:
+   - BREAKPILOT_API_URL=http://deine-ip:8086
+   - BREAKPILOT_API_KEY=dev-key
+   - OPENAI_API_KEY=sk-...
+4. Starte: python vast_ai_extractor.py
+"""
+
+import os
+import sys
+import json
+import time
+import logging
+import requests
+from bs4 import BeautifulSoup
+from typing import Optional, Dict, Any, List
+
+# Logging Setup
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Configuration
+API_URL = os.environ.get('BREAKPILOT_API_URL', 'http://localhost:8086')
+API_KEY = os.environ.get('BREAKPILOT_API_KEY', 'dev-key')
+OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')
+BATCH_SIZE = 10
+SLEEP_BETWEEN_REQUESTS = 1  # Sekunden zwischen Requests (respektiere rate limits)
+
+
+def fetch_pending_profiles(limit: int = 50) -> List[Dict]:
+    """Hole Profile die noch extrahiert werden müssen."""
+    try:
+        response = requests.get(
+            f"{API_URL}/api/v1/ai/extraction/pending",
+            params={"limit": limit},
+            headers={"Authorization": f"Bearer {API_KEY}"},
+            timeout=30
+        )
+        response.raise_for_status()
+        data = response.json()
+        return data.get("tasks", [])
+    except Exception as e:
+        logger.error(f"Fehler beim Abrufen der Profile: {e}")
+        return []
+
+
+def fetch_profile_page(url: str) -> Optional[str]:
+    """Lade den HTML-Inhalt einer Profilseite."""
+    try:
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (compatible; BreakPilot-Crawler/1.0; +https://breakpilot.de)',
+            'Accept': 'text/html,application/xhtml+xml',
+            'Accept-Language': 'de-DE,de;q=0.9,en;q=0.8',
+        }
+        response = requests.get(url, headers=headers, timeout=30)
+        response.raise_for_status()
+        return response.text
+    except Exception as e:
+        logger.error(f"Fehler beim Laden von {url}: {e}")
+        return None
+
+
+def extract_with_beautifulsoup(html: str, url: str) -> Dict[str, Any]:
+    """Extrahiere Basis-Informationen mit BeautifulSoup (ohne AI)."""
+    soup = BeautifulSoup(html, 'html.parser')
+    data = {}
+
+    # Email suchen
+    email_links = soup.find_all('a', href=lambda x: x and x.startswith('mailto:'))
+    if email_links:
+        email = email_links[0]['href'].replace('mailto:', '').split('?')[0]
+        data['email'] = email
+
+    # Telefon suchen
+    phone_links = soup.find_all('a', href=lambda x: x and x.startswith('tel:'))
+    if phone_links:
+        data['phone'] = phone_links[0]['href'].replace('tel:', '')
+
+    # ORCID suchen
+    orcid_links = soup.find_all('a', href=lambda x: x and 'orcid.org' in x)
+    if orcid_links:
+        orcid = orcid_links[0]['href']
+        # Extrahiere ORCID ID
+        if '/' in orcid:
+            data['orcid'] = orcid.split('/')[-1]
+
+    # Google Scholar suchen
+    scholar_links = soup.find_all('a', href=lambda x: x and 'scholar.google' in x)
+    if scholar_links:
+        href = scholar_links[0]['href']
+        if 'user=' in href:
+            data['google_scholar_id'] = href.split('user=')[1].split('&')[0]
+
+    # ResearchGate suchen
+    rg_links = soup.find_all('a', href=lambda x: x and 'researchgate.net' in x)
+    if rg_links:
+        data['researchgate_url'] = rg_links[0]['href']
+
+    # LinkedIn suchen
+    linkedin_links = soup.find_all('a', href=lambda x: x and 'linkedin.com' in x)
+    if linkedin_links:
+        data['linkedin_url'] = linkedin_links[0]['href']
+
+    # Institut/Abteilung Links sammeln (für Hierarchie-Erkennung)
+    base_domain = '/'.join(url.split('/')[:3])
+    department_links = []
+    for link in soup.find_all('a', href=True):
+        href = link['href']
+        text = link.get_text(strip=True)
+        # Suche nach Links die auf Institute/Fakultäten hindeuten
+        if any(kw in text.lower() for kw in ['institut', 'fakultät', 'fachbereich', 'abteilung', 'lehrstuhl']):
+            if href.startswith('/'):
+                href = base_domain + href
+            if href.startswith('http'):
+                department_links.append({'url': href, 'name': text})
+
+    if department_links:
+        # Nimm den ersten gefundenen Department-Link
+        data['department_url'] = department_links[0]['url']
+        data['department_name'] = department_links[0]['name']
+
+    return data
+
+
+def extract_with_ai(html: str, url: str, full_name: str) -> Dict[str, Any]:
+    """Extrahiere strukturierte Daten mit OpenAI GPT."""
+    if not OPENAI_API_KEY:
+        logger.warning("Kein OPENAI_API_KEY gesetzt - nutze nur BeautifulSoup")
+        return extract_with_beautifulsoup(html, url)
+
+    try:
+        import openai
+        client = openai.OpenAI(api_key=OPENAI_API_KEY)
+
+        # Reduziere HTML auf relevanten Text
+        soup = BeautifulSoup(html, 'html.parser')
+
+        # Entferne Scripts, Styles, etc.
+        for tag in soup(['script', 'style', 'nav', 'footer', 'header']):
+            tag.decompose()
+
+        # Extrahiere Text
+        text = soup.get_text(separator='\n', strip=True)
+        # Limitiere auf 8000 Zeichen für API
+        text = text[:8000]
+
+        prompt = f"""Analysiere diese Universitäts-Profilseite für {full_name} und extrahiere folgende Informationen im JSON-Format:
+
+{{
+  "email": "email@uni.de oder null",
+  "phone": "Telefonnummer oder null",
+  "office": "Raum/Büro oder null",
+  "position": "Position/Titel (z.B. Wissenschaftlicher Mitarbeiter, Professorin) oder null",
+  "department_name": "Name des Instituts/der Abteilung oder null",
+  "research_interests": ["Liste", "der", "Forschungsthemen"] oder [],
+  "teaching_topics": ["Liste", "der", "Lehrveranstaltungen/Fächer"] oder [],
+  "supervisor_name": "Name des Vorgesetzten/Lehrstuhlinhabers falls erkennbar oder null"
+}}
+
+Profilseite von {url}:
+
+{text}
+
+Antworte NUR mit dem JSON-Objekt, keine Erklärungen."""
+
+        response = client.chat.completions.create(
+            model="gpt-4o-mini",  # Kostengünstig und schnell
+            messages=[{"role": "user", "content": prompt}],
+            temperature=0.1,
+            max_tokens=500
+        )
+
+        result_text = response.choices[0].message.content.strip()
+
+        # Parse JSON (entferne eventuelle Markdown-Blöcke)
+        if result_text.startswith('```'):
+            result_text = result_text.split('```')[1]
+            if result_text.startswith('json'):
+                result_text = result_text[4:]
+
+        ai_data = json.loads(result_text)
+
+        # Kombiniere mit BeautifulSoup-Ergebnissen (für Links wie ORCID)
+        bs_data = extract_with_beautifulsoup(html, url)
+
+        # AI-Daten haben Priorität, aber BS-Daten für spezifische Links
+        for key in ['orcid', 'google_scholar_id', 'researchgate_url', 'linkedin_url']:
+            if key in bs_data and bs_data[key]:
+                ai_data[key] = bs_data[key]
+
+        return ai_data
+
+    except Exception as e:
+        logger.error(f"AI-Extraktion fehlgeschlagen: {e}")
+        return extract_with_beautifulsoup(html, url)
+
+
+def submit_extracted_data(staff_id: str, data: Dict[str, Any]) -> bool:
+    """Sende extrahierte Daten zurück an BreakPilot."""
+    try:
+        payload = {"staff_id": staff_id, **data}
+
+        # Entferne None-Werte
+        payload = {k: v for k, v in payload.items() if v is not None}
+
+        response = requests.post(
+            f"{API_URL}/api/v1/ai/extraction/submit",
+            json=payload,
+            headers={
+                "Authorization": f"Bearer {API_KEY}",
+                "Content-Type": "application/json"
+            },
+            timeout=30
+        )
+        response.raise_for_status()
+        return True
+    except Exception as e:
+        logger.error(f"Fehler beim Senden der Daten für {staff_id}: {e}")
+        return False
+
+
+def process_profiles():
+    """Hauptschleife: Hole Profile, extrahiere Daten, sende zurück."""
+    logger.info(f"Starte Extraktion - API: {API_URL}")
+
+    processed = 0
+    errors = 0
+
+    while True:
+        # Hole neue Profile
+        profiles = fetch_pending_profiles(limit=BATCH_SIZE)
+
+        if not profiles:
+            logger.info("Keine weiteren Profile zum Verarbeiten. Warte 60 Sekunden...")
+            time.sleep(60)
+            continue
+
+        logger.info(f"Verarbeite {len(profiles)} Profile...")
+
+        for profile in profiles:
+            staff_id = profile['staff_id']
+            url = profile['profile_url']
+            full_name = profile.get('full_name', 'Unbekannt')
+
+            logger.info(f"Verarbeite: {full_name} - {url}")
+
+            # Lade Profilseite
+            html = fetch_profile_page(url)
+            if not html:
+                errors += 1
+                continue
+
+            # Extrahiere Daten
+            extracted = extract_with_ai(html, url, full_name)
+
+            if extracted:
+                # Sende zurück
+                if submit_extracted_data(staff_id, extracted):
+                    processed += 1
+                    logger.info(f"Erfolgreich: {full_name} - Email: {extracted.get('email', 'N/A')}")
+                else:
+                    errors += 1
+            else:
+                errors += 1
+
+            # Rate limiting
+            time.sleep(SLEEP_BETWEEN_REQUESTS)
+
+        logger.info(f"Batch abgeschlossen. Gesamt: {processed} erfolgreich, {errors} Fehler")
+
+
+def main():
+    """Einstiegspunkt."""
+    logger.info("=" * 60)
+    logger.info("BreakPilot vast.ai Profile Extractor")
+    logger.info("=" * 60)
+
+    # Prüfe Konfiguration
+    if not API_KEY:
+        logger.error("BREAKPILOT_API_KEY nicht gesetzt!")
+        sys.exit(1)
+
+    if not OPENAI_API_KEY:
+        logger.warning("OPENAI_API_KEY nicht gesetzt - nutze nur BeautifulSoup-Extraktion")
+
+    # Teste Verbindung
+    try:
+        response = requests.get(
+            f"{API_URL}/v1/health",
+            headers={"Authorization": f"Bearer {API_KEY}"},
+            timeout=10
+        )
+        logger.info(f"API-Verbindung OK: {response.status_code}")
+    except Exception as e:
+        logger.error(f"Kann API nicht erreichen: {e}")
+        logger.error(f"Stelle sicher dass {API_URL} erreichbar ist!")
+        sys.exit(1)
+
+    # Starte Verarbeitung
+    try:
+        process_profiles()
+    except KeyboardInterrupt:
+        logger.info("Beendet durch Benutzer")
+    except Exception as e:
+        logger.error(f"Unerwarteter Fehler: {e}")
+        sys.exit(1)
+
+
+# Run the extractor only when this file is executed as a script.
+if __name__ == "__main__":
+    main()