fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. An earlier partial restore attempt
(660295e2) recovered only a subset of those files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Author: Benjamin Admin
Date: 2026-02-09 09:51:32 +01:00
Parent: f7487ee240
Commit: bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions

View File

@@ -0,0 +1,21 @@
"""
LLM Gateway Routes.
"""
from .chat import router as chat_router
from .playbooks import router as playbooks_router
from .health import router as health_router
from .tools import router as tools_router
from .comparison import router as comparison_router
from .edu_search_seeds import router as edu_search_seeds_router
from .communication import router as communication_router
__all__ = [
"chat_router",
"playbooks_router",
"health_router",
"tools_router",
"comparison_router",
"edu_search_seeds_router",
"communication_router",
]
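
For orientation, a minimal wiring sketch of how these routers could be mounted in a FastAPI app. The package path llm_gateway.routes, the app factory, and the common /v1 mount prefix are illustrative assumptions; the gateway's actual entry point is not part of this diff.

# Hypothetical wiring sketch - the real app factory is not shown in this commit.
from fastapi import FastAPI
from llm_gateway.routes import (  # assumed package path
    chat_router,
    playbooks_router,
    health_router,
    tools_router,
    comparison_router,
    edu_search_seeds_router,
    communication_router,
)

app = FastAPI(title="LLM Gateway")
# Each router already carries its own sub-prefix (e.g. /communication, /comparison);
# the shared /v1 prefix below is an assumption, not taken from this diff.
for router in (
    chat_router,
    playbooks_router,
    health_router,
    tools_router,
    comparison_router,
    edu_search_seeds_router,
    communication_router,
):
    app.include_router(router, prefix="/v1")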

View File

@@ -0,0 +1,112 @@
"""
Chat Completions Route - OpenAI-kompatible API.
"""
import logging
import json
from typing import AsyncIterator
from fastapi import APIRouter, HTTPException, Depends
from fastapi.responses import StreamingResponse
from ..models.chat import (
ChatCompletionRequest,
ChatCompletionResponse,
ChatMessage,
ModelListResponse,
)
from ..services.inference import get_inference_service, InferenceService
from ..services.playbook_service import get_playbook_service, PlaybookService
from ..middleware.auth import verify_api_key
logger = logging.getLogger(__name__)
router = APIRouter(tags=["LLM"])
def get_services():
"""Dependency für Services."""
return get_inference_service(), get_playbook_service()
@router.post("/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(
request: ChatCompletionRequest,
_: str = Depends(verify_api_key),
):
"""
OpenAI-kompatible Chat Completions.
Unterstützt:
- Streaming (stream=true)
- Playbook-basierte System Prompts (metadata.playbook_id)
- Multiple Models (breakpilot-teacher-8b, claude-3-5-sonnet, etc.)
"""
inference_service, playbook_service = get_services()
# Playbook System Prompt injizieren
if request.metadata and request.metadata.playbook_id:
playbook = playbook_service.get_playbook(request.metadata.playbook_id)
if playbook:
# System Prompt an den Anfang der Messages einfügen
system_msg = ChatMessage(role="system", content=playbook.system_prompt)
# Prüfen ob bereits ein System Prompt existiert
has_system = any(m.role == "system" for m in request.messages)
if not has_system:
request.messages.insert(0, system_msg)
else:
# Playbook Prompt vor bestehenden System Prompt setzen
for i, msg in enumerate(request.messages):
if msg.role == "system":
msg.content = f"{playbook.system_prompt}\n\n{msg.content}"
break
try:
if request.stream:
return StreamingResponse(
stream_response(request, inference_service),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
else:
response = await inference_service.complete(request)
return response
except ValueError as e:
logger.error(f"Chat completion error: {e}")
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.exception(f"Chat completion failed: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
async def stream_response(
request: ChatCompletionRequest,
inference_service: InferenceService,
) -> AsyncIterator[str]:
"""Generator für SSE Streaming."""
try:
async for chunk in inference_service.stream(request):
data = chunk.model_dump_json()
yield f"data: {data}\n\n"
yield "data: [DONE]\n\n"
except Exception as e:
logger.exception(f"Streaming error: {e}")
error_data = json.dumps({"error": str(e)})
yield f"data: {error_data}\n\n"
@router.get("/models", response_model=ModelListResponse)
async def list_models(
_: str = Depends(verify_api_key),
):
"""
Liste verfügbarer Modelle.
Gibt alle konfigurierten Modelle zurück, die aktuell verfügbar sind.
"""
inference_service = get_inference_service()
return await inference_service.list_models()
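
A minimal client sketch against this route, assuming the gateway listens on http://localhost:8000 under a /v1 prefix and that verify_api_key accepts a bearer Authorization header (all three are assumptions, not taken from this diff). Setting metadata.playbook_id triggers the playbook injection implemented above; stream=true switches the response to Server-Sent Events terminated by "data: [DONE]".

# Client sketch (assumed: base URL, /v1 prefix, bearer-token auth).
import json
import httpx

BASE = "http://localhost:8000/v1"                # assumed gateway address
HEADERS = {"Authorization": "Bearer <api-key>"}  # assumed scheme for verify_api_key

payload = {
    "model": "breakpilot-teacher-8b",
    "messages": [{"role": "user", "content": "Create a worksheet on fractions for grade 6."}],
    "metadata": {"playbook_id": "default"},      # optional: injects the playbook system prompt
    "stream": True,
}

# Streaming mode: the route emits SSE chunks and ends with "data: [DONE]".
with httpx.stream("POST", f"{BASE}/chat/completions", json=payload,
                  headers=HEADERS, timeout=120.0) as resp:
    for line in resp.iter_lines():
        if not line.startswith("data: "):
            continue
        data = line[len("data: "):]
        if data == "[DONE]":
            break
        print(json.loads(data))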

View File

@@ -0,0 +1,403 @@
"""
Communication API Routes.
API-Endpoints für KI-gestützte Lehrer-Eltern-Kommunikation.
Basiert auf den Prinzipien der gewaltfreien Kommunikation (GFK)
und deutschen Schulgesetzen.
"""
import logging
from typing import Optional, List
from datetime import datetime
from fastapi import APIRouter, HTTPException, Depends
from pydantic import BaseModel, Field
from ..services.communication_service import (
get_communication_service,
CommunicationService,
CommunicationType,
CommunicationTone,
)
from ..services.inference import InferenceService, get_inference_service
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/communication", tags=["communication"])
# =============================================================================
# Pydantic Models
# =============================================================================
class CommunicationTypeResponse(BaseModel):
"""Response für Kommunikationstypen."""
value: str
label: str
class ToneResponse(BaseModel):
"""Response für Tonalitäten."""
value: str
label: str
class StateResponse(BaseModel):
"""Response für Bundesländer."""
value: str
label: str
class LegalReferenceResponse(BaseModel):
"""Response für rechtliche Referenzen."""
law: str
paragraph: str
title: str
summary: str
relevance: str
class GFKPrincipleResponse(BaseModel):
"""Response für GFK-Prinzipien."""
principle: str
description: str
example: str
class GenerateRequest(BaseModel):
"""Request für Nachrichtengenerierung."""
communication_type: str = Field(..., description="Art der Kommunikation (z.B. 'behavior', 'academic')")
tone: str = Field("professional", description="Tonalität (formal, professional, warm, concerned, appreciative)")
state: str = Field("NRW", description="Bundesland für rechtliche Referenzen")
student_name: str = Field(..., description="Name des Schülers/der Schülerin")
parent_name: str = Field(..., description="Name der Eltern (z.B. 'Frau Müller')")
situation: str = Field(..., description="Beschreibung der Situation")
additional_info: Optional[str] = Field(None, description="Zusätzliche Informationen")
class GenerateResponse(BaseModel):
"""Response für generierte Nachrichten."""
message: str
subject: str
validation: dict
legal_references: List[LegalReferenceResponse]
gfk_principles: List[GFKPrincipleResponse]
class ValidateRequest(BaseModel):
"""Request für Textvalidierung."""
text: str = Field(..., description="Der zu validierende Text")
class ValidateResponse(BaseModel):
"""Response für Validierung."""
is_valid: bool
issues: List[str]
suggestions: List[str]
positive_elements: List[str]
gfk_score: float
# =============================================================================
# Endpoints
# =============================================================================
@router.get("/types", response_model=List[CommunicationTypeResponse])
async def get_communication_types():
"""
Gibt alle verfügbaren Kommunikationstypen zurück.
Returns:
Liste aller Kommunikationstypen mit Wert und Label
"""
service = get_communication_service()
return service.get_all_communication_types()
@router.get("/tones", response_model=List[ToneResponse])
async def get_tones():
"""
Gibt alle verfügbaren Tonalitäten zurück.
Returns:
Liste aller Tonalitäten mit Wert und Label
"""
service = get_communication_service()
return service.get_all_tones()
@router.get("/states", response_model=List[StateResponse])
async def get_states():
"""
Gibt alle verfügbaren Bundesländer zurück.
Returns:
Liste aller Bundesländer mit Wert und Label
"""
service = get_communication_service()
return service.get_states()
@router.get("/legal-references/{state}")
async def get_legal_references(state: str):
"""
Gibt rechtliche Referenzen für ein Bundesland zurück.
Args:
state: Bundesland-Kürzel (z.B. NRW, BY)
Returns:
Rechtliche Referenzen für das Bundesland
"""
service = get_communication_service()
refs = service.get_legal_references(state, "elternpflichten")
return [
LegalReferenceResponse(
law=ref.law,
paragraph=ref.paragraph,
title=ref.title,
summary=ref.summary,
relevance=ref.relevance
)
for ref in refs
]
@router.get("/gfk-principles", response_model=List[GFKPrincipleResponse])
async def get_gfk_principles():
"""
Gibt die Prinzipien der gewaltfreien Kommunikation zurück.
Returns:
Liste der GFK-Prinzipien mit Beschreibung und Beispielen
"""
service = get_communication_service()
principles = service.get_gfk_guidance(CommunicationType.GENERAL_INFO)
return [
GFKPrincipleResponse(
principle=p.principle,
description=p.description,
example=p.example
)
for p in principles
]
@router.post("/generate", response_model=GenerateResponse)
async def generate_communication(request: GenerateRequest):
"""
Generiert einen Elternbrief basierend auf dem Kontext.
Args:
request: GenerateRequest mit allen nötigen Informationen
Returns:
GenerateResponse mit generiertem Text und Metadaten
"""
service = get_communication_service()
# Kommunikationstyp validieren
try:
comm_type = CommunicationType(request.communication_type)
except ValueError:
raise HTTPException(
status_code=400,
detail=f"Ungültiger Kommunikationstyp: {request.communication_type}"
)
# Tonalität validieren
try:
tone = CommunicationTone(request.tone)
except ValueError:
tone = CommunicationTone.PROFESSIONAL
# System- und User-Prompt erstellen
system_prompt = service.build_system_prompt(comm_type, request.state, tone)
user_prompt = service.build_user_prompt(comm_type, {
"student_name": request.student_name,
"parent_name": request.parent_name,
"situation": request.situation,
"additional_info": request.additional_info,
})
# Inference-Service aufrufen
try:
inference_service = get_inference_service()
response = await inference_service.generate(
prompt=user_prompt,
system_prompt=system_prompt,
temperature=0.7, # Etwas kreativ, aber kontrolliert
max_tokens=2000,
)
generated_message = response.get("content", "")
except Exception as e:
logger.error(f"Fehler bei der Nachrichtengenerierung: {e}")
# Fallback: Vorlage verwenden
template = service.get_template(comm_type)
generated_message = f"""{template['opening'].format(
parent_name=request.parent_name,
student_name=request.student_name,
topic=request.situation[:50] + '...' if len(request.situation) > 50 else request.situation
)}
{request.situation}
{template['closing'].format(
student_name=request.student_name,
legal_reference=f"des Schulgesetzes"
)}"""
# Validierung durchführen
validation = service.validate_communication(generated_message)
# Rechtliche Referenzen holen
topic_map = {
CommunicationType.ATTENDANCE: "schulpflicht",
CommunicationType.BEHAVIOR: "ordnungsmassnahmen",
CommunicationType.ACADEMIC: "foerderung",
CommunicationType.SPECIAL_NEEDS: "foerderung",
}
topic = topic_map.get(comm_type, "elternpflichten")
legal_refs = service.get_legal_references(request.state, topic)
# GFK-Prinzipien
gfk_principles = service.get_gfk_guidance(comm_type)
# Betreff generieren
template = service.get_template(comm_type)
subject = template.get("subject", "Mitteilung der Schule").format(
student_name=request.student_name,
topic=request.situation[:30] + '...' if len(request.situation) > 30 else request.situation
)
return GenerateResponse(
message=generated_message,
subject=subject,
validation=validation,
legal_references=[
LegalReferenceResponse(
law=ref.law,
paragraph=ref.paragraph,
title=ref.title,
summary=ref.summary,
relevance=ref.relevance
)
for ref in legal_refs
],
gfk_principles=[
GFKPrincipleResponse(
principle=p.principle,
description=p.description,
example=p.example
)
for p in gfk_principles
]
)
@router.post("/validate", response_model=ValidateResponse)
async def validate_communication(request: ValidateRequest):
"""
Validiert einen Text auf GFK-Konformität.
Args:
request: ValidateRequest mit dem zu prüfenden Text
Returns:
ValidateResponse mit Validierungsergebnissen
"""
service = get_communication_service()
result = service.validate_communication(request.text)
return ValidateResponse(
is_valid=result["is_valid"],
issues=result["issues"],
suggestions=result["suggestions"],
positive_elements=result["positive_elements"],
gfk_score=result["gfk_score"]
)
@router.post("/improve")
async def improve_communication(request: ValidateRequest):
"""
Verbessert einen bestehenden Text nach GFK-Prinzipien.
Args:
request: ValidateRequest mit dem zu verbessernden Text
Returns:
Verbesserter Text mit Änderungsvorschlägen
"""
service = get_communication_service()
# Erst validieren
validation = service.validate_communication(request.text)
if validation["is_valid"] and validation["gfk_score"] >= 0.8:
return {
"improved_text": request.text,
"changes": [],
"was_improved": False,
"message": "Der Text entspricht bereits den GFK-Prinzipien."
}
# System-Prompt für Verbesserung
system_prompt = """Du bist ein Experte für gewaltfreie Kommunikation (GFK) nach Marshall Rosenberg.
Deine Aufgabe ist es, einen Elternbrief zu verbessern, sodass er den GFK-Prinzipien entspricht.
VERBESSERUNGSREGELN:
1. Ersetze Bewertungen durch Beobachtungen
2. Ersetze "Sie müssen/sollten" durch Ich-Botschaften und Bitten
3. Entferne Schuldzuweisungen
4. Füge empathische Elemente hinzu
5. Behalte den sachlichen Inhalt bei
Gib den verbesserten Text zurück und erkläre kurz die wichtigsten Änderungen."""
user_prompt = f"""Bitte verbessere folgenden Elternbrief nach den GFK-Prinzipien:
---
{request.text}
---
Identifizierte Probleme:
{', '.join(validation['issues']) if validation['issues'] else 'Keine spezifischen Probleme gefunden, aber GFK-Score könnte verbessert werden.'}
Vorschläge:
{', '.join(validation['suggestions']) if validation['suggestions'] else 'Allgemeine Verbesserungen möglich.'}"""
try:
inference_service = get_inference_service()
response = await inference_service.generate(
prompt=user_prompt,
system_prompt=system_prompt,
temperature=0.5,
max_tokens=2500,
)
improved_text = response.get("content", request.text)
# Nochmal validieren
new_validation = service.validate_communication(improved_text)
return {
"improved_text": improved_text,
"original_issues": validation["issues"],
"was_improved": True,
"old_score": validation["gfk_score"],
"new_score": new_validation["gfk_score"],
"remaining_issues": new_validation["issues"],
}
except Exception as e:
logger.error(f"Fehler bei der Textverbesserung: {e}")
return {
"improved_text": request.text,
"changes": [],
"was_improved": False,
"error": str(e),
"message": "Die automatische Verbesserung ist derzeit nicht verfügbar."
}
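
A usage sketch for the generate and validate endpoints. The base URL and /v1 prefix are assumptions, and the enum strings "behavior" and "warm" are taken from the field descriptions above; these communication routes declare no API-key dependency in this file. The generate response carries the drafted letter plus its GFK validation, legal references, and GFK principles; validate can be re-run on any edited draft.

# Generate a parent letter, then re-check the draft against the GFK rules.
import httpx

BASE = "http://localhost:8000/v1"  # assumed gateway address

generated = httpx.post(f"{BASE}/communication/generate", json={
    "communication_type": "behavior",
    "tone": "warm",
    "state": "NRW",
    "student_name": "Max Mustermann",
    "parent_name": "Frau Mustermann",
    "situation": "Max has repeatedly disrupted lessons this week.",
}, timeout=60.0).json()
print(generated["subject"])
print(generated["validation"])

check = httpx.post(f"{BASE}/communication/validate",
                   json={"text": generated["message"]}, timeout=30.0).json()
print(check["gfk_score"], check["issues"])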

View File

@@ -0,0 +1,584 @@
"""
LLM Comparison Route - Vergleicht Antworten verschiedener LLM Backends.
Dieses Modul ermoeglicht:
- Parallele Anfragen an OpenAI, Claude, Self-hosted+Tavily, Self-hosted+EduSearch
- Speichern von Vergleichsergebnissen fuer QA
- Parameter-Tuning fuer Self-hosted Modelle
"""
import asyncio
import logging
import time
import uuid
from datetime import datetime, timezone
from typing import Optional
from pydantic import BaseModel, Field
from fastapi import APIRouter, HTTPException, Depends
from ..models.chat import ChatMessage
from ..middleware.auth import verify_api_key
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/comparison", tags=["LLM Comparison"])
class ComparisonRequest(BaseModel):
"""Request fuer LLM-Vergleich."""
prompt: str = Field(..., description="User prompt (z.B. Lehrer-Frage)")
system_prompt: Optional[str] = Field(None, description="Optionaler System Prompt")
enable_openai: bool = Field(True, description="OpenAI/ChatGPT aktivieren")
enable_claude: bool = Field(True, description="Claude aktivieren")
enable_selfhosted_tavily: bool = Field(True, description="Self-hosted + Tavily aktivieren")
enable_selfhosted_edusearch: bool = Field(True, description="Self-hosted + EduSearch aktivieren")
# Parameter fuer Self-hosted Modelle
selfhosted_model: str = Field("llama3.2:3b", description="Self-hosted Modell")
temperature: float = Field(0.7, ge=0.0, le=2.0, description="Temperature")
top_p: float = Field(0.9, ge=0.0, le=1.0, description="Top-p Sampling")
max_tokens: int = Field(2048, ge=1, le=8192, description="Max Tokens")
# Search Parameter
search_results_count: int = Field(5, ge=1, le=20, description="Anzahl Suchergebnisse")
edu_search_filters: Optional[dict] = Field(None, description="Filter fuer EduSearch")
class LLMResponse(BaseModel):
"""Antwort eines einzelnen LLM."""
provider: str
model: str
response: str
latency_ms: int
tokens_used: Optional[int] = None
search_results: Optional[list] = None
error: Optional[str] = None
timestamp: datetime = Field(default_factory=datetime.utcnow)
class ComparisonResponse(BaseModel):
"""Gesamt-Antwort des Vergleichs."""
comparison_id: str
prompt: str
system_prompt: Optional[str]
responses: list[LLMResponse]
created_at: datetime = Field(default_factory=datetime.utcnow)
class SavedComparison(BaseModel):
"""Gespeicherter Vergleich fuer QA."""
comparison_id: str
prompt: str
system_prompt: Optional[str]
responses: list[LLMResponse]
notes: Optional[str] = None
rating: Optional[dict] = None # {"openai": 4, "claude": 5, ...}
created_at: datetime
created_by: Optional[str] = None
# In-Memory Storage (in Production: Database)
_comparisons_store: dict[str, SavedComparison] = {}
_system_prompts_store: dict[str, dict] = {
"default": {
"id": "default",
"name": "Standard Lehrer-Assistent",
"prompt": """Du bist ein hilfreicher Assistent fuer Lehrkraefte in Deutschland.
Deine Aufgaben:
- Hilfe bei der Unterrichtsplanung
- Erklaerung von Fachinhalten
- Erstellung von Arbeitsblaettern und Pruefungen
- Beratung zu paedagogischen Methoden
Antworte immer auf Deutsch und beachte den deutschen Lehrplankontext.""",
"created_at": datetime.now(timezone.utc).isoformat(),
},
"curriculum": {
"id": "curriculum",
"name": "Lehrplan-Experte",
"prompt": """Du bist ein Experte fuer deutsche Lehrplaene und Bildungsstandards.
Du kennst:
- Lehrplaene aller 16 Bundeslaender
- KMK Bildungsstandards
- Kompetenzorientierung im deutschen Bildungssystem
Beziehe dich immer auf konkrete Lehrplanvorgaben wenn moeglich.""",
"created_at": datetime.now(timezone.utc).isoformat(),
},
"worksheet": {
"id": "worksheet",
"name": "Arbeitsblatt-Generator",
"prompt": """Du bist ein spezialisierter Assistent fuer die Erstellung von Arbeitsblaettern.
Erstelle didaktisch sinnvolle Aufgaben mit:
- Klaren Arbeitsanweisungen
- Differenzierungsmoeglichkeiten
- Loesungshinweisen
Format: Markdown mit klarer Struktur.""",
"created_at": datetime.now(timezone.utc).isoformat(),
},
}
async def _call_openai(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
"""Ruft OpenAI ChatGPT auf."""
import os
import httpx
start_time = time.time()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
return LLMResponse(
provider="openai",
model="gpt-4o-mini",
response="",
latency_ms=0,
error="OPENAI_API_KEY nicht konfiguriert"
)
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
try:
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
"https://api.openai.com/v1/chat/completions",
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
},
json={
"model": "gpt-4o-mini",
"messages": messages,
"temperature": 0.7,
"max_tokens": 2048,
},
)
response.raise_for_status()
data = response.json()
latency_ms = int((time.time() - start_time) * 1000)
content = data["choices"][0]["message"]["content"]
tokens = data.get("usage", {}).get("total_tokens")
return LLMResponse(
provider="openai",
model="gpt-4o-mini",
response=content,
latency_ms=latency_ms,
tokens_used=tokens,
)
except Exception as e:
return LLMResponse(
provider="openai",
model="gpt-4o-mini",
response="",
latency_ms=int((time.time() - start_time) * 1000),
error=str(e),
)
async def _call_claude(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
"""Ruft Anthropic Claude auf."""
import os
start_time = time.time()
api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
return LLMResponse(
provider="claude",
model="claude-3-5-sonnet-20241022",
response="",
latency_ms=0,
error="ANTHROPIC_API_KEY nicht konfiguriert"
)
try:
import anthropic
client = anthropic.AsyncAnthropic(api_key=api_key)
response = await client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=2048,
system=system_prompt or "",
messages=[{"role": "user", "content": prompt}],
)
latency_ms = int((time.time() - start_time) * 1000)
content = response.content[0].text if response.content else ""
tokens = response.usage.input_tokens + response.usage.output_tokens
return LLMResponse(
provider="claude",
model="claude-3-5-sonnet-20241022",
response=content,
latency_ms=latency_ms,
tokens_used=tokens,
)
except Exception as e:
return LLMResponse(
provider="claude",
model="claude-3-5-sonnet-20241022",
response="",
latency_ms=int((time.time() - start_time) * 1000),
error=str(e),
)
async def _search_tavily(query: str, count: int = 5) -> list[dict]:
"""Sucht mit Tavily API."""
import os
import httpx
api_key = os.getenv("TAVILY_API_KEY")
if not api_key:
return []
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
"https://api.tavily.com/search",
json={
"api_key": api_key,
"query": query,
"max_results": count,
"include_domains": [
"kmk.org", "bildungsserver.de", "bpb.de",
"bayern.de", "nrw.de", "berlin.de",
],
},
)
response.raise_for_status()
data = response.json()
return data.get("results", [])
except Exception as e:
logger.error(f"Tavily search error: {e}")
return []
async def _search_edusearch(query: str, count: int = 5, filters: Optional[dict] = None) -> list[dict]:
"""Sucht mit EduSearch API."""
import os
import httpx
edu_search_url = os.getenv("EDU_SEARCH_URL", "http://edu-search-service:8084")
try:
async with httpx.AsyncClient(timeout=30.0) as client:
payload = {
"q": query,
"limit": count,
"mode": "keyword",
}
if filters:
payload["filters"] = filters
response = await client.post(
f"{edu_search_url}/v1/search",
json=payload,
)
response.raise_for_status()
data = response.json()
# Formatiere Ergebnisse
results = []
for r in data.get("results", []):
results.append({
"title": r.get("title", ""),
"url": r.get("url", ""),
"content": r.get("snippet", ""),
"score": r.get("scores", {}).get("final", 0),
})
return results
except Exception as e:
logger.error(f"EduSearch error: {e}")
return []
async def _call_selfhosted_with_search(
prompt: str,
system_prompt: Optional[str],
search_provider: str,
search_results: list[dict],
model: str,
temperature: float,
top_p: float,
max_tokens: int,
) -> LLMResponse:
"""Ruft Self-hosted LLM mit Suchergebnissen auf."""
import os
import httpx
start_time = time.time()
ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")
# Baue Kontext aus Suchergebnissen
context_parts = []
for i, result in enumerate(search_results, 1):
context_parts.append(f"[{i}] {result.get('title', 'Untitled')}")
context_parts.append(f" URL: {result.get('url', '')}")
context_parts.append(f" {result.get('content', '')[:500]}")
context_parts.append("")
search_context = "\n".join(context_parts)
# Erweitere System Prompt mit Suchergebnissen
augmented_system = f"""{system_prompt or ''}
Du hast Zugriff auf folgende Suchergebnisse aus {"Tavily" if search_provider == "tavily" else "EduSearch (deutsche Bildungsquellen)"}:
{search_context}
Nutze diese Quellen um deine Antwort zu unterstuetzen. Zitiere relevante Quellen mit [Nummer]."""
messages = [
{"role": "system", "content": augmented_system},
{"role": "user", "content": prompt},
]
try:
async with httpx.AsyncClient(timeout=120.0) as client:
response = await client.post(
f"{ollama_url}/api/chat",
json={
"model": model,
"messages": messages,
"stream": False,
"options": {
"temperature": temperature,
"top_p": top_p,
"num_predict": max_tokens,
},
},
)
response.raise_for_status()
data = response.json()
latency_ms = int((time.time() - start_time) * 1000)
content = data.get("message", {}).get("content", "")
tokens = data.get("prompt_eval_count", 0) + data.get("eval_count", 0)
return LLMResponse(
provider=f"selfhosted_{search_provider}",
model=model,
response=content,
latency_ms=latency_ms,
tokens_used=tokens,
search_results=search_results,
)
except Exception as e:
return LLMResponse(
provider=f"selfhosted_{search_provider}",
model=model,
response="",
latency_ms=int((time.time() - start_time) * 1000),
error=str(e),
search_results=search_results,
)
@router.post("/run", response_model=ComparisonResponse)
async def run_comparison(
request: ComparisonRequest,
_: str = Depends(verify_api_key),
):
"""
Fuehrt LLM-Vergleich durch.
Sendet den Prompt parallel an alle aktivierten Provider und
sammelt die Antworten.
"""
comparison_id = f"cmp-{uuid.uuid4().hex[:12]}"
tasks = []
# System Prompt vorbereiten
system_prompt = request.system_prompt
# OpenAI
if request.enable_openai:
tasks.append(("openai", _call_openai(request.prompt, system_prompt)))
# Claude
if request.enable_claude:
tasks.append(("claude", _call_claude(request.prompt, system_prompt)))
# Self-hosted + Tavily
if request.enable_selfhosted_tavily:
tavily_results = await _search_tavily(request.prompt, request.search_results_count)
tasks.append((
"selfhosted_tavily",
_call_selfhosted_with_search(
request.prompt,
system_prompt,
"tavily",
tavily_results,
request.selfhosted_model,
request.temperature,
request.top_p,
request.max_tokens,
)
))
# Self-hosted + EduSearch
if request.enable_selfhosted_edusearch:
edu_results = await _search_edusearch(
request.prompt,
request.search_results_count,
request.edu_search_filters,
)
tasks.append((
"selfhosted_edusearch",
_call_selfhosted_with_search(
request.prompt,
system_prompt,
"edusearch",
edu_results,
request.selfhosted_model,
request.temperature,
request.top_p,
request.max_tokens,
)
))
# Parallele Ausfuehrung
responses = []
if tasks:
results = await asyncio.gather(*[t[1] for t in tasks], return_exceptions=True)
for (name, _), result in zip(tasks, results):
if isinstance(result, Exception):
responses.append(LLMResponse(
provider=name,
model="unknown",
response="",
latency_ms=0,
error=str(result),
))
else:
responses.append(result)
return ComparisonResponse(
comparison_id=comparison_id,
prompt=request.prompt,
system_prompt=system_prompt,
responses=responses,
)
@router.post("/save/{comparison_id}")
async def save_comparison(
comparison_id: str,
comparison: ComparisonResponse,
notes: Optional[str] = None,
rating: Optional[dict] = None,
_: str = Depends(verify_api_key),
):
"""Speichert einen Vergleich fuer spaetere Analyse."""
saved = SavedComparison(
comparison_id=comparison_id,
prompt=comparison.prompt,
system_prompt=comparison.system_prompt,
responses=comparison.responses,
notes=notes,
rating=rating,
created_at=comparison.created_at,
)
_comparisons_store[comparison_id] = saved
return {"status": "saved", "comparison_id": comparison_id}
@router.get("/history")
async def get_comparison_history(
limit: int = 50,
_: str = Depends(verify_api_key),
):
"""Gibt gespeicherte Vergleiche zurueck."""
comparisons = list(_comparisons_store.values())
comparisons.sort(key=lambda x: x.created_at, reverse=True)
return {"comparisons": comparisons[:limit]}
@router.get("/history/{comparison_id}")
async def get_comparison(
comparison_id: str,
_: str = Depends(verify_api_key),
):
"""Gibt einen bestimmten Vergleich zurueck."""
if comparison_id not in _comparisons_store:
raise HTTPException(status_code=404, detail="Vergleich nicht gefunden")
return _comparisons_store[comparison_id]
# System Prompt Management
@router.get("/prompts")
async def list_system_prompts(
_: str = Depends(verify_api_key),
):
"""Listet alle gespeicherten System Prompts."""
return {"prompts": list(_system_prompts_store.values())}
@router.post("/prompts")
async def create_system_prompt(
name: str,
prompt: str,
_: str = Depends(verify_api_key),
):
"""Erstellt einen neuen System Prompt."""
prompt_id = f"sp-{uuid.uuid4().hex[:8]}"
_system_prompts_store[prompt_id] = {
"id": prompt_id,
"name": name,
"prompt": prompt,
"created_at": datetime.now(timezone.utc).isoformat(),
}
return {"status": "created", "prompt_id": prompt_id}
@router.put("/prompts/{prompt_id}")
async def update_system_prompt(
prompt_id: str,
name: str,
prompt: str,
_: str = Depends(verify_api_key),
):
"""Aktualisiert einen System Prompt."""
if prompt_id not in _system_prompts_store:
raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")
_system_prompts_store[prompt_id].update({
"name": name,
"prompt": prompt,
"updated_at": datetime.now(timezone.utc).isoformat(),
})
return {"status": "updated", "prompt_id": prompt_id}
@router.delete("/prompts/{prompt_id}")
async def delete_system_prompt(
prompt_id: str,
_: str = Depends(verify_api_key),
):
"""Loescht einen System Prompt."""
if prompt_id not in _system_prompts_store:
raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")
if prompt_id in ["default", "curriculum", "worksheet"]:
raise HTTPException(status_code=400, detail="Standard-Prompts koennen nicht geloescht werden")
del _system_prompts_store[prompt_id]
return {"status": "deleted", "prompt_id": prompt_id}
@router.get("/prompts/{prompt_id}")
async def get_system_prompt(
prompt_id: str,
_: str = Depends(verify_api_key),
):
"""Gibt einen System Prompt zurueck."""
if prompt_id not in _system_prompts_store:
raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")
return _system_prompts_store[prompt_id]
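
A sketch of a comparison run (assumed: base URL, /v1 prefix, bearer-token auth for verify_api_key). Disabled providers are simply skipped; each enabled provider returns an LLMResponse whose error field is populated instead of raising, so the client can report partial failures.

# Run one prompt against several backends and print per-provider latency or error.
import httpx

BASE = "http://localhost:8000/v1"
HEADERS = {"Authorization": "Bearer <api-key>"}

comparison = httpx.post(f"{BASE}/comparison/run", headers=HEADERS, timeout=300.0, json={
    "prompt": "Explain the rule of three for a 7th-grade math class.",
    "enable_openai": True,
    "enable_claude": True,
    "enable_selfhosted_tavily": False,
    "enable_selfhosted_edusearch": True,
    "selfhosted_model": "llama3.2:3b",
    "temperature": 0.7,
    "max_tokens": 2048,
    "search_results_count": 5,
}).json()

for answer in comparison["responses"]:
    outcome = answer["error"] or f"{answer['latency_ms']} ms"
    print(f"{answer['provider']:24} {outcome}")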

View File

@@ -0,0 +1,710 @@
"""
EduSearch Seeds API Routes.
CRUD operations for managing education search crawler seed URLs.
Direct database access to PostgreSQL.
"""
import os
import logging
from typing import Optional, List
from datetime import datetime
from uuid import UUID
from fastapi import APIRouter, HTTPException, Depends, Query
from pydantic import BaseModel, Field, HttpUrl
import asyncpg
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/edu-search", tags=["edu-search"])
# Database connection pool
_pool: Optional[asyncpg.Pool] = None
async def get_db_pool() -> asyncpg.Pool:
"""Get or create database connection pool."""
global _pool
if _pool is None:
database_url = os.environ.get("DATABASE_URL")
if not database_url:
raise RuntimeError("DATABASE_URL nicht konfiguriert - bitte via Vault oder Umgebungsvariable setzen")
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
return _pool
# =============================================================================
# Pydantic Models
# =============================================================================
class CategoryResponse(BaseModel):
"""Category response model."""
id: str
name: str
display_name: str
description: Optional[str] = None
icon: Optional[str] = None
sort_order: int
is_active: bool
class SeedBase(BaseModel):
"""Base seed model for creation/update."""
url: str = Field(..., max_length=500)
name: str = Field(..., max_length=255)
description: Optional[str] = None
category_name: Optional[str] = Field(None, description="Category name (federal, states, etc.)")
source_type: str = Field("GOV", description="GOV, EDU, UNI, etc.")
scope: str = Field("FEDERAL", description="FEDERAL, STATE, etc.")
state: Optional[str] = Field(None, max_length=5, description="State code (BW, BY, etc.)")
trust_boost: float = Field(0.50, ge=0.0, le=1.0)
enabled: bool = True
crawl_depth: int = Field(2, ge=1, le=5)
crawl_frequency: str = Field("weekly", description="hourly, daily, weekly, monthly")
class SeedCreate(SeedBase):
"""Seed creation model."""
pass
class SeedUpdate(BaseModel):
"""Seed update model (all fields optional)."""
url: Optional[str] = Field(None, max_length=500)
name: Optional[str] = Field(None, max_length=255)
description: Optional[str] = None
category_name: Optional[str] = None
source_type: Optional[str] = None
scope: Optional[str] = None
state: Optional[str] = Field(None, max_length=5)
trust_boost: Optional[float] = Field(None, ge=0.0, le=1.0)
enabled: Optional[bool] = None
crawl_depth: Optional[int] = Field(None, ge=1, le=5)
crawl_frequency: Optional[str] = None
class SeedResponse(BaseModel):
"""Seed response model."""
id: str
url: str
name: str
description: Optional[str] = None
category: Optional[str] = None
category_display_name: Optional[str] = None
source_type: str
scope: str
state: Optional[str] = None
trust_boost: float
enabled: bool
crawl_depth: int
crawl_frequency: str
last_crawled_at: Optional[datetime] = None
last_crawl_status: Optional[str] = None
last_crawl_docs: int = 0
total_documents: int = 0
created_at: datetime
updated_at: datetime
class SeedsListResponse(BaseModel):
"""List response with pagination info."""
seeds: List[SeedResponse]
total: int
page: int
page_size: int
class StatsResponse(BaseModel):
"""Crawl statistics response."""
total_seeds: int
enabled_seeds: int
total_documents: int
seeds_by_category: dict
seeds_by_state: dict
last_crawl_time: Optional[datetime] = None
class BulkImportRequest(BaseModel):
"""Bulk import request."""
seeds: List[SeedCreate]
class BulkImportResponse(BaseModel):
"""Bulk import response."""
imported: int
skipped: int
errors: List[str]
# =============================================================================
# API Endpoints
# =============================================================================
@router.get("/categories", response_model=List[CategoryResponse])
async def list_categories():
"""List all seed categories."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT id, name, display_name, description, icon, sort_order, is_active
FROM edu_search_categories
WHERE is_active = TRUE
ORDER BY sort_order
""")
return [
CategoryResponse(
id=str(row["id"]),
name=row["name"],
display_name=row["display_name"],
description=row["description"],
icon=row["icon"],
sort_order=row["sort_order"],
is_active=row["is_active"],
)
for row in rows
]
@router.get("/seeds", response_model=SeedsListResponse)
async def list_seeds(
category: Optional[str] = Query(None, description="Filter by category name"),
state: Optional[str] = Query(None, description="Filter by state code"),
enabled: Optional[bool] = Query(None, description="Filter by enabled status"),
search: Optional[str] = Query(None, description="Search in name/url"),
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=1, le=200),
):
"""List seeds with optional filtering and pagination."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Build WHERE clause
conditions = []
params = []
param_idx = 1
if category:
conditions.append(f"c.name = ${param_idx}")
params.append(category)
param_idx += 1
if state:
conditions.append(f"s.state = ${param_idx}")
params.append(state)
param_idx += 1
if enabled is not None:
conditions.append(f"s.enabled = ${param_idx}")
params.append(enabled)
param_idx += 1
if search:
conditions.append(f"(s.name ILIKE ${param_idx} OR s.url ILIKE ${param_idx})")
params.append(f"%{search}%")
param_idx += 1
where_clause = " AND ".join(conditions) if conditions else "TRUE"
# Count total
count_query = f"""
SELECT COUNT(*) FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE {where_clause}
"""
total = await conn.fetchval(count_query, *params)
# Get paginated results
offset = (page - 1) * page_size
params.extend([page_size, offset])
query = f"""
SELECT
s.id, s.url, s.name, s.description,
c.name as category, c.display_name as category_display_name,
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
s.created_at, s.updated_at
FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE {where_clause}
ORDER BY c.sort_order, s.name
LIMIT ${param_idx} OFFSET ${param_idx + 1}
"""
rows = await conn.fetch(query, *params)
seeds = [
SeedResponse(
id=str(row["id"]),
url=row["url"],
name=row["name"],
description=row["description"],
category=row["category"],
category_display_name=row["category_display_name"],
source_type=row["source_type"],
scope=row["scope"],
state=row["state"],
trust_boost=float(row["trust_boost"]),
enabled=row["enabled"],
crawl_depth=row["crawl_depth"],
crawl_frequency=row["crawl_frequency"],
last_crawled_at=row["last_crawled_at"],
last_crawl_status=row["last_crawl_status"],
last_crawl_docs=row["last_crawl_docs"] or 0,
total_documents=row["total_documents"] or 0,
created_at=row["created_at"],
updated_at=row["updated_at"],
)
for row in rows
]
return SeedsListResponse(
seeds=seeds,
total=total,
page=page,
page_size=page_size,
)
@router.get("/seeds/{seed_id}", response_model=SeedResponse)
async def get_seed(seed_id: str):
"""Get a single seed by ID."""
pool = await get_db_pool()
async with pool.acquire() as conn:
row = await conn.fetchrow("""
SELECT
s.id, s.url, s.name, s.description,
c.name as category, c.display_name as category_display_name,
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
s.created_at, s.updated_at
FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE s.id = $1
""", seed_id)
if not row:
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
return SeedResponse(
id=str(row["id"]),
url=row["url"],
name=row["name"],
description=row["description"],
category=row["category"],
category_display_name=row["category_display_name"],
source_type=row["source_type"],
scope=row["scope"],
state=row["state"],
trust_boost=float(row["trust_boost"]),
enabled=row["enabled"],
crawl_depth=row["crawl_depth"],
crawl_frequency=row["crawl_frequency"],
last_crawled_at=row["last_crawled_at"],
last_crawl_status=row["last_crawl_status"],
last_crawl_docs=row["last_crawl_docs"] or 0,
total_documents=row["total_documents"] or 0,
created_at=row["created_at"],
updated_at=row["updated_at"],
)
@router.post("/seeds", response_model=SeedResponse, status_code=201)
async def create_seed(seed: SeedCreate):
"""Create a new seed URL."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Get category ID if provided
category_id = None
if seed.category_name:
category_id = await conn.fetchval(
"SELECT id FROM edu_search_categories WHERE name = $1",
seed.category_name
)
try:
row = await conn.fetchrow("""
INSERT INTO edu_search_seeds (
url, name, description, category_id, source_type, scope,
state, trust_boost, enabled, crawl_depth, crawl_frequency
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
RETURNING id, created_at, updated_at
""",
seed.url, seed.name, seed.description, category_id,
seed.source_type, seed.scope, seed.state, seed.trust_boost,
seed.enabled, seed.crawl_depth, seed.crawl_frequency
)
except asyncpg.UniqueViolationError:
raise HTTPException(status_code=409, detail="URL existiert bereits")
return SeedResponse(
id=str(row["id"]),
url=seed.url,
name=seed.name,
description=seed.description,
category=seed.category_name,
category_display_name=None,
source_type=seed.source_type,
scope=seed.scope,
state=seed.state,
trust_boost=seed.trust_boost,
enabled=seed.enabled,
crawl_depth=seed.crawl_depth,
crawl_frequency=seed.crawl_frequency,
last_crawled_at=None,
last_crawl_status=None,
last_crawl_docs=0,
total_documents=0,
created_at=row["created_at"],
updated_at=row["updated_at"],
)
@router.put("/seeds/{seed_id}", response_model=SeedResponse)
async def update_seed(seed_id: str, seed: SeedUpdate):
"""Update an existing seed."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Build update statement dynamically
updates = []
params = []
param_idx = 1
if seed.url is not None:
updates.append(f"url = ${param_idx}")
params.append(seed.url)
param_idx += 1
if seed.name is not None:
updates.append(f"name = ${param_idx}")
params.append(seed.name)
param_idx += 1
if seed.description is not None:
updates.append(f"description = ${param_idx}")
params.append(seed.description)
param_idx += 1
if seed.category_name is not None:
category_id = await conn.fetchval(
"SELECT id FROM edu_search_categories WHERE name = $1",
seed.category_name
)
updates.append(f"category_id = ${param_idx}")
params.append(category_id)
param_idx += 1
if seed.source_type is not None:
updates.append(f"source_type = ${param_idx}")
params.append(seed.source_type)
param_idx += 1
if seed.scope is not None:
updates.append(f"scope = ${param_idx}")
params.append(seed.scope)
param_idx += 1
if seed.state is not None:
updates.append(f"state = ${param_idx}")
params.append(seed.state)
param_idx += 1
if seed.trust_boost is not None:
updates.append(f"trust_boost = ${param_idx}")
params.append(seed.trust_boost)
param_idx += 1
if seed.enabled is not None:
updates.append(f"enabled = ${param_idx}")
params.append(seed.enabled)
param_idx += 1
if seed.crawl_depth is not None:
updates.append(f"crawl_depth = ${param_idx}")
params.append(seed.crawl_depth)
param_idx += 1
if seed.crawl_frequency is not None:
updates.append(f"crawl_frequency = ${param_idx}")
params.append(seed.crawl_frequency)
param_idx += 1
if not updates:
raise HTTPException(status_code=400, detail="Keine Felder zum Aktualisieren")
updates.append("updated_at = NOW()")
params.append(seed_id)
query = f"""
UPDATE edu_search_seeds
SET {", ".join(updates)}
WHERE id = ${param_idx}
RETURNING id
"""
result = await conn.fetchrow(query, *params)
if not result:
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
# Return updated seed
return await get_seed(seed_id)
@router.delete("/seeds/{seed_id}")
async def delete_seed(seed_id: str):
"""Delete a seed."""
pool = await get_db_pool()
async with pool.acquire() as conn:
result = await conn.execute(
"DELETE FROM edu_search_seeds WHERE id = $1",
seed_id
)
if result == "DELETE 0":
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
return {"status": "deleted", "id": seed_id}
@router.post("/seeds/bulk-import", response_model=BulkImportResponse)
async def bulk_import_seeds(request: BulkImportRequest):
"""Bulk import seeds (skip duplicates)."""
pool = await get_db_pool()
imported = 0
skipped = 0
errors = []
async with pool.acquire() as conn:
# Pre-fetch all category IDs
categories = {}
rows = await conn.fetch("SELECT id, name FROM edu_search_categories")
for row in rows:
categories[row["name"]] = row["id"]
for seed in request.seeds:
try:
category_id = categories.get(seed.category_name) if seed.category_name else None
result = await conn.execute("""
INSERT INTO edu_search_seeds (
url, name, description, category_id, source_type, scope,
state, trust_boost, enabled, crawl_depth, crawl_frequency
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
ON CONFLICT (url) DO NOTHING
""",
seed.url, seed.name, seed.description, category_id,
seed.source_type, seed.scope, seed.state, seed.trust_boost,
seed.enabled, seed.crawl_depth, seed.crawl_frequency
)
# ON CONFLICT DO NOTHING swallows duplicates instead of raising
# UniqueViolationError, so detect skipped rows via the command tag.
if result == "INSERT 0 0":
skipped += 1
else:
imported += 1
except Exception as e:
errors.append(f"{seed.url}: {str(e)}")
return BulkImportResponse(imported=imported, skipped=skipped, errors=errors)
@router.get("/stats", response_model=StatsResponse)
async def get_stats():
"""Get crawl statistics."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Basic counts
total = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds")
enabled = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds WHERE enabled = TRUE")
total_docs = await conn.fetchval("SELECT COALESCE(SUM(total_documents), 0) FROM edu_search_seeds")
# By category
cat_rows = await conn.fetch("""
SELECT c.name, COUNT(s.id) as count
FROM edu_search_categories c
LEFT JOIN edu_search_seeds s ON c.id = s.category_id
GROUP BY c.name
""")
by_category = {row["name"]: row["count"] for row in cat_rows}
# By state
state_rows = await conn.fetch("""
SELECT COALESCE(state, 'federal') as state, COUNT(*) as count
FROM edu_search_seeds
GROUP BY state
""")
by_state = {row["state"]: row["count"] for row in state_rows}
# Last crawl time
last_crawl = await conn.fetchval(
"SELECT MAX(last_crawled_at) FROM edu_search_seeds"
)
return StatsResponse(
total_seeds=total,
enabled_seeds=enabled,
total_documents=total_docs,
seeds_by_category=by_category,
seeds_by_state=by_state,
last_crawl_time=last_crawl,
)
# Export for external use (edu-search-service)
@router.get("/seeds/export/for-crawler")
async def export_seeds_for_crawler():
"""Export enabled seeds in format suitable for crawler."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT
s.url, s.trust_boost, s.source_type, s.scope, s.state,
s.crawl_depth, c.name as category
FROM edu_search_seeds s
LEFT JOIN edu_search_categories c ON s.category_id = c.id
WHERE s.enabled = TRUE
ORDER BY s.trust_boost DESC
""")
return {
"seeds": [
{
"url": row["url"],
"trust": float(row["trust_boost"]),
"source": row["source_type"],
"scope": row["scope"],
"state": row["state"],
"depth": row["crawl_depth"],
"category": row["category"],
}
for row in rows
],
"total": len(rows),
"exported_at": datetime.utcnow().isoformat(),
}
# =============================================================================
# Crawl Status Feedback (from edu-search-service)
# =============================================================================
class CrawlStatusUpdate(BaseModel):
"""Crawl status update from edu-search-service."""
seed_url: str = Field(..., description="The seed URL that was crawled")
status: str = Field(..., description="Crawl status: success, error, partial")
documents_crawled: int = Field(0, ge=0, description="Number of documents crawled")
error_message: Optional[str] = Field(None, description="Error message if status is error")
crawl_duration_seconds: float = Field(0.0, ge=0.0, description="Duration of the crawl in seconds")
class CrawlStatusResponse(BaseModel):
"""Response for crawl status update."""
success: bool
seed_url: str
message: str
@router.post("/seeds/crawl-status", response_model=CrawlStatusResponse)
async def update_crawl_status(update: CrawlStatusUpdate):
"""Update crawl status for a seed URL (called by edu-search-service)."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Find the seed by URL
seed = await conn.fetchrow(
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
update.seed_url
)
if not seed:
raise HTTPException(
status_code=404,
detail=f"Seed nicht gefunden: {update.seed_url}"
)
# Update the seed with crawl status
new_total = (seed["total_documents"] or 0) + update.documents_crawled
await conn.execute("""
UPDATE edu_search_seeds
SET
last_crawled_at = NOW(),
last_crawl_status = $2,
last_crawl_docs = $3,
total_documents = $4,
updated_at = NOW()
WHERE id = $1
""", seed["id"], update.status, update.documents_crawled, new_total)
logger.info(
f"Crawl status updated: {update.seed_url} - "
f"status={update.status}, docs={update.documents_crawled}, "
f"duration={update.crawl_duration_seconds:.1f}s"
)
return CrawlStatusResponse(
success=True,
seed_url=update.seed_url,
message=f"Status aktualisiert: {update.documents_crawled} Dokumente gecrawlt"
)
class BulkCrawlStatusUpdate(BaseModel):
"""Bulk crawl status update."""
updates: List[CrawlStatusUpdate]
class BulkCrawlStatusResponse(BaseModel):
"""Response for bulk crawl status update."""
updated: int
failed: int
errors: List[str]
@router.post("/seeds/crawl-status/bulk", response_model=BulkCrawlStatusResponse)
async def bulk_update_crawl_status(request: BulkCrawlStatusUpdate):
"""Bulk update crawl status for multiple seeds."""
pool = await get_db_pool()
updated = 0
failed = 0
errors = []
async with pool.acquire() as conn:
for update in request.updates:
try:
seed = await conn.fetchrow(
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
update.seed_url
)
if not seed:
failed += 1
errors.append(f"Seed nicht gefunden: {update.seed_url}")
continue
new_total = (seed["total_documents"] or 0) + update.documents_crawled
await conn.execute("""
UPDATE edu_search_seeds
SET
last_crawled_at = NOW(),
last_crawl_status = $2,
last_crawl_docs = $3,
total_documents = $4,
updated_at = NOW()
WHERE id = $1
""", seed["id"], update.status, update.documents_crawled, new_total)
updated += 1
except Exception as e:
failed += 1
errors.append(f"{update.seed_url}: {str(e)}")
logger.info(f"Bulk crawl status update: {updated} updated, {failed} failed")
return BulkCrawlStatusResponse(
updated=updated,
failed=failed,
errors=errors
)
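
A sketch of the feedback loop from the crawler's side (base URL and /v1 prefix assumed; the seed routes in this file declare no auth dependency). The reported seed_url must already exist in edu_search_seeds, otherwise the endpoint returns 404; the crawler pulls its work list from the export endpoint.

# Report a finished crawl and fetch the enabled-seed export.
import httpx

BASE = "http://localhost:8000/v1"

report = httpx.post(f"{BASE}/edu-search/seeds/crawl-status", json={
    "seed_url": "https://www.kmk.org",   # must match an existing seed URL
    "status": "success",
    "documents_crawled": 42,
    "crawl_duration_seconds": 17.3,
}, timeout=30.0)
print(report.json())  # {"success": true, "seed_url": ..., "message": ...}

seeds = httpx.get(f"{BASE}/edu-search/seeds/export/for-crawler", timeout=30.0).json()
print(seeds["total"], "enabled seeds")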

View File

@@ -0,0 +1,127 @@
"""
Health Check Route.
"""
import logging
from datetime import datetime
from fastapi import APIRouter
from pydantic import BaseModel
from ..config import get_config
logger = logging.getLogger(__name__)
router = APIRouter(tags=["Health"])
class ComponentStatus(BaseModel):
"""Status einer Komponente."""
name: str
status: str # healthy, degraded, unhealthy
message: str = ""
class HealthResponse(BaseModel):
"""Health Check Response."""
status: str # ok, degraded, error
ts: str
version: str
components: list[ComponentStatus]
@router.get("/health", response_model=HealthResponse)
async def health_check():
"""
Health Check Endpoint.
Prüft den Status aller Komponenten:
- Gateway selbst
- LLM Backend Erreichbarkeit
- Datenbank (wenn konfiguriert)
"""
config = get_config()
components = []
overall_status = "ok"
# Gateway selbst
components.append(ComponentStatus(
name="gateway",
status="healthy",
message="Gateway is running",
))
# Ollama Backend
if config.ollama and config.ollama.enabled:
try:
import httpx
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.get(f"{config.ollama.base_url}/api/tags")
if response.status_code == 200:
components.append(ComponentStatus(
name="ollama",
status="healthy",
message="Ollama is reachable",
))
else:
components.append(ComponentStatus(
name="ollama",
status="degraded",
message=f"Ollama returned status {response.status_code}",
))
overall_status = "degraded"
except Exception as e:
components.append(ComponentStatus(
name="ollama",
status="unhealthy",
message=f"Cannot reach Ollama: {str(e)}",
))
# Nicht critical wenn andere Backends verfügbar
if not (config.vllm and config.vllm.enabled) and not (config.anthropic and config.anthropic.enabled):
overall_status = "error"
# vLLM Backend
if config.vllm and config.vllm.enabled:
try:
import httpx
headers = {}
if config.vllm.api_key:
headers["Authorization"] = f"Bearer {config.vllm.api_key}"
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.get(
f"{config.vllm.base_url}/v1/models",
headers=headers,
)
if response.status_code == 200:
components.append(ComponentStatus(
name="vllm",
status="healthy",
message="vLLM is reachable",
))
else:
components.append(ComponentStatus(
name="vllm",
status="degraded",
message=f"vLLM returned status {response.status_code}",
))
overall_status = "degraded"
except Exception as e:
components.append(ComponentStatus(
name="vllm",
status="unhealthy",
message=f"Cannot reach vLLM: {str(e)}",
))
# Anthropic Backend
if config.anthropic and config.anthropic.enabled:
components.append(ComponentStatus(
name="anthropic",
status="healthy",
message="Anthropic API configured (not checked)",
))
return HealthResponse(
status=overall_status,
ts=datetime.utcnow().isoformat() + "Z",
version="0.1.0",
components=components,
)
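
A quick probe of the health endpoint (address and /v1 prefix assumed; the route declares no auth dependency).

# Print the overall status and each component check.
import httpx

health = httpx.get("http://localhost:8000/v1/health", timeout=5.0).json()
print(health["status"], health["version"])
for component in health["components"]:
    print(f"  {component['name']}: {component['status']} - {component['message']}")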

View File

@@ -0,0 +1,173 @@
"""
Legal Crawler API Routes.
Endpoints für das Crawlen und Abrufen von rechtlichen Bildungsinhalten.
"""
import logging
import asyncio
from typing import List, Optional
from fastapi import APIRouter, HTTPException, BackgroundTasks
from pydantic import BaseModel
from ..services.legal_crawler import get_legal_crawler, LegalCrawler
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/legal-crawler", tags=["legal-crawler"])
class CrawlStatusResponse(BaseModel):
"""Response für Crawl-Status."""
status: str
message: str
stats: Optional[dict] = None
class LegalDocumentResponse(BaseModel):
"""Response für ein rechtliches Dokument."""
id: str
url: str
title: str
law_name: Optional[str]
state: Optional[str]
paragraphs: Optional[list]
last_crawled_at: Optional[str]
class LegalReferenceFromDB(BaseModel):
"""Rechtliche Referenz aus der DB."""
law: str
url: str
state: Optional[str]
title: str
paragraphs: list
# Globaler Status für laufenden Crawl
_crawl_status = {
"running": False,
"last_run": None,
"last_stats": None,
}
async def _run_crawl(db_pool):
"""Führt den Crawl asynchron durch."""
global _crawl_status
_crawl_status["running"] = True
try:
crawler = get_legal_crawler()
stats = await crawler.crawl_legal_seeds(db_pool)
_crawl_status["last_stats"] = stats
_crawl_status["last_run"] = "completed"
except Exception as e:
logger.error(f"Crawl-Fehler: {e}")
_crawl_status["last_run"] = f"error: {str(e)}"
finally:
_crawl_status["running"] = False
@router.post("/start", response_model=CrawlStatusResponse)
async def start_crawl(background_tasks: BackgroundTasks):
"""
Startet einen neuen Crawl für alle Legal-Seeds.
Der Crawl läuft im Hintergrund und kann über /status abgefragt werden.
"""
global _crawl_status
if _crawl_status["running"]:
return CrawlStatusResponse(
status="already_running",
message="Ein Crawl läuft bereits. Bitte warten Sie, bis er abgeschlossen ist."
)
# Hinweis: In Produktion würde hier der DB-Pool übergeben werden
# Für jetzt nur Status setzen
_crawl_status["running"] = True
_crawl_status["last_run"] = "started"
return CrawlStatusResponse(
status="started",
message="Crawl wurde gestartet. Nutzen Sie /status um den Fortschritt zu prüfen."
)
@router.get("/status", response_model=CrawlStatusResponse)
async def get_crawl_status():
"""Gibt den aktuellen Crawl-Status zurück."""
return CrawlStatusResponse(
status="running" if _crawl_status["running"] else "idle",
message=_crawl_status.get("last_run") or "Noch nie gecrawlt",
stats=_crawl_status.get("last_stats")
)
@router.get("/documents", response_model=List[LegalDocumentResponse])
async def get_legal_documents(
state: Optional[str] = None,
doc_type: Optional[str] = None,
limit: int = 50
):
"""
Gibt gecrawlte rechtliche Dokumente zurück.
Args:
state: Filter nach Bundesland (z.B. "NW", "BY")
doc_type: Filter nach Dokumenttyp (z.B. "schulgesetz")
limit: Max. Anzahl Dokumente
Returns:
Liste von LegalDocumentResponse
"""
# TODO: DB-Query implementieren wenn DB-Pool verfügbar
# Für jetzt leere Liste zurückgeben
return []
@router.get("/references/{state}")
async def get_legal_references_for_state(state: str):
"""
Gibt rechtliche Referenzen für ein Bundesland zurück.
Dies ist der Endpoint, den der Communication-Service nutzt.
Args:
state: Bundesland-Kürzel (z.B. "NW", "BY", "BE")
Returns:
Dict mit Schulgesetz-Informationen und Paragraphen
"""
# TODO: Aus DB laden
# Mapping von state-Kürzeln zu DB-Werten
state_mapping = {
"NRW": "NW",
"NW": "NW",
"BY": "BY",
"BW": "BW",
"BE": "BE",
"BB": "BB",
"HB": "HB",
"HH": "HH",
"HE": "HE",
"MV": "MV",
"NI": "NI",
"RP": "RP",
"SL": "SL",
"SN": "SN",
"ST": "ST",
"SH": "SH",
"TH": "TH",
}
db_state = state_mapping.get(state.upper(), state.upper())
# Placeholder - später aus DB
return {
"state": state,
"documents": [],
"message": "Dokumente werden nach dem ersten Crawl verfügbar sein"
}
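
A polling sketch for the crawl endpoints (base URL and /v1 prefix assumed). Note that, as the code above states, the background crawl is not yet wired to a database pool, so /start currently only flips the in-memory status flag.

# Trigger a legal crawl and poll until it is no longer reported as running.
import time
import httpx

BASE = "http://localhost:8000/v1"

print(httpx.post(f"{BASE}/legal-crawler/start", timeout=10.0).json())
while True:
    status = httpx.get(f"{BASE}/legal-crawler/status", timeout=10.0).json()
    print(status["status"], "-", status["message"])
    if status["status"] != "running":
        break
    time.sleep(5)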

View File

@@ -0,0 +1,96 @@
"""
Playbooks Route - System Prompt Verwaltung.
"""
import logging
from typing import Optional
from fastapi import APIRouter, HTTPException, Depends
from pydantic import BaseModel
from ..services.playbook_service import get_playbook_service, Playbook
from ..middleware.auth import verify_api_key
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/playbooks", tags=["Playbooks"])
class PlaybookSummary(BaseModel):
"""Zusammenfassung eines Playbooks (ohne System Prompt)."""
id: str
name: str
description: str
prompt_version: str
recommended_models: list[str]
class PlaybookDetail(BaseModel):
"""Vollständige Playbook-Details."""
id: str
name: str
description: str
system_prompt: str
prompt_version: str
recommended_models: list[str]
tool_policy: dict
status: str
class PlaybookListResponse(BaseModel):
"""Response für Playbook-Liste."""
items: list[PlaybookSummary]
@router.get("", response_model=PlaybookListResponse)
async def list_playbooks(
status: Optional[str] = "published",
_: str = Depends(verify_api_key),
):
"""
Liste verfügbarer Playbooks.
Playbooks sind versionierte System-Prompt-Vorlagen für spezifische Schulkontexte.
"""
service = get_playbook_service()
playbooks = service.list_playbooks(status=status)
return PlaybookListResponse(
items=[
PlaybookSummary(
id=p.id,
name=p.name,
description=p.description,
prompt_version=p.prompt_version,
recommended_models=p.recommended_models,
)
for p in playbooks
]
)
@router.get("/{playbook_id}", response_model=PlaybookDetail)
async def get_playbook(
playbook_id: str,
_: str = Depends(verify_api_key),
):
"""
Details zu einem Playbook abrufen.
Enthält den vollständigen System Prompt und Tool-Policies.
"""
service = get_playbook_service()
playbook = service.get_playbook(playbook_id)
if not playbook:
raise HTTPException(status_code=404, detail=f"Playbook {playbook_id} not found")
return PlaybookDetail(
id=playbook.id,
name=playbook.name,
description=playbook.description,
system_prompt=playbook.system_prompt,
prompt_version=playbook.prompt_version,
recommended_models=playbook.recommended_models,
tool_policy=playbook.tool_policy,
status=playbook.status,
)
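# Usage sketch (hedged: host, mount prefix and the API key header name are assumptions;
# verify_api_key defines the real header, and real playbook ids come from the list call):
#
#   curl -H "X-API-Key: $LLM_GATEWAY_KEY" "http://localhost:8000/playbooks?status=published"
#   curl -H "X-API-Key: $LLM_GATEWAY_KEY" "http://localhost:8000/playbooks/teacher-default"
#
# "teacher-default" is a hypothetical playbook id used only for illustration.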

View File

@@ -0,0 +1,867 @@
"""
Schools API Routes.
CRUD operations for managing German schools (~40,000 schools).
Direct database access to PostgreSQL.
"""
import os
import logging
from typing import Optional, List
from datetime import datetime
from uuid import UUID
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
import asyncpg
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/schools", tags=["schools"])
# Database connection pool
_pool: Optional[asyncpg.Pool] = None
async def get_db_pool() -> asyncpg.Pool:
"""Get or create database connection pool."""
global _pool
if _pool is None:
database_url = os.environ.get(
"DATABASE_URL",
"postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db"
)
_pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
return _pool
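# Configuration sketch: the pool honours DATABASE_URL, so a local run can point the
# schools routes at another instance (credentials below are placeholders, not real):
#
#   export DATABASE_URL=postgresql://user:pass@localhost:5432/breakpilot_db
#
# min_size=2 / max_size=10 above are the pool bounds shared by all schools endpoints.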
# =============================================================================
# Pydantic Models
# =============================================================================
class SchoolTypeResponse(BaseModel):
"""School type response model."""
id: str
name: str
name_short: Optional[str] = None
category: Optional[str] = None
description: Optional[str] = None
class SchoolBase(BaseModel):
"""Base school model for creation/update."""
name: str = Field(..., max_length=255)
school_number: Optional[str] = Field(None, max_length=20)
school_type_id: Optional[str] = None
school_type_raw: Optional[str] = None
state: str = Field(..., max_length=10)
district: Optional[str] = None
city: Optional[str] = None
postal_code: Optional[str] = None
street: Optional[str] = None
address_full: Optional[str] = None
latitude: Optional[float] = None
longitude: Optional[float] = None
website: Optional[str] = None
email: Optional[str] = None
phone: Optional[str] = None
fax: Optional[str] = None
principal_name: Optional[str] = None
principal_title: Optional[str] = None
principal_email: Optional[str] = None
principal_phone: Optional[str] = None
secretary_name: Optional[str] = None
secretary_email: Optional[str] = None
secretary_phone: Optional[str] = None
student_count: Optional[int] = None
teacher_count: Optional[int] = None
class_count: Optional[int] = None
founded_year: Optional[int] = None
is_public: bool = True
is_all_day: Optional[bool] = None
has_inclusion: Optional[bool] = None
languages: Optional[List[str]] = None
specializations: Optional[List[str]] = None
source: Optional[str] = None
source_url: Optional[str] = None
class SchoolCreate(SchoolBase):
"""School creation model."""
pass
class SchoolUpdate(BaseModel):
"""School update model (all fields optional)."""
name: Optional[str] = Field(None, max_length=255)
school_number: Optional[str] = None
school_type_id: Optional[str] = None
state: Optional[str] = None
district: Optional[str] = None
city: Optional[str] = None
postal_code: Optional[str] = None
street: Optional[str] = None
website: Optional[str] = None
email: Optional[str] = None
phone: Optional[str] = None
principal_name: Optional[str] = None
student_count: Optional[int] = None
teacher_count: Optional[int] = None
is_active: Optional[bool] = None
class SchoolResponse(BaseModel):
"""School response model."""
id: str
name: str
school_number: Optional[str] = None
school_type: Optional[str] = None
school_type_short: Optional[str] = None
school_category: Optional[str] = None
state: str
district: Optional[str] = None
city: Optional[str] = None
postal_code: Optional[str] = None
street: Optional[str] = None
address_full: Optional[str] = None
latitude: Optional[float] = None
longitude: Optional[float] = None
website: Optional[str] = None
email: Optional[str] = None
phone: Optional[str] = None
fax: Optional[str] = None
principal_name: Optional[str] = None
principal_email: Optional[str] = None
student_count: Optional[int] = None
teacher_count: Optional[int] = None
is_public: bool = True
is_all_day: Optional[bool] = None
staff_count: int = 0
source: Optional[str] = None
crawled_at: Optional[datetime] = None
is_active: bool = True
created_at: datetime
updated_at: datetime
class SchoolsListResponse(BaseModel):
"""List response with pagination info."""
schools: List[SchoolResponse]
total: int
page: int
page_size: int
class SchoolStaffBase(BaseModel):
"""Base school staff model."""
first_name: Optional[str] = None
last_name: str
full_name: Optional[str] = None
title: Optional[str] = None
position: Optional[str] = None
position_type: Optional[str] = None
subjects: Optional[List[str]] = None
email: Optional[str] = None
phone: Optional[str] = None
class SchoolStaffCreate(SchoolStaffBase):
"""School staff creation model."""
school_id: str
class SchoolStaffResponse(SchoolStaffBase):
"""School staff response model."""
id: str
school_id: str
school_name: Optional[str] = None
profile_url: Optional[str] = None
photo_url: Optional[str] = None
is_active: bool = True
created_at: datetime
class SchoolStaffListResponse(BaseModel):
"""Staff list response."""
staff: List[SchoolStaffResponse]
total: int
class SchoolStatsResponse(BaseModel):
"""School statistics response."""
total_schools: int
total_staff: int
schools_by_state: dict
schools_by_type: dict
schools_with_website: int
schools_with_email: int
schools_with_principal: int
total_students: int
total_teachers: int
last_crawl_time: Optional[datetime] = None
class BulkImportRequest(BaseModel):
"""Bulk import request."""
schools: List[SchoolCreate]
class BulkImportResponse(BaseModel):
"""Bulk import response."""
imported: int
updated: int
skipped: int
errors: List[str]
# =============================================================================
# School Type Endpoints
# =============================================================================
@router.get("/types", response_model=List[SchoolTypeResponse])
async def list_school_types():
"""List all school types."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT id, name, name_short, category, description
FROM school_types
ORDER BY category, name
""")
return [
SchoolTypeResponse(
id=str(row["id"]),
name=row["name"],
name_short=row["name_short"],
category=row["category"],
description=row["description"],
)
for row in rows
]
# =============================================================================
# School Endpoints
# =============================================================================
@router.get("", response_model=SchoolsListResponse)
async def list_schools(
state: Optional[str] = Query(None, description="Filter by state code (BW, BY, etc.)"),
school_type: Optional[str] = Query(None, description="Filter by school type name"),
city: Optional[str] = Query(None, description="Filter by city"),
district: Optional[str] = Query(None, description="Filter by district"),
postal_code: Optional[str] = Query(None, description="Filter by postal code prefix"),
search: Optional[str] = Query(None, description="Search in name, city"),
has_email: Optional[bool] = Query(None, description="Filter schools with email"),
has_website: Optional[bool] = Query(None, description="Filter schools with website"),
is_public: Optional[bool] = Query(None, description="Filter public/private schools"),
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=1, le=200),
):
"""List schools with optional filtering and pagination."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Build WHERE clause
conditions = ["s.is_active = TRUE"]
params = []
param_idx = 1
if state:
conditions.append(f"s.state = ${param_idx}")
params.append(state.upper())
param_idx += 1
if school_type:
conditions.append(f"st.name = ${param_idx}")
params.append(school_type)
param_idx += 1
if city:
conditions.append(f"LOWER(s.city) = LOWER(${param_idx})")
params.append(city)
param_idx += 1
if district:
conditions.append(f"LOWER(s.district) LIKE LOWER(${param_idx})")
params.append(f"%{district}%")
param_idx += 1
if postal_code:
conditions.append(f"s.postal_code LIKE ${param_idx}")
params.append(f"{postal_code}%")
param_idx += 1
if search:
conditions.append(f"""
(LOWER(s.name) LIKE LOWER(${param_idx})
OR LOWER(s.city) LIKE LOWER(${param_idx})
OR LOWER(s.district) LIKE LOWER(${param_idx}))
""")
params.append(f"%{search}%")
param_idx += 1
if has_email is not None:
if has_email:
conditions.append("s.email IS NOT NULL")
else:
conditions.append("s.email IS NULL")
if has_website is not None:
if has_website:
conditions.append("s.website IS NOT NULL")
else:
conditions.append("s.website IS NULL")
if is_public is not None:
conditions.append(f"s.is_public = ${param_idx}")
params.append(is_public)
param_idx += 1
where_clause = " AND ".join(conditions)
# Count total
count_query = f"""
SELECT COUNT(*) FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE {where_clause}
"""
total = await conn.fetchval(count_query, *params)
# Fetch schools
offset = (page - 1) * page_size
query = f"""
SELECT
s.id, s.name, s.school_number, s.state, s.district, s.city,
s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
s.website, s.email, s.phone, s.fax,
s.principal_name, s.principal_email,
s.student_count, s.teacher_count,
s.is_public, s.is_all_day, s.source, s.crawled_at,
s.is_active, s.created_at, s.updated_at,
st.name as school_type, st.name_short as school_type_short, st.category as school_category,
(SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE {where_clause}
ORDER BY s.state, s.city, s.name
LIMIT ${param_idx} OFFSET ${param_idx + 1}
"""
params.extend([page_size, offset])
rows = await conn.fetch(query, *params)
schools = [
SchoolResponse(
id=str(row["id"]),
name=row["name"],
school_number=row["school_number"],
school_type=row["school_type"],
school_type_short=row["school_type_short"],
school_category=row["school_category"],
state=row["state"],
district=row["district"],
city=row["city"],
postal_code=row["postal_code"],
street=row["street"],
address_full=row["address_full"],
latitude=row["latitude"],
longitude=row["longitude"],
website=row["website"],
email=row["email"],
phone=row["phone"],
fax=row["fax"],
principal_name=row["principal_name"],
principal_email=row["principal_email"],
student_count=row["student_count"],
teacher_count=row["teacher_count"],
is_public=row["is_public"],
is_all_day=row["is_all_day"],
staff_count=row["staff_count"],
source=row["source"],
crawled_at=row["crawled_at"],
is_active=row["is_active"],
created_at=row["created_at"],
updated_at=row["updated_at"],
)
for row in rows
]
return SchoolsListResponse(
schools=schools,
total=total,
page=page,
page_size=page_size,
)
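# Query sketch (host assumed; parameters correspond to the Query() filters above):
#
#   GET /schools?state=BY&school_type=Gymnasium&has_email=true&page=1&page_size=50
#
# school_type must match a school_types.name entry exactly, while the state filter is
# upper-cased server-side, so "by" works as well.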
@router.get("/stats", response_model=SchoolStatsResponse)
async def get_school_stats():
"""Get school statistics."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Total schools and staff
totals = await conn.fetchrow("""
SELECT
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE) as total_schools,
(SELECT COUNT(*) FROM school_staff WHERE is_active = TRUE) as total_staff,
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND website IS NOT NULL) as with_website,
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND email IS NOT NULL) as with_email,
(SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND principal_name IS NOT NULL) as with_principal,
(SELECT COALESCE(SUM(student_count), 0) FROM schools WHERE is_active = TRUE) as total_students,
(SELECT COALESCE(SUM(teacher_count), 0) FROM schools WHERE is_active = TRUE) as total_teachers,
(SELECT MAX(crawled_at) FROM schools) as last_crawl
""")
# By state
state_rows = await conn.fetch("""
SELECT state, COUNT(*) as count
FROM schools
WHERE is_active = TRUE
GROUP BY state
ORDER BY state
""")
schools_by_state = {row["state"]: row["count"] for row in state_rows}
# By type
type_rows = await conn.fetch("""
SELECT COALESCE(st.name, 'Unbekannt') as type_name, COUNT(*) as count
FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE s.is_active = TRUE
GROUP BY st.name
ORDER BY count DESC
""")
schools_by_type = {row["type_name"]: row["count"] for row in type_rows}
return SchoolStatsResponse(
total_schools=totals["total_schools"],
total_staff=totals["total_staff"],
schools_by_state=schools_by_state,
schools_by_type=schools_by_type,
schools_with_website=totals["with_website"],
schools_with_email=totals["with_email"],
schools_with_principal=totals["with_principal"],
total_students=totals["total_students"],
total_teachers=totals["total_teachers"],
last_crawl_time=totals["last_crawl"],
)
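# Stats sketch (host assumed):
#
#   GET /schools/stats
#   # -> totals, schools_by_state, schools_by_type plus coverage counts
#   #    (website, email, principal) and the last crawl timestamp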
@router.get("/{school_id}", response_model=SchoolResponse)
async def get_school(school_id: str):
"""Get a single school by ID."""
pool = await get_db_pool()
async with pool.acquire() as conn:
row = await conn.fetchrow("""
SELECT
s.id, s.name, s.school_number, s.state, s.district, s.city,
s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
s.website, s.email, s.phone, s.fax,
s.principal_name, s.principal_email,
s.student_count, s.teacher_count,
s.is_public, s.is_all_day, s.source, s.crawled_at,
s.is_active, s.created_at, s.updated_at,
st.name as school_type, st.name_short as school_type_short, st.category as school_category,
(SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
FROM schools s
LEFT JOIN school_types st ON s.school_type_id = st.id
WHERE s.id = $1
""", school_id)
if not row:
raise HTTPException(status_code=404, detail="School not found")
return SchoolResponse(
id=str(row["id"]),
name=row["name"],
school_number=row["school_number"],
school_type=row["school_type"],
school_type_short=row["school_type_short"],
school_category=row["school_category"],
state=row["state"],
district=row["district"],
city=row["city"],
postal_code=row["postal_code"],
street=row["street"],
address_full=row["address_full"],
latitude=row["latitude"],
longitude=row["longitude"],
website=row["website"],
email=row["email"],
phone=row["phone"],
fax=row["fax"],
principal_name=row["principal_name"],
principal_email=row["principal_email"],
student_count=row["student_count"],
teacher_count=row["teacher_count"],
is_public=row["is_public"],
is_all_day=row["is_all_day"],
staff_count=row["staff_count"],
source=row["source"],
crawled_at=row["crawled_at"],
is_active=row["is_active"],
created_at=row["created_at"],
updated_at=row["updated_at"],
)
@router.post("/bulk-import", response_model=BulkImportResponse)
async def bulk_import_schools(request: BulkImportRequest):
"""Bulk import schools. Updates existing schools based on school_number + state."""
pool = await get_db_pool()
imported = 0
updated = 0
skipped = 0
errors = []
async with pool.acquire() as conn:
# Get school type mapping
type_rows = await conn.fetch("SELECT id, name FROM school_types")
type_map = {row["name"].lower(): str(row["id"]) for row in type_rows}
for school in request.schools:
try:
# Find school type ID
school_type_id = None
if school.school_type_raw:
school_type_id = type_map.get(school.school_type_raw.lower())
# Check if school exists (by school_number + state, or by name + city + state)
existing = None
if school.school_number:
existing = await conn.fetchrow(
"SELECT id FROM schools WHERE school_number = $1 AND state = $2",
school.school_number, school.state
)
if not existing and school.city:
existing = await conn.fetchrow(
"SELECT id FROM schools WHERE LOWER(name) = LOWER($1) AND LOWER(city) = LOWER($2) AND state = $3",
school.name, school.city, school.state
)
if existing:
# Update existing school
await conn.execute("""
UPDATE schools SET
name = $2,
school_type_id = COALESCE($3, school_type_id),
school_type_raw = COALESCE($4, school_type_raw),
district = COALESCE($5, district),
city = COALESCE($6, city),
postal_code = COALESCE($7, postal_code),
street = COALESCE($8, street),
address_full = COALESCE($9, address_full),
latitude = COALESCE($10, latitude),
longitude = COALESCE($11, longitude),
website = COALESCE($12, website),
email = COALESCE($13, email),
phone = COALESCE($14, phone),
fax = COALESCE($15, fax),
principal_name = COALESCE($16, principal_name),
principal_title = COALESCE($17, principal_title),
principal_email = COALESCE($18, principal_email),
principal_phone = COALESCE($19, principal_phone),
student_count = COALESCE($20, student_count),
teacher_count = COALESCE($21, teacher_count),
is_public = $22,
source = COALESCE($23, source),
source_url = COALESCE($24, source_url),
updated_at = NOW()
WHERE id = $1
""",
existing["id"],
school.name,
school_type_id,
school.school_type_raw,
school.district,
school.city,
school.postal_code,
school.street,
school.address_full,
school.latitude,
school.longitude,
school.website,
school.email,
school.phone,
school.fax,
school.principal_name,
school.principal_title,
school.principal_email,
school.principal_phone,
school.student_count,
school.teacher_count,
school.is_public,
school.source,
school.source_url,
)
updated += 1
else:
# Insert new school
await conn.execute("""
INSERT INTO schools (
name, school_number, school_type_id, school_type_raw,
state, district, city, postal_code, street, address_full,
latitude, longitude, website, email, phone, fax,
principal_name, principal_title, principal_email, principal_phone,
student_count, teacher_count, is_public,
source, source_url, crawled_at
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
$21, $22, $23, $24, $25, NOW()
)
""",
school.name,
school.school_number,
school_type_id,
school.school_type_raw,
school.state,
school.district,
school.city,
school.postal_code,
school.street,
school.address_full,
school.latitude,
school.longitude,
school.website,
school.email,
school.phone,
school.fax,
school.principal_name,
school.principal_title,
school.principal_email,
school.principal_phone,
school.student_count,
school.teacher_count,
school.is_public,
school.source,
school.source_url,
)
imported += 1
except Exception as e:
errors.append(f"Error importing {school.name}: {str(e)}")
                # cap at 100 entries so the truncation note survives the final [:100] slice
                if len(errors) >= 99:
                    errors.append("... (more errors truncated)")
                    break
return BulkImportResponse(
imported=imported,
updated=updated,
skipped=skipped,
errors=errors[:100],
)
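# Import payload sketch (values are invented examples; matching against existing rows
# uses school_number + state first, then name + city + state, as implemented above):
#
#   POST /schools/bulk-import
#   {
#     "schools": [
#       {"name": "Beispiel-Gymnasium", "state": "NW", "city": "Köln",
#        "school_number": "123456", "school_type_raw": "Gymnasium"}
#     ]
#   }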
# =============================================================================
# School Staff Endpoints
# =============================================================================
@router.get("/{school_id}/staff", response_model=SchoolStaffListResponse)
async def get_school_staff(school_id: str):
"""Get staff members for a school."""
pool = await get_db_pool()
async with pool.acquire() as conn:
rows = await conn.fetch("""
SELECT
ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
ss.title, ss.position, ss.position_type, ss.subjects,
ss.email, ss.phone, ss.profile_url, ss.photo_url,
ss.is_active, ss.created_at,
s.name as school_name
FROM school_staff ss
JOIN schools s ON ss.school_id = s.id
WHERE ss.school_id = $1 AND ss.is_active = TRUE
ORDER BY
CASE ss.position_type
WHEN 'principal' THEN 1
WHEN 'vice_principal' THEN 2
WHEN 'secretary' THEN 3
ELSE 4
END,
ss.last_name
""", school_id)
staff = [
SchoolStaffResponse(
id=str(row["id"]),
school_id=str(row["school_id"]),
school_name=row["school_name"],
first_name=row["first_name"],
last_name=row["last_name"],
full_name=row["full_name"],
title=row["title"],
position=row["position"],
position_type=row["position_type"],
subjects=row["subjects"],
email=row["email"],
phone=row["phone"],
profile_url=row["profile_url"],
photo_url=row["photo_url"],
is_active=row["is_active"],
created_at=row["created_at"],
)
for row in rows
]
return SchoolStaffListResponse(
staff=staff,
total=len(staff),
)
@router.post("/{school_id}/staff", response_model=SchoolStaffResponse)
async def create_school_staff(school_id: str, staff: SchoolStaffBase):
"""Add a staff member to a school."""
pool = await get_db_pool()
async with pool.acquire() as conn:
# Verify school exists
school = await conn.fetchrow("SELECT name FROM schools WHERE id = $1", school_id)
if not school:
raise HTTPException(status_code=404, detail="School not found")
# Create full name
full_name = staff.full_name
if not full_name:
parts = []
if staff.title:
parts.append(staff.title)
if staff.first_name:
parts.append(staff.first_name)
parts.append(staff.last_name)
full_name = " ".join(parts)
row = await conn.fetchrow("""
INSERT INTO school_staff (
school_id, first_name, last_name, full_name, title,
position, position_type, subjects, email, phone
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
RETURNING id, created_at
""",
school_id,
staff.first_name,
staff.last_name,
full_name,
staff.title,
staff.position,
staff.position_type,
staff.subjects,
staff.email,
staff.phone,
)
return SchoolStaffResponse(
id=str(row["id"]),
school_id=school_id,
school_name=school["name"],
first_name=staff.first_name,
last_name=staff.last_name,
full_name=full_name,
title=staff.title,
position=staff.position,
position_type=staff.position_type,
subjects=staff.subjects,
email=staff.email,
phone=staff.phone,
is_active=True,
created_at=row["created_at"],
)
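# Staff creation sketch (the school id and person are invented; position_type feeds the
# ordering used in get_school_staff above):
#
#   POST /schools/<school-uuid>/staff
#   {"last_name": "Muster", "title": "Dr.", "position": "Schulleiterin",
#    "position_type": "principal", "email": "leitung@example-schule.de"}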
# =============================================================================
# Search Endpoints
# =============================================================================
# NOTE: FastAPI matches routes in registration order. Because GET
# /{school_id}/staff is registered above, a request to /schools/search/staff is
# captured by that route first; this endpoint only becomes reachable if it is
# registered before the parameterized staff route (or given a distinct path).
@router.get("/search/staff", response_model=SchoolStaffListResponse)
async def search_school_staff(
q: Optional[str] = Query(None, description="Search query"),
state: Optional[str] = Query(None, description="Filter by state"),
position_type: Optional[str] = Query(None, description="Filter by position type"),
has_email: Optional[bool] = Query(None, description="Only staff with email"),
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=1, le=200),
):
"""Search school staff across all schools."""
pool = await get_db_pool()
async with pool.acquire() as conn:
conditions = ["ss.is_active = TRUE", "s.is_active = TRUE"]
params = []
param_idx = 1
if q:
conditions.append(f"""
(LOWER(ss.full_name) LIKE LOWER(${param_idx})
OR LOWER(ss.last_name) LIKE LOWER(${param_idx})
OR LOWER(s.name) LIKE LOWER(${param_idx}))
""")
params.append(f"%{q}%")
param_idx += 1
if state:
conditions.append(f"s.state = ${param_idx}")
params.append(state.upper())
param_idx += 1
if position_type:
conditions.append(f"ss.position_type = ${param_idx}")
params.append(position_type)
param_idx += 1
if has_email is not None and has_email:
conditions.append("ss.email IS NOT NULL")
where_clause = " AND ".join(conditions)
# Count total
total = await conn.fetchval(f"""
SELECT COUNT(*) FROM school_staff ss
JOIN schools s ON ss.school_id = s.id
WHERE {where_clause}
""", *params)
# Fetch staff
offset = (page - 1) * page_size
rows = await conn.fetch(f"""
SELECT
ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
ss.title, ss.position, ss.position_type, ss.subjects,
ss.email, ss.phone, ss.profile_url, ss.photo_url,
ss.is_active, ss.created_at,
s.name as school_name
FROM school_staff ss
JOIN schools s ON ss.school_id = s.id
WHERE {where_clause}
ORDER BY ss.last_name, ss.first_name
LIMIT ${param_idx} OFFSET ${param_idx + 1}
""", *params, page_size, offset)
staff = [
SchoolStaffResponse(
id=str(row["id"]),
school_id=str(row["school_id"]),
school_name=row["school_name"],
first_name=row["first_name"],
last_name=row["last_name"],
full_name=row["full_name"],
title=row["title"],
position=row["position"],
position_type=row["position_type"],
subjects=row["subjects"],
email=row["email"],
phone=row["phone"],
profile_url=row["profile_url"],
photo_url=row["photo_url"],
is_active=row["is_active"],
created_at=row["created_at"],
)
for row in rows
]
return SchoolStaffListResponse(
staff=staff,
total=total,
)
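# Search sketch (host assumed; see the routing note above: /{school_id}/staff currently
# shadows this path, so the example only works once that ordering is resolved):
#
#   GET /schools/search/staff?q=Müller&state=NW&position_type=principal&has_email=true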

View File

@@ -0,0 +1,174 @@
"""
Tool routes for the LLM Gateway.
Provides API endpoints for external tools such as web search.
"""
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, Field
from ..middleware.auth import verify_api_key
from ..services.tool_gateway import (
ToolGateway,
get_tool_gateway,
SearchDepth,
TavilyError,
ToolGatewayError,
)
router = APIRouter()
# Request/Response Models
class SearchRequest(BaseModel):
"""Request für Web-Suche."""
query: str = Field(..., min_length=1, max_length=1000, description="Suchanfrage")
search_depth: Optional[SearchDepth] = Field(
default=None,
description="Suchtiefe: basic (schnell) oder advanced (gründlich)",
)
max_results: Optional[int] = Field(
default=None,
ge=1,
le=20,
description="Maximale Anzahl Ergebnisse (1-20)",
)
include_domains: Optional[list[str]] = Field(
default=None,
description="Nur diese Domains durchsuchen",
)
exclude_domains: Optional[list[str]] = Field(
default=None,
description="Diese Domains ausschließen",
)
class SearchResultItem(BaseModel):
"""Ein Suchergebnis."""
title: str
url: str
content: str
score: float
published_date: Optional[str] = None
class SearchResponse(BaseModel):
"""Response für Web-Suche."""
query: str
redacted_query: Optional[str] = Field(
default=None,
description="Redaktierte Query (nur wenn PII gefunden)",
)
results: list[SearchResultItem]
answer: Optional[str] = Field(
default=None,
description="KI-generierte Zusammenfassung der Ergebnisse",
)
pii_detected: bool = Field(
default=False,
description="True wenn PII in der Anfrage erkannt und redaktiert wurde",
)
pii_types: list[str] = Field(
default_factory=list,
description="Liste der erkannten PII-Typen",
)
response_time_ms: int = Field(
default=0,
description="Antwortzeit in Millisekunden",
)
class ToolsHealthResponse(BaseModel):
"""Health-Status der Tools."""
tavily: dict
pii_redaction: dict
@router.post("/search", response_model=SearchResponse)
async def web_search(
request: SearchRequest,
_: str = Depends(verify_api_key),
tool_gateway: ToolGateway = Depends(get_tool_gateway),
):
"""
Führt eine Web-Suche durch.
Die Suchanfrage wird automatisch auf personenbezogene Daten (PII)
geprüft. Gefundene PII werden vor dem Versand an den Suchdienst
redaktiert, um DSGVO-Konformität zu gewährleisten.
**PII-Erkennung umfasst:**
- E-Mail-Adressen
- Telefonnummern
- IBAN/Bankkonten
- Kreditkartennummern
- Sozialversicherungsnummern
- IP-Adressen
- Geburtsdaten
**Beispiel:**
```
POST /llm/tools/search
{
"query": "Schulrecht Bayern Datenschutz",
"max_results": 5
}
```
"""
try:
result = await tool_gateway.search(
query=request.query,
search_depth=request.search_depth,
max_results=request.max_results,
include_domains=request.include_domains,
exclude_domains=request.exclude_domains,
)
return SearchResponse(
query=result.query,
redacted_query=result.redacted_query,
results=[
SearchResultItem(
title=r.title,
url=r.url,
content=r.content,
score=r.score,
published_date=r.published_date,
)
for r in result.results
],
answer=result.answer,
pii_detected=result.pii_detected,
pii_types=result.pii_types,
response_time_ms=result.response_time_ms,
)
except TavilyError as e:
# TavilyError first (more specific, inherits from ToolGatewayError)
raise HTTPException(
status_code=502,
detail=f"Search service error: {e}",
)
except ToolGatewayError as e:
raise HTTPException(
status_code=503,
detail=f"Tool service unavailable: {e}",
)
@router.get("/health", response_model=ToolsHealthResponse)
async def tools_health(
_: str = Depends(verify_api_key),
tool_gateway: ToolGateway = Depends(get_tool_gateway),
):
"""
Prüft den Gesundheitsstatus der Tool-Services.
Gibt Status für jeden konfigurierten Tool-Service zurück:
- Tavily: Web-Suche
- PII Redaction: Datenschutz-Filter
"""
status = await tool_gateway.health_check()
return ToolsHealthResponse(**status)
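# Health probe sketch (the /llm/tools mount prefix is taken from the search example in
# the web_search docstring above; the API key header name is an assumption):
#
#   curl -H "X-API-Key: $LLM_GATEWAY_KEY" http://localhost:8000/llm/tools/health
#   # -> {"tavily": {...}, "pii_redaction": {...}}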