fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
21
backend/llm_gateway/routes/__init__.py
Normal file
21
backend/llm_gateway/routes/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""
|
||||
LLM Gateway Routes.
|
||||
"""
|
||||
|
||||
from .chat import router as chat_router
|
||||
from .playbooks import router as playbooks_router
|
||||
from .health import router as health_router
|
||||
from .tools import router as tools_router
|
||||
from .comparison import router as comparison_router
|
||||
from .edu_search_seeds import router as edu_search_seeds_router
|
||||
from .communication import router as communication_router
|
||||
|
||||
__all__ = [
|
||||
"chat_router",
|
||||
"playbooks_router",
|
||||
"health_router",
|
||||
"tools_router",
|
||||
"comparison_router",
|
||||
"edu_search_seeds_router",
|
||||
"communication_router",
|
||||
]
|
||||
112
backend/llm_gateway/routes/chat.py
Normal file
112
backend/llm_gateway/routes/chat.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""
|
||||
Chat Completions Route - OpenAI-kompatible API.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import AsyncIterator
|
||||
from fastapi import APIRouter, HTTPException, Depends
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
from ..models.chat import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatMessage,
|
||||
ModelListResponse,
|
||||
)
|
||||
from ..services.inference import get_inference_service, InferenceService
|
||||
from ..services.playbook_service import get_playbook_service, PlaybookService
|
||||
from ..middleware.auth import verify_api_key
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["LLM"])
|
||||
|
||||
|
||||
def get_services():
    """Resolve the service singletons used by the chat endpoints.

    Returns:
        Tuple of (InferenceService, PlaybookService).
    """
    inference = get_inference_service()
    playbooks = get_playbook_service()
    return inference, playbooks
|
||||
|
||||
|
||||
@router.post("/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(
    request: ChatCompletionRequest,
    _: str = Depends(verify_api_key),
):
    """
    OpenAI-compatible chat completions endpoint.

    Supports:
    - Streaming (stream=true)
    - Playbook-based system prompts (metadata.playbook_id)
    - Multiple models (breakpilot-teacher-8b, claude-3-5-sonnet, etc.)

    Raises:
        HTTPException: 400 on invalid request values, 500 on internal errors.
    """
    inference_service, playbook_service = get_services()

    # Inject the playbook's system prompt, if one was requested and exists.
    playbook_id = request.metadata.playbook_id if request.metadata else None
    if playbook_id:
        playbook = playbook_service.get_playbook(playbook_id)
        if playbook:
            existing_system = next(
                (m for m in request.messages if m.role == "system"), None
            )
            if existing_system is None:
                # No system message yet -> playbook prompt becomes the first message.
                request.messages.insert(
                    0, ChatMessage(role="system", content=playbook.system_prompt)
                )
            else:
                # Prepend the playbook prompt to the existing system message.
                existing_system.content = (
                    f"{playbook.system_prompt}\n\n{existing_system.content}"
                )

    try:
        if not request.stream:
            return await inference_service.complete(request)
        return StreamingResponse(
            stream_response(request, inference_service),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no",
            },
        )
    except ValueError as e:
        logger.error(f"Chat completion error: {e}")
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.exception(f"Chat completion failed: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
async def stream_response(
    request: ChatCompletionRequest,
    inference_service: InferenceService,
) -> AsyncIterator[str]:
    """Yield SSE frames for a streamed chat completion.

    Each chunk is serialized to JSON and wrapped in an SSE ``data:`` frame;
    a final ``[DONE]`` sentinel terminates the stream. Errors are reported
    in-band as a JSON object rather than raised through the response.
    """
    try:
        async for chunk in inference_service.stream(request):
            yield f"data: {chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as e:
        logger.exception(f"Streaming error: {e}")
        yield f"data: {json.dumps({'error': str(e)})}\n\n"
|
||||
|
||||
|
||||
@router.get("/models", response_model=ModelListResponse)
async def list_models(
    _: str = Depends(verify_api_key),
):
    """List the models this gateway can currently serve.

    Returns:
        All configured models that are available right now.
    """
    return await get_inference_service().list_models()
|
||||
403
backend/llm_gateway/routes/communication.py
Normal file
403
backend/llm_gateway/routes/communication.py
Normal file
@@ -0,0 +1,403 @@
|
||||
"""
|
||||
Communication API Routes.
|
||||
|
||||
API-Endpoints für KI-gestützte Lehrer-Eltern-Kommunikation.
|
||||
Basiert auf den Prinzipien der gewaltfreien Kommunikation (GFK)
|
||||
und deutschen Schulgesetzen.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..services.communication_service import (
|
||||
get_communication_service,
|
||||
CommunicationService,
|
||||
CommunicationType,
|
||||
CommunicationTone,
|
||||
)
|
||||
from ..services.inference import InferenceService, get_inference_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/communication", tags=["communication"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pydantic Models
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class CommunicationTypeResponse(BaseModel):
    """A selectable communication type as a machine value / display label pair."""
    value: str
    label: str
|
||||
|
||||
|
||||
class ToneResponse(BaseModel):
    """A selectable tone of voice as a machine value / display label pair."""
    value: str
    label: str
|
||||
|
||||
|
||||
class StateResponse(BaseModel):
    """A German federal state as a machine value / display label pair."""
    value: str
    label: str
|
||||
|
||||
|
||||
class LegalReferenceResponse(BaseModel):
    """A legal reference backing a parent letter (law, paragraph, summary)."""
    law: str
    paragraph: str
    title: str
    summary: str
    # Why this reference applies to the current communication context.
    relevance: str
|
||||
|
||||
|
||||
class GFKPrincipleResponse(BaseModel):
    """One principle of nonviolent communication (GFK) with a worked example."""
    principle: str
    description: str
    example: str
|
||||
|
||||
|
||||
class GenerateRequest(BaseModel):
    """Request payload for generating a parent letter."""
    communication_type: str = Field(..., description="Art der Kommunikation (z.B. 'behavior', 'academic')")
    tone: str = Field("professional", description="Tonalität (formal, professional, warm, concerned, appreciative)")
    state: str = Field("NRW", description="Bundesland für rechtliche Referenzen")
    student_name: str = Field(..., description="Name des Schülers/der Schülerin")
    parent_name: str = Field(..., description="Name der Eltern (z.B. 'Frau Müller')")
    situation: str = Field(..., description="Beschreibung der Situation")
    additional_info: Optional[str] = Field(None, description="Zusätzliche Informationen")
|
||||
|
||||
|
||||
class GenerateResponse(BaseModel):
    """Response carrying the generated letter plus supporting metadata."""
    message: str
    subject: str
    # Raw result of CommunicationService.validate_communication (GFK check).
    validation: dict
    legal_references: List[LegalReferenceResponse]
    gfk_principles: List[GFKPrincipleResponse]
|
||||
|
||||
|
||||
class ValidateRequest(BaseModel):
    """Request payload for validating (or improving) a letter text."""
    text: str = Field(..., description="Der zu validierende Text")
|
||||
|
||||
|
||||
class ValidateResponse(BaseModel):
    """Result of the GFK (nonviolent communication) validation."""
    is_valid: bool
    issues: List[str]
    suggestions: List[str]
    positive_elements: List[str]
    # Higher means closer to GFK principles; presumably in [0, 1] — TODO confirm
    # against CommunicationService.validate_communication.
    gfk_score: float
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/types", response_model=List[CommunicationTypeResponse])
async def get_communication_types():
    """Return every available communication type.

    Returns:
        List of communication types as value/label pairs.
    """
    return get_communication_service().get_all_communication_types()
|
||||
|
||||
|
||||
@router.get("/tones", response_model=List[ToneResponse])
async def get_tones():
    """Return every available tone of voice.

    Returns:
        List of tones as value/label pairs.
    """
    return get_communication_service().get_all_tones()
|
||||
|
||||
|
||||
@router.get("/states", response_model=List[StateResponse])
async def get_states():
    """Return every supported German federal state.

    Returns:
        List of states as value/label pairs.
    """
    return get_communication_service().get_states()
|
||||
|
||||
|
||||
@router.get("/legal-references/{state}")
async def get_legal_references(state: str):
    """Return the legal references for one German state.

    Args:
        state: State abbreviation (e.g. NRW, BY).

    Returns:
        References on parental obligations ("elternpflichten") for the state.
    """
    service = get_communication_service()
    references = service.get_legal_references(state, "elternpflichten")

    return [
        LegalReferenceResponse(
            law=r.law,
            paragraph=r.paragraph,
            title=r.title,
            summary=r.summary,
            relevance=r.relevance,
        )
        for r in references
    ]
|
||||
|
||||
|
||||
@router.get("/gfk-principles", response_model=List[GFKPrincipleResponse])
async def get_gfk_principles():
    """Return the principles of nonviolent communication (GFK).

    Returns:
        GFK principles with description and example, based on the guidance
        for the generic "general info" communication type.
    """
    service = get_communication_service()
    guidance = service.get_gfk_guidance(CommunicationType.GENERAL_INFO)

    return [
        GFKPrincipleResponse(
            principle=item.principle,
            description=item.description,
            example=item.example,
        )
        for item in guidance
    ]
|
||||
|
||||
|
||||
@router.post("/generate", response_model=GenerateResponse)
async def generate_communication(request: GenerateRequest):
    """
    Generate a parent letter from the given context.

    Falls back to a static template when the inference service fails, then
    enriches the result with a GFK validation report, legal references for
    the requested state, the applicable GFK principles, and a subject line.

    Args:
        request: GenerateRequest with all required information.

    Returns:
        GenerateResponse with the generated text and metadata.

    Raises:
        HTTPException: 400 when the communication type is unknown.
    """
    service = get_communication_service()

    # Validate the communication type; unknown values are a client error.
    try:
        comm_type = CommunicationType(request.communication_type)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail=f"Ungültiger Kommunikationstyp: {request.communication_type}"
        )

    # Validate the tone; unknown values silently fall back to PROFESSIONAL.
    try:
        tone = CommunicationTone(request.tone)
    except ValueError:
        tone = CommunicationTone.PROFESSIONAL

    # Build the system and user prompts from the service's templates.
    system_prompt = service.build_system_prompt(comm_type, request.state, tone)
    user_prompt = service.build_user_prompt(comm_type, {
        "student_name": request.student_name,
        "parent_name": request.parent_name,
        "situation": request.situation,
        "additional_info": request.additional_info,
    })

    # Call the inference service; on failure fall back to a plain template.
    try:
        inference_service = get_inference_service()
        response = await inference_service.generate(
            prompt=user_prompt,
            system_prompt=system_prompt,
            temperature=0.7,  # somewhat creative, but controlled
            max_tokens=2000,
        )
        generated_message = response.get("content", "")
    except Exception as e:
        logger.error(f"Fehler bei der Nachrichtengenerierung: {e}")
        # Fallback: assemble the letter from the static template, truncating
        # the situation to 50 chars for the opening's topic placeholder.
        template = service.get_template(comm_type)
        generated_message = f"""{template['opening'].format(
            parent_name=request.parent_name,
            student_name=request.student_name,
            topic=request.situation[:50] + '...' if len(request.situation) > 50 else request.situation
        )}

{request.situation}

{template['closing'].format(
            student_name=request.student_name,
            legal_reference=f"des Schulgesetzes"
        )}"""

    # Run the GFK validation on whatever text we ended up with.
    validation = service.validate_communication(generated_message)

    # Map the communication type to a legal topic; default to parental duties.
    topic_map = {
        CommunicationType.ATTENDANCE: "schulpflicht",
        CommunicationType.BEHAVIOR: "ordnungsmassnahmen",
        CommunicationType.ACADEMIC: "foerderung",
        CommunicationType.SPECIAL_NEEDS: "foerderung",
    }
    topic = topic_map.get(comm_type, "elternpflichten")
    legal_refs = service.get_legal_references(request.state, topic)

    # GFK principles relevant to this communication type.
    gfk_principles = service.get_gfk_guidance(comm_type)

    # Build the subject line (situation truncated to 30 chars).
    template = service.get_template(comm_type)
    subject = template.get("subject", "Mitteilung der Schule").format(
        student_name=request.student_name,
        topic=request.situation[:30] + '...' if len(request.situation) > 30 else request.situation
    )

    return GenerateResponse(
        message=generated_message,
        subject=subject,
        validation=validation,
        legal_references=[
            LegalReferenceResponse(
                law=ref.law,
                paragraph=ref.paragraph,
                title=ref.title,
                summary=ref.summary,
                relevance=ref.relevance
            )
            for ref in legal_refs
        ],
        gfk_principles=[
            GFKPrincipleResponse(
                principle=p.principle,
                description=p.description,
                example=p.example
            )
            for p in gfk_principles
        ]
    )
|
||||
|
||||
|
||||
@router.post("/validate", response_model=ValidateResponse)
async def validate_communication(request: ValidateRequest):
    """Check a text for conformity with GFK (nonviolent communication).

    Args:
        request: ValidateRequest containing the text to check.

    Returns:
        ValidateResponse with the verdict, issues, suggestions, positive
        elements and the GFK score.
    """
    outcome = get_communication_service().validate_communication(request.text)

    return ValidateResponse(
        is_valid=outcome["is_valid"],
        issues=outcome["issues"],
        suggestions=outcome["suggestions"],
        positive_elements=outcome["positive_elements"],
        gfk_score=outcome["gfk_score"],
    )
|
||||
|
||||
|
||||
@router.post("/improve")
async def improve_communication(request: ValidateRequest):
    """
    Improve an existing text according to GFK principles.

    Texts that are already valid and score >= 0.8 are returned unchanged.
    Otherwise the LLM rewrites the text and the result is re-validated; if
    the LLM call fails, the original text is returned together with an
    error message (best-effort, never raises).

    Args:
        request: ValidateRequest with the text to improve.

    Returns:
        Dict with the (possibly) improved text and change metadata.
    """
    service = get_communication_service()

    # Validate first — good texts are passed through untouched.
    validation = service.validate_communication(request.text)

    if validation["is_valid"] and validation["gfk_score"] >= 0.8:
        return {
            "improved_text": request.text,
            "changes": [],
            "was_improved": False,
            "message": "Der Text entspricht bereits den GFK-Prinzipien."
        }

    # System prompt instructing the model how to rewrite the letter.
    system_prompt = """Du bist ein Experte für gewaltfreie Kommunikation (GFK) nach Marshall Rosenberg.
Deine Aufgabe ist es, einen Elternbrief zu verbessern, sodass er den GFK-Prinzipien entspricht.

VERBESSERUNGSREGELN:
1. Ersetze Bewertungen durch Beobachtungen
2. Ersetze "Sie müssen/sollten" durch Ich-Botschaften und Bitten
3. Entferne Schuldzuweisungen
4. Füge empathische Elemente hinzu
5. Behalte den sachlichen Inhalt bei

Gib den verbesserten Text zurück und erkläre kurz die wichtigsten Änderungen."""

    # User prompt carries the original text plus the validator's findings.
    user_prompt = f"""Bitte verbessere folgenden Elternbrief nach den GFK-Prinzipien:

---
{request.text}
---

Identifizierte Probleme:
{', '.join(validation['issues']) if validation['issues'] else 'Keine spezifischen Probleme gefunden, aber GFK-Score könnte verbessert werden.'}

Vorschläge:
{', '.join(validation['suggestions']) if validation['suggestions'] else 'Allgemeine Verbesserungen möglich.'}"""

    try:
        inference_service = get_inference_service()
        response = await inference_service.generate(
            prompt=user_prompt,
            system_prompt=system_prompt,
            temperature=0.5,
            max_tokens=2500,
        )
        improved_text = response.get("content", request.text)

        # Re-validate so the caller can compare before/after scores.
        new_validation = service.validate_communication(improved_text)

        return {
            "improved_text": improved_text,
            "original_issues": validation["issues"],
            "was_improved": True,
            "old_score": validation["gfk_score"],
            "new_score": new_validation["gfk_score"],
            "remaining_issues": new_validation["issues"],
        }
    except Exception as e:
        logger.error(f"Fehler bei der Textverbesserung: {e}")
        # Graceful degradation: hand the original text back with the error.
        return {
            "improved_text": request.text,
            "changes": [],
            "was_improved": False,
            "error": str(e),
            "message": "Die automatische Verbesserung ist derzeit nicht verfügbar."
        }
|
||||
584
backend/llm_gateway/routes/comparison.py
Normal file
584
backend/llm_gateway/routes/comparison.py
Normal file
@@ -0,0 +1,584 @@
|
||||
"""
|
||||
LLM Comparison Route - Vergleicht Antworten verschiedener LLM Backends.
|
||||
|
||||
Dieses Modul ermoeglicht:
|
||||
- Parallele Anfragen an OpenAI, Claude, Self-hosted+Tavily, Self-hosted+EduSearch
|
||||
- Speichern von Vergleichsergebnissen fuer QA
|
||||
- Parameter-Tuning fuer Self-hosted Modelle
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from fastapi import APIRouter, HTTPException, Depends
|
||||
|
||||
from ..models.chat import ChatMessage
|
||||
from ..middleware.auth import verify_api_key
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/comparison", tags=["LLM Comparison"])
|
||||
|
||||
|
||||
class ComparisonRequest(BaseModel):
    """Request for one LLM comparison run.

    One prompt is fanned out to every enabled provider; the generation and
    search parameters below only affect the self-hosted (Ollama) calls.
    """
    prompt: str = Field(..., description="User prompt (z.B. Lehrer-Frage)")
    system_prompt: Optional[str] = Field(None, description="Optionaler System Prompt")
    enable_openai: bool = Field(True, description="OpenAI/ChatGPT aktivieren")
    enable_claude: bool = Field(True, description="Claude aktivieren")
    enable_selfhosted_tavily: bool = Field(True, description="Self-hosted + Tavily aktivieren")
    enable_selfhosted_edusearch: bool = Field(True, description="Self-hosted + EduSearch aktivieren")

    # Parameters for the self-hosted (Ollama) models only.
    selfhosted_model: str = Field("llama3.2:3b", description="Self-hosted Modell")
    temperature: float = Field(0.7, ge=0.0, le=2.0, description="Temperature")
    top_p: float = Field(0.9, ge=0.0, le=1.0, description="Top-p Sampling")
    max_tokens: int = Field(2048, ge=1, le=8192, description="Max Tokens")

    # Search parameters for the retrieval-augmented variants.
    search_results_count: int = Field(5, ge=1, le=20, description="Anzahl Suchergebnisse")
    edu_search_filters: Optional[dict] = Field(None, description="Filter fuer EduSearch")
|
||||
|
||||
|
||||
class LLMResponse(BaseModel):
    """A single provider's answer within a comparison run.

    ``error`` is set (and ``response`` left empty) when the provider call
    failed; ``search_results`` is only populated for the search-augmented
    self-hosted providers.
    """
    provider: str
    model: str
    response: str
    latency_ms: int
    tokens_used: Optional[int] = None
    search_results: Optional[list] = None
    error: Optional[str] = None
    # Fix: datetime.utcnow is deprecated and returns a *naive* datetime,
    # inconsistent with datetime.now(timezone.utc) used elsewhere in this
    # module — use an aware UTC timestamp instead.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
|
||||
|
||||
class ComparisonResponse(BaseModel):
    """Aggregated result of one comparison run across all enabled providers."""
    comparison_id: str
    prompt: str
    system_prompt: Optional[str]
    responses: list[LLMResponse]
    # Fix: datetime.utcnow is deprecated and naive; this module otherwise
    # uses timezone-aware datetime.now(timezone.utc) — match that.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
|
||||
|
||||
class SavedComparison(BaseModel):
    """A stored comparison run, annotated for QA review."""
    comparison_id: str
    prompt: str
    system_prompt: Optional[str]
    responses: list[LLMResponse]
    notes: Optional[str] = None
    rating: Optional[dict] = None  # {"openai": 4, "claude": 5, ...}
    created_at: datetime
    created_by: Optional[str] = None
|
||||
|
||||
|
||||
# In-Memory Storage (in Production: Database)
# NOTE: both stores are process-local and lost on restart; no locking, so
# concurrent writers can race — acceptable for a QA tool, not for production.
_comparisons_store: dict[str, SavedComparison] = {}
# Built-in system prompts selectable in the comparison UI, keyed by id.
_system_prompts_store: dict[str, dict] = {
    "default": {
        "id": "default",
        "name": "Standard Lehrer-Assistent",
        "prompt": """Du bist ein hilfreicher Assistent fuer Lehrkraefte in Deutschland.
Deine Aufgaben:
- Hilfe bei der Unterrichtsplanung
- Erklaerung von Fachinhalten
- Erstellung von Arbeitsblaettern und Pruefungen
- Beratung zu paedagogischen Methoden

Antworte immer auf Deutsch und beachte den deutschen Lehrplankontext.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
    "curriculum": {
        "id": "curriculum",
        "name": "Lehrplan-Experte",
        "prompt": """Du bist ein Experte fuer deutsche Lehrplaene und Bildungsstandards.
Du kennst:
- Lehrplaene aller 16 Bundeslaender
- KMK Bildungsstandards
- Kompetenzorientierung im deutschen Bildungssystem

Beziehe dich immer auf konkrete Lehrplanvorgaben wenn moeglich.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
    "worksheet": {
        "id": "worksheet",
        "name": "Arbeitsblatt-Generator",
        "prompt": """Du bist ein spezialisierter Assistent fuer die Erstellung von Arbeitsblaettern.
Erstelle didaktisch sinnvolle Aufgaben mit:
- Klaren Arbeitsanweisungen
- Differenzierungsmoeglichkeiten
- Loesungshinweisen

Format: Markdown mit klarer Struktur.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
}
|
||||
|
||||
|
||||
async def _call_openai(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
    """Call OpenAI chat completions (gpt-4o-mini) for the comparison run.

    Never raises: a missing API key, HTTP error or malformed payload is
    reported in the returned LLMResponse's ``error`` field so one failing
    provider cannot break the whole comparison.
    """
    import os
    import httpx

    start_time = time.time()
    api_key = os.getenv("OPENAI_API_KEY")

    # No key configured -> in-band error, zero latency.
    if not api_key:
        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response="",
            latency_ms=0,
            error="OPENAI_API_KEY nicht konfiguriert"
        )

    # Optional system prompt first, then the user prompt.
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "gpt-4o-mini",
                    "messages": messages,
                    "temperature": 0.7,
                    "max_tokens": 2048,
                },
            )
            response.raise_for_status()
            data = response.json()

        latency_ms = int((time.time() - start_time) * 1000)
        content = data["choices"][0]["message"]["content"]
        tokens = data.get("usage", {}).get("total_tokens")

        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
        )
    except Exception as e:
        # Network/HTTP/payload failures: report with elapsed time so far.
        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
        )
|
||||
|
||||
|
||||
async def _call_claude(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
    """Call Anthropic Claude for the comparison run.

    The ``anthropic`` SDK is imported lazily inside the try block so the
    module still loads when the package is not installed; all failures are
    captured in the returned ``error`` field instead of raising.
    """
    import os

    start_time = time.time()
    api_key = os.getenv("ANTHROPIC_API_KEY")

    if not api_key:
        return LLMResponse(
            provider="claude",
            model="claude-3-5-sonnet-20241022",
            response="",
            latency_ms=0,
            error="ANTHROPIC_API_KEY nicht konfiguriert"
        )

    try:
        import anthropic
        client = anthropic.AsyncAnthropic(api_key=api_key)

        response = await client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=2048,
            system=system_prompt or "",
            messages=[{"role": "user", "content": prompt}],
        )

        latency_ms = int((time.time() - start_time) * 1000)
        # Claude returns a list of content blocks; take the first text block.
        content = response.content[0].text if response.content else ""
        tokens = response.usage.input_tokens + response.usage.output_tokens

        return LLMResponse(
            provider="claude",
            model="claude-3-5-sonnet-20241022",
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
        )
    except Exception as e:
        return LLMResponse(
            provider="claude",
            model="claude-3-5-sonnet-20241022",
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
        )
|
||||
|
||||
|
||||
async def _search_tavily(query: str, count: int = 5) -> list[dict]:
    """Run a web search via the Tavily API, restricted to German
    education-related domains.

    Returns Tavily's raw result dicts; when no API key is configured or
    the request fails, an empty list is returned (best-effort — errors
    are only logged).
    """
    import os
    import httpx

    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        return []

    payload = {
        "api_key": api_key,
        "query": query,
        "max_results": count,
        "include_domains": [
            "kmk.org", "bildungsserver.de", "bpb.de",
            "bayern.de", "nrw.de", "berlin.de",
        ],
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as http:
            resp = await http.post("https://api.tavily.com/search", json=payload)
            resp.raise_for_status()
            return resp.json().get("results", [])
    except Exception as e:
        logger.error(f"Tavily search error: {e}")
        return []
|
||||
|
||||
|
||||
async def _search_edusearch(query: str, count: int = 5, filters: Optional[dict] = None) -> list[dict]:
    """Query the internal EduSearch service and normalize its hits.

    Each hit is reduced to title/url/content/score; failures are logged
    and yield an empty list (best-effort).
    """
    import os
    import httpx

    base_url = os.getenv("EDU_SEARCH_URL", "http://edu-search-service:8084")

    try:
        payload = {
            "q": query,
            "limit": count,
            "mode": "keyword",
        }
        if filters:
            payload["filters"] = filters

        async with httpx.AsyncClient(timeout=30.0) as http:
            resp = await http.post(
                f"{base_url}/v1/search",
                json=payload,
            )
            resp.raise_for_status()
            data = resp.json()

        # Normalize hits to the shape the comparison prompt builder expects.
        return [
            {
                "title": hit.get("title", ""),
                "url": hit.get("url", ""),
                "content": hit.get("snippet", ""),
                "score": hit.get("scores", {}).get("final", 0),
            }
            for hit in data.get("results", [])
        ]
    except Exception as e:
        logger.error(f"EduSearch error: {e}")
        return []
|
||||
|
||||
|
||||
async def _call_selfhosted_with_search(
    prompt: str,
    system_prompt: Optional[str],
    search_provider: str,
    search_results: list[dict],
    model: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
) -> LLMResponse:
    """Call the self-hosted LLM (Ollama) with search results as context.

    The search results are rendered into a numbered source list, appended
    to the system prompt, and the model is asked to cite them. Failures
    are reported in-band via the ``error`` field; the search results are
    echoed back in either case.

    Args:
        search_provider: "tavily" or "edusearch" — only used for labeling
            the provider name and the context header.
    """
    import os
    import httpx

    start_time = time.time()
    ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")

    # Render the search hits as a numbered, truncated source list.
    context_parts = []
    for i, result in enumerate(search_results, 1):
        context_parts.append(f"[{i}] {result.get('title', 'Untitled')}")
        context_parts.append(f"    URL: {result.get('url', '')}")
        context_parts.append(f"    {result.get('content', '')[:500]}")
        context_parts.append("")

    search_context = "\n".join(context_parts)

    # Extend the system prompt with the rendered sources.
    augmented_system = f"""{system_prompt or ''}

Du hast Zugriff auf folgende Suchergebnisse aus {"Tavily" if search_provider == "tavily" else "EduSearch (deutsche Bildungsquellen)"}:

{search_context}

Nutze diese Quellen um deine Antwort zu unterstuetzen. Zitiere relevante Quellen mit [Nummer]."""

    messages = [
        {"role": "system", "content": augmented_system},
        {"role": "user", "content": prompt},
    ]

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{ollama_url}/api/chat",
                json={
                    "model": model,
                    "messages": messages,
                    "stream": False,
                    "options": {
                        "temperature": temperature,
                        "top_p": top_p,
                        "num_predict": max_tokens,
                    },
                },
            )
            response.raise_for_status()
            data = response.json()

        latency_ms = int((time.time() - start_time) * 1000)
        content = data.get("message", {}).get("content", "")
        # Ollama reports prompt and completion token counts separately.
        tokens = data.get("prompt_eval_count", 0) + data.get("eval_count", 0)

        return LLMResponse(
            provider=f"selfhosted_{search_provider}",
            model=model,
            response=content,
            latency_ms=latency_ms,
            tokens_used=tokens,
            search_results=search_results,
        )
    except Exception as e:
        return LLMResponse(
            provider=f"selfhosted_{search_provider}",
            model=model,
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
            search_results=search_results,
        )
|
||||
|
||||
|
||||
@router.post("/run", response_model=ComparisonResponse)
|
||||
async def run_comparison(
|
||||
request: ComparisonRequest,
|
||||
_: str = Depends(verify_api_key),
|
||||
):
|
||||
"""
|
||||
Fuehrt LLM-Vergleich durch.
|
||||
|
||||
Sendet den Prompt parallel an alle aktivierten Provider und
|
||||
sammelt die Antworten.
|
||||
"""
|
||||
comparison_id = f"cmp-{uuid.uuid4().hex[:12]}"
|
||||
tasks = []
|
||||
|
||||
# System Prompt vorbereiten
|
||||
system_prompt = request.system_prompt
|
||||
|
||||
# OpenAI
|
||||
if request.enable_openai:
|
||||
tasks.append(("openai", _call_openai(request.prompt, system_prompt)))
|
||||
|
||||
# Claude
|
||||
if request.enable_claude:
|
||||
tasks.append(("claude", _call_claude(request.prompt, system_prompt)))
|
||||
|
||||
# Self-hosted + Tavily
|
||||
if request.enable_selfhosted_tavily:
|
||||
tavily_results = await _search_tavily(request.prompt, request.search_results_count)
|
||||
tasks.append((
|
||||
"selfhosted_tavily",
|
||||
_call_selfhosted_with_search(
|
||||
request.prompt,
|
||||
system_prompt,
|
||||
"tavily",
|
||||
tavily_results,
|
||||
request.selfhosted_model,
|
||||
request.temperature,
|
||||
request.top_p,
|
||||
request.max_tokens,
|
||||
)
|
||||
))
|
||||
|
||||
# Self-hosted + EduSearch
|
||||
if request.enable_selfhosted_edusearch:
|
||||
edu_results = await _search_edusearch(
|
||||
request.prompt,
|
||||
request.search_results_count,
|
||||
request.edu_search_filters,
|
||||
)
|
||||
tasks.append((
|
||||
"selfhosted_edusearch",
|
||||
_call_selfhosted_with_search(
|
||||
request.prompt,
|
||||
system_prompt,
|
||||
"edusearch",
|
||||
edu_results,
|
||||
request.selfhosted_model,
|
||||
request.temperature,
|
||||
request.top_p,
|
||||
request.max_tokens,
|
||||
)
|
||||
))
|
||||
|
||||
# Parallele Ausfuehrung
|
||||
responses = []
|
||||
if tasks:
|
||||
results = await asyncio.gather(*[t[1] for t in tasks], return_exceptions=True)
|
||||
for (name, _), result in zip(tasks, results):
|
||||
if isinstance(result, Exception):
|
||||
responses.append(LLMResponse(
|
||||
provider=name,
|
||||
model="unknown",
|
||||
response="",
|
||||
latency_ms=0,
|
||||
error=str(result),
|
||||
))
|
||||
else:
|
||||
responses.append(result)
|
||||
|
||||
return ComparisonResponse(
|
||||
comparison_id=comparison_id,
|
||||
prompt=request.prompt,
|
||||
system_prompt=system_prompt,
|
||||
responses=responses,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/save/{comparison_id}")
|
||||
async def save_comparison(
|
||||
comparison_id: str,
|
||||
comparison: ComparisonResponse,
|
||||
notes: Optional[str] = None,
|
||||
rating: Optional[dict] = None,
|
||||
_: str = Depends(verify_api_key),
|
||||
):
|
||||
"""Speichert einen Vergleich fuer spaetere Analyse."""
|
||||
saved = SavedComparison(
|
||||
comparison_id=comparison_id,
|
||||
prompt=comparison.prompt,
|
||||
system_prompt=comparison.system_prompt,
|
||||
responses=comparison.responses,
|
||||
notes=notes,
|
||||
rating=rating,
|
||||
created_at=comparison.created_at,
|
||||
)
|
||||
_comparisons_store[comparison_id] = saved
|
||||
return {"status": "saved", "comparison_id": comparison_id}
|
||||
|
||||
|
||||
@router.get("/history")
|
||||
async def get_comparison_history(
|
||||
limit: int = 50,
|
||||
_: str = Depends(verify_api_key),
|
||||
):
|
||||
"""Gibt gespeicherte Vergleiche zurueck."""
|
||||
comparisons = list(_comparisons_store.values())
|
||||
comparisons.sort(key=lambda x: x.created_at, reverse=True)
|
||||
return {"comparisons": comparisons[:limit]}
|
||||
|
||||
|
||||
@router.get("/history/{comparison_id}")
|
||||
async def get_comparison(
|
||||
comparison_id: str,
|
||||
_: str = Depends(verify_api_key),
|
||||
):
|
||||
"""Gibt einen bestimmten Vergleich zurueck."""
|
||||
if comparison_id not in _comparisons_store:
|
||||
raise HTTPException(status_code=404, detail="Vergleich nicht gefunden")
|
||||
return _comparisons_store[comparison_id]
|
||||
|
||||
|
||||
# System Prompt Management
|
||||
|
||||
@router.get("/prompts")
|
||||
async def list_system_prompts(
|
||||
_: str = Depends(verify_api_key),
|
||||
):
|
||||
"""Listet alle gespeicherten System Prompts."""
|
||||
return {"prompts": list(_system_prompts_store.values())}
|
||||
|
||||
|
||||
@router.post("/prompts")
|
||||
async def create_system_prompt(
|
||||
name: str,
|
||||
prompt: str,
|
||||
_: str = Depends(verify_api_key),
|
||||
):
|
||||
"""Erstellt einen neuen System Prompt."""
|
||||
prompt_id = f"sp-{uuid.uuid4().hex[:8]}"
|
||||
_system_prompts_store[prompt_id] = {
|
||||
"id": prompt_id,
|
||||
"name": name,
|
||||
"prompt": prompt,
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
return {"status": "created", "prompt_id": prompt_id}
|
||||
|
||||
|
||||
@router.put("/prompts/{prompt_id}")
|
||||
async def update_system_prompt(
|
||||
prompt_id: str,
|
||||
name: str,
|
||||
prompt: str,
|
||||
_: str = Depends(verify_api_key),
|
||||
):
|
||||
"""Aktualisiert einen System Prompt."""
|
||||
if prompt_id not in _system_prompts_store:
|
||||
raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")
|
||||
|
||||
_system_prompts_store[prompt_id].update({
|
||||
"name": name,
|
||||
"prompt": prompt,
|
||||
"updated_at": datetime.now(timezone.utc).isoformat(),
|
||||
})
|
||||
return {"status": "updated", "prompt_id": prompt_id}
|
||||
|
||||
|
||||
@router.delete("/prompts/{prompt_id}")
|
||||
async def delete_system_prompt(
|
||||
prompt_id: str,
|
||||
_: str = Depends(verify_api_key),
|
||||
):
|
||||
"""Loescht einen System Prompt."""
|
||||
if prompt_id not in _system_prompts_store:
|
||||
raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")
|
||||
if prompt_id in ["default", "curriculum", "worksheet"]:
|
||||
raise HTTPException(status_code=400, detail="Standard-Prompts koennen nicht geloescht werden")
|
||||
|
||||
del _system_prompts_store[prompt_id]
|
||||
return {"status": "deleted", "prompt_id": prompt_id}
|
||||
|
||||
|
||||
@router.get("/prompts/{prompt_id}")
|
||||
async def get_system_prompt(
|
||||
prompt_id: str,
|
||||
_: str = Depends(verify_api_key),
|
||||
):
|
||||
"""Gibt einen System Prompt zurueck."""
|
||||
if prompt_id not in _system_prompts_store:
|
||||
raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")
|
||||
return _system_prompts_store[prompt_id]
|
||||
710
backend/llm_gateway/routes/edu_search_seeds.py
Normal file
710
backend/llm_gateway/routes/edu_search_seeds.py
Normal file
@@ -0,0 +1,710 @@
|
||||
"""
|
||||
EduSearch Seeds API Routes.
|
||||
|
||||
CRUD operations for managing education search crawler seed URLs.
|
||||
Direct database access to PostgreSQL.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends, Query
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/edu-search", tags=["edu-search"])
|
||||
|
||||
# Database connection pool
|
||||
_pool: Optional[asyncpg.Pool] = None
|
||||
|
||||
|
||||
async def get_db_pool() -> asyncpg.Pool:
    """Get or create database connection pool.

    Lazily initializes a module-level asyncpg pool (2-10 connections) from
    the DATABASE_URL environment variable on first use.

    Raises:
        RuntimeError: if DATABASE_URL is not set.
    """
    global _pool
    # NOTE(review): lazy init is not guarded by a lock; two concurrent first
    # calls could each create a pool — confirm this is acceptable at startup.
    if _pool is None:
        database_url = os.environ.get("DATABASE_URL")
        if not database_url:
            raise RuntimeError("DATABASE_URL nicht konfiguriert - bitte via Vault oder Umgebungsvariable setzen")
        _pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
    return _pool
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pydantic Models
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class CategoryResponse(BaseModel):
    """Category response model.

    Mirrors a row of the edu_search_categories table (see list_categories).
    """
    id: str  # UUID serialized as string
    name: str  # machine name, used as foreign key in seed payloads
    display_name: str
    description: Optional[str] = None
    icon: Optional[str] = None
    sort_order: int
    is_active: bool
|
||||
|
||||
class SeedBase(BaseModel):
    """Base seed model for creation/update.

    Field constraints mirror the edu_search_seeds table columns.
    """
    url: str = Field(..., max_length=500)
    name: str = Field(..., max_length=255)
    description: Optional[str] = None
    category_name: Optional[str] = Field(None, description="Category name (federal, states, etc.)")
    source_type: str = Field("GOV", description="GOV, EDU, UNI, etc.")
    scope: str = Field("FEDERAL", description="FEDERAL, STATE, etc.")
    state: Optional[str] = Field(None, max_length=5, description="State code (BW, BY, etc.)")
    trust_boost: float = Field(0.50, ge=0.0, le=1.0)
    enabled: bool = True
    crawl_depth: int = Field(2, ge=1, le=5)
    crawl_frequency: str = Field("weekly", description="hourly, daily, weekly, monthly")
|
||||
|
||||
class SeedCreate(SeedBase):
    """Seed creation model (same fields as SeedBase)."""
    pass
|
||||
|
||||
|
||||
class SeedUpdate(BaseModel):
    """Seed update model (all fields optional).

    Only fields that are not None are written by update_seed, so a partial
    payload performs a partial update.
    """
    url: Optional[str] = Field(None, max_length=500)
    name: Optional[str] = Field(None, max_length=255)
    description: Optional[str] = None
    category_name: Optional[str] = None
    source_type: Optional[str] = None
    scope: Optional[str] = None
    state: Optional[str] = Field(None, max_length=5)
    trust_boost: Optional[float] = Field(None, ge=0.0, le=1.0)
    enabled: Optional[bool] = None
    crawl_depth: Optional[int] = Field(None, ge=1, le=5)
    crawl_frequency: Optional[str] = None
|
||||
|
||||
|
||||
class SeedResponse(BaseModel):
    """Seed response model.

    Combines seed columns with the joined category name/display name.
    """
    id: str  # UUID serialized as string
    url: str
    name: str
    description: Optional[str] = None
    category: Optional[str] = None  # category machine name from join
    category_display_name: Optional[str] = None
    source_type: str
    scope: str
    state: Optional[str] = None
    trust_boost: float
    enabled: bool
    crawl_depth: int
    crawl_frequency: str
    last_crawled_at: Optional[datetime] = None
    last_crawl_status: Optional[str] = None
    last_crawl_docs: int = 0
    total_documents: int = 0
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
class SeedsListResponse(BaseModel):
    """List response with pagination info."""
    seeds: List[SeedResponse]
    total: int  # total matching rows, not just this page
    page: int
    page_size: int
|
||||
|
||||
class StatsResponse(BaseModel):
    """Crawl statistics response."""
    total_seeds: int
    enabled_seeds: int
    total_documents: int
    seeds_by_category: dict  # category name -> seed count
    seeds_by_state: dict  # state code (or 'federal') -> seed count
    last_crawl_time: Optional[datetime] = None
|
||||
|
||||
class BulkImportRequest(BaseModel):
    """Bulk import request: a batch of seeds to insert."""
    seeds: List[SeedCreate]
|
||||
|
||||
|
||||
class BulkImportResponse(BaseModel):
    """Bulk import response with per-batch counters."""
    imported: int
    skipped: int  # duplicates skipped
    errors: List[str]  # "<url>: <message>" per failed seed
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# API Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/categories", response_model=List[CategoryResponse])
|
||||
async def list_categories():
|
||||
"""List all seed categories."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT id, name, display_name, description, icon, sort_order, is_active
|
||||
FROM edu_search_categories
|
||||
WHERE is_active = TRUE
|
||||
ORDER BY sort_order
|
||||
""")
|
||||
return [
|
||||
CategoryResponse(
|
||||
id=str(row["id"]),
|
||||
name=row["name"],
|
||||
display_name=row["display_name"],
|
||||
description=row["description"],
|
||||
icon=row["icon"],
|
||||
sort_order=row["sort_order"],
|
||||
is_active=row["is_active"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
|
||||
@router.get("/seeds", response_model=SeedsListResponse)
|
||||
async def list_seeds(
|
||||
category: Optional[str] = Query(None, description="Filter by category name"),
|
||||
state: Optional[str] = Query(None, description="Filter by state code"),
|
||||
enabled: Optional[bool] = Query(None, description="Filter by enabled status"),
|
||||
search: Optional[str] = Query(None, description="Search in name/url"),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(50, ge=1, le=200),
|
||||
):
|
||||
"""List seeds with optional filtering and pagination."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Build WHERE clause
|
||||
conditions = []
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if category:
|
||||
conditions.append(f"c.name = ${param_idx}")
|
||||
params.append(category)
|
||||
param_idx += 1
|
||||
|
||||
if state:
|
||||
conditions.append(f"s.state = ${param_idx}")
|
||||
params.append(state)
|
||||
param_idx += 1
|
||||
|
||||
if enabled is not None:
|
||||
conditions.append(f"s.enabled = ${param_idx}")
|
||||
params.append(enabled)
|
||||
param_idx += 1
|
||||
|
||||
if search:
|
||||
conditions.append(f"(s.name ILIKE ${param_idx} OR s.url ILIKE ${param_idx})")
|
||||
params.append(f"%{search}%")
|
||||
param_idx += 1
|
||||
|
||||
where_clause = " AND ".join(conditions) if conditions else "TRUE"
|
||||
|
||||
# Count total
|
||||
count_query = f"""
|
||||
SELECT COUNT(*) FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE {where_clause}
|
||||
"""
|
||||
total = await conn.fetchval(count_query, *params)
|
||||
|
||||
# Get paginated results
|
||||
offset = (page - 1) * page_size
|
||||
params.extend([page_size, offset])
|
||||
|
||||
query = f"""
|
||||
SELECT
|
||||
s.id, s.url, s.name, s.description,
|
||||
c.name as category, c.display_name as category_display_name,
|
||||
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
|
||||
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
|
||||
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
|
||||
s.created_at, s.updated_at
|
||||
FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE {where_clause}
|
||||
ORDER BY c.sort_order, s.name
|
||||
LIMIT ${param_idx} OFFSET ${param_idx + 1}
|
||||
"""
|
||||
|
||||
rows = await conn.fetch(query, *params)
|
||||
|
||||
seeds = [
|
||||
SeedResponse(
|
||||
id=str(row["id"]),
|
||||
url=row["url"],
|
||||
name=row["name"],
|
||||
description=row["description"],
|
||||
category=row["category"],
|
||||
category_display_name=row["category_display_name"],
|
||||
source_type=row["source_type"],
|
||||
scope=row["scope"],
|
||||
state=row["state"],
|
||||
trust_boost=float(row["trust_boost"]),
|
||||
enabled=row["enabled"],
|
||||
crawl_depth=row["crawl_depth"],
|
||||
crawl_frequency=row["crawl_frequency"],
|
||||
last_crawled_at=row["last_crawled_at"],
|
||||
last_crawl_status=row["last_crawl_status"],
|
||||
last_crawl_docs=row["last_crawl_docs"] or 0,
|
||||
total_documents=row["total_documents"] or 0,
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
for row in rows
|
||||
]
|
||||
|
||||
return SeedsListResponse(
|
||||
seeds=seeds,
|
||||
total=total,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/seeds/{seed_id}", response_model=SeedResponse)
|
||||
async def get_seed(seed_id: str):
|
||||
"""Get a single seed by ID."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
row = await conn.fetchrow("""
|
||||
SELECT
|
||||
s.id, s.url, s.name, s.description,
|
||||
c.name as category, c.display_name as category_display_name,
|
||||
s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
|
||||
s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
|
||||
s.last_crawl_status, s.last_crawl_docs, s.total_documents,
|
||||
s.created_at, s.updated_at
|
||||
FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE s.id = $1
|
||||
""", seed_id)
|
||||
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
return SeedResponse(
|
||||
id=str(row["id"]),
|
||||
url=row["url"],
|
||||
name=row["name"],
|
||||
description=row["description"],
|
||||
category=row["category"],
|
||||
category_display_name=row["category_display_name"],
|
||||
source_type=row["source_type"],
|
||||
scope=row["scope"],
|
||||
state=row["state"],
|
||||
trust_boost=float(row["trust_boost"]),
|
||||
enabled=row["enabled"],
|
||||
crawl_depth=row["crawl_depth"],
|
||||
crawl_frequency=row["crawl_frequency"],
|
||||
last_crawled_at=row["last_crawled_at"],
|
||||
last_crawl_status=row["last_crawl_status"],
|
||||
last_crawl_docs=row["last_crawl_docs"] or 0,
|
||||
total_documents=row["total_documents"] or 0,
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
|
||||
|
||||
@router.post("/seeds", response_model=SeedResponse, status_code=201)
|
||||
async def create_seed(seed: SeedCreate):
|
||||
"""Create a new seed URL."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Get category ID if provided
|
||||
category_id = None
|
||||
if seed.category_name:
|
||||
category_id = await conn.fetchval(
|
||||
"SELECT id FROM edu_search_categories WHERE name = $1",
|
||||
seed.category_name
|
||||
)
|
||||
|
||||
try:
|
||||
row = await conn.fetchrow("""
|
||||
INSERT INTO edu_search_seeds (
|
||||
url, name, description, category_id, source_type, scope,
|
||||
state, trust_boost, enabled, crawl_depth, crawl_frequency
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
||||
RETURNING id, created_at, updated_at
|
||||
""",
|
||||
seed.url, seed.name, seed.description, category_id,
|
||||
seed.source_type, seed.scope, seed.state, seed.trust_boost,
|
||||
seed.enabled, seed.crawl_depth, seed.crawl_frequency
|
||||
)
|
||||
except asyncpg.UniqueViolationError:
|
||||
raise HTTPException(status_code=409, detail="URL existiert bereits")
|
||||
|
||||
return SeedResponse(
|
||||
id=str(row["id"]),
|
||||
url=seed.url,
|
||||
name=seed.name,
|
||||
description=seed.description,
|
||||
category=seed.category_name,
|
||||
category_display_name=None,
|
||||
source_type=seed.source_type,
|
||||
scope=seed.scope,
|
||||
state=seed.state,
|
||||
trust_boost=seed.trust_boost,
|
||||
enabled=seed.enabled,
|
||||
crawl_depth=seed.crawl_depth,
|
||||
crawl_frequency=seed.crawl_frequency,
|
||||
last_crawled_at=None,
|
||||
last_crawl_status=None,
|
||||
last_crawl_docs=0,
|
||||
total_documents=0,
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
|
||||
|
||||
@router.put("/seeds/{seed_id}", response_model=SeedResponse)
|
||||
async def update_seed(seed_id: str, seed: SeedUpdate):
|
||||
"""Update an existing seed."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Build update statement dynamically
|
||||
updates = []
|
||||
params = []
|
||||
param_idx = 1
|
||||
|
||||
if seed.url is not None:
|
||||
updates.append(f"url = ${param_idx}")
|
||||
params.append(seed.url)
|
||||
param_idx += 1
|
||||
|
||||
if seed.name is not None:
|
||||
updates.append(f"name = ${param_idx}")
|
||||
params.append(seed.name)
|
||||
param_idx += 1
|
||||
|
||||
if seed.description is not None:
|
||||
updates.append(f"description = ${param_idx}")
|
||||
params.append(seed.description)
|
||||
param_idx += 1
|
||||
|
||||
if seed.category_name is not None:
|
||||
category_id = await conn.fetchval(
|
||||
"SELECT id FROM edu_search_categories WHERE name = $1",
|
||||
seed.category_name
|
||||
)
|
||||
updates.append(f"category_id = ${param_idx}")
|
||||
params.append(category_id)
|
||||
param_idx += 1
|
||||
|
||||
if seed.source_type is not None:
|
||||
updates.append(f"source_type = ${param_idx}")
|
||||
params.append(seed.source_type)
|
||||
param_idx += 1
|
||||
|
||||
if seed.scope is not None:
|
||||
updates.append(f"scope = ${param_idx}")
|
||||
params.append(seed.scope)
|
||||
param_idx += 1
|
||||
|
||||
if seed.state is not None:
|
||||
updates.append(f"state = ${param_idx}")
|
||||
params.append(seed.state)
|
||||
param_idx += 1
|
||||
|
||||
if seed.trust_boost is not None:
|
||||
updates.append(f"trust_boost = ${param_idx}")
|
||||
params.append(seed.trust_boost)
|
||||
param_idx += 1
|
||||
|
||||
if seed.enabled is not None:
|
||||
updates.append(f"enabled = ${param_idx}")
|
||||
params.append(seed.enabled)
|
||||
param_idx += 1
|
||||
|
||||
if seed.crawl_depth is not None:
|
||||
updates.append(f"crawl_depth = ${param_idx}")
|
||||
params.append(seed.crawl_depth)
|
||||
param_idx += 1
|
||||
|
||||
if seed.crawl_frequency is not None:
|
||||
updates.append(f"crawl_frequency = ${param_idx}")
|
||||
params.append(seed.crawl_frequency)
|
||||
param_idx += 1
|
||||
|
||||
if not updates:
|
||||
raise HTTPException(status_code=400, detail="Keine Felder zum Aktualisieren")
|
||||
|
||||
updates.append("updated_at = NOW()")
|
||||
params.append(seed_id)
|
||||
|
||||
query = f"""
|
||||
UPDATE edu_search_seeds
|
||||
SET {", ".join(updates)}
|
||||
WHERE id = ${param_idx}
|
||||
RETURNING id
|
||||
"""
|
||||
|
||||
result = await conn.fetchrow(query, *params)
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
# Return updated seed
|
||||
return await get_seed(seed_id)
|
||||
|
||||
|
||||
@router.delete("/seeds/{seed_id}")
|
||||
async def delete_seed(seed_id: str):
|
||||
"""Delete a seed."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
result = await conn.execute(
|
||||
"DELETE FROM edu_search_seeds WHERE id = $1",
|
||||
seed_id
|
||||
)
|
||||
if result == "DELETE 0":
|
||||
raise HTTPException(status_code=404, detail="Seed nicht gefunden")
|
||||
|
||||
return {"status": "deleted", "id": seed_id}
|
||||
|
||||
|
||||
@router.post("/seeds/bulk-import", response_model=BulkImportResponse)
|
||||
async def bulk_import_seeds(request: BulkImportRequest):
|
||||
"""Bulk import seeds (skip duplicates)."""
|
||||
pool = await get_db_pool()
|
||||
imported = 0
|
||||
skipped = 0
|
||||
errors = []
|
||||
|
||||
async with pool.acquire() as conn:
|
||||
# Pre-fetch all category IDs
|
||||
categories = {}
|
||||
rows = await conn.fetch("SELECT id, name FROM edu_search_categories")
|
||||
for row in rows:
|
||||
categories[row["name"]] = row["id"]
|
||||
|
||||
for seed in request.seeds:
|
||||
try:
|
||||
category_id = categories.get(seed.category_name) if seed.category_name else None
|
||||
|
||||
await conn.execute("""
|
||||
INSERT INTO edu_search_seeds (
|
||||
url, name, description, category_id, source_type, scope,
|
||||
state, trust_boost, enabled, crawl_depth, crawl_frequency
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
|
||||
ON CONFLICT (url) DO NOTHING
|
||||
""",
|
||||
seed.url, seed.name, seed.description, category_id,
|
||||
seed.source_type, seed.scope, seed.state, seed.trust_boost,
|
||||
seed.enabled, seed.crawl_depth, seed.crawl_frequency
|
||||
)
|
||||
imported += 1
|
||||
except asyncpg.UniqueViolationError:
|
||||
skipped += 1
|
||||
except Exception as e:
|
||||
errors.append(f"{seed.url}: {str(e)}")
|
||||
|
||||
return BulkImportResponse(imported=imported, skipped=skipped, errors=errors)
|
||||
|
||||
|
||||
@router.get("/stats", response_model=StatsResponse)
|
||||
async def get_stats():
|
||||
"""Get crawl statistics."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Basic counts
|
||||
total = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds")
|
||||
enabled = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds WHERE enabled = TRUE")
|
||||
total_docs = await conn.fetchval("SELECT COALESCE(SUM(total_documents), 0) FROM edu_search_seeds")
|
||||
|
||||
# By category
|
||||
cat_rows = await conn.fetch("""
|
||||
SELECT c.name, COUNT(s.id) as count
|
||||
FROM edu_search_categories c
|
||||
LEFT JOIN edu_search_seeds s ON c.id = s.category_id
|
||||
GROUP BY c.name
|
||||
""")
|
||||
by_category = {row["name"]: row["count"] for row in cat_rows}
|
||||
|
||||
# By state
|
||||
state_rows = await conn.fetch("""
|
||||
SELECT COALESCE(state, 'federal') as state, COUNT(*) as count
|
||||
FROM edu_search_seeds
|
||||
GROUP BY state
|
||||
""")
|
||||
by_state = {row["state"]: row["count"] for row in state_rows}
|
||||
|
||||
# Last crawl time
|
||||
last_crawl = await conn.fetchval(
|
||||
"SELECT MAX(last_crawled_at) FROM edu_search_seeds"
|
||||
)
|
||||
|
||||
return StatsResponse(
|
||||
total_seeds=total,
|
||||
enabled_seeds=enabled,
|
||||
total_documents=total_docs,
|
||||
seeds_by_category=by_category,
|
||||
seeds_by_state=by_state,
|
||||
last_crawl_time=last_crawl,
|
||||
)
|
||||
|
||||
|
||||
# Export for external use (edu-search-service)
|
||||
@router.get("/seeds/export/for-crawler")
|
||||
async def export_seeds_for_crawler():
|
||||
"""Export enabled seeds in format suitable for crawler."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
rows = await conn.fetch("""
|
||||
SELECT
|
||||
s.url, s.trust_boost, s.source_type, s.scope, s.state,
|
||||
s.crawl_depth, c.name as category
|
||||
FROM edu_search_seeds s
|
||||
LEFT JOIN edu_search_categories c ON s.category_id = c.id
|
||||
WHERE s.enabled = TRUE
|
||||
ORDER BY s.trust_boost DESC
|
||||
""")
|
||||
|
||||
return {
|
||||
"seeds": [
|
||||
{
|
||||
"url": row["url"],
|
||||
"trust": float(row["trust_boost"]),
|
||||
"source": row["source_type"],
|
||||
"scope": row["scope"],
|
||||
"state": row["state"],
|
||||
"depth": row["crawl_depth"],
|
||||
"category": row["category"],
|
||||
}
|
||||
for row in rows
|
||||
],
|
||||
"total": len(rows),
|
||||
"exported_at": datetime.utcnow().isoformat(),
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Crawl Status Feedback (from edu-search-service)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class CrawlStatusUpdate(BaseModel):
    """Crawl status update from edu-search-service."""
    seed_url: str = Field(..., description="The seed URL that was crawled")
    status: str = Field(..., description="Crawl status: success, error, partial")
    documents_crawled: int = Field(0, ge=0, description="Number of documents crawled")
    error_message: Optional[str] = Field(None, description="Error message if status is error")
    crawl_duration_seconds: float = Field(0.0, ge=0.0, description="Duration of the crawl in seconds")
|
||||
|
||||
|
||||
class CrawlStatusResponse(BaseModel):
    """Response for crawl status update."""
    success: bool
    seed_url: str
    message: str  # human-readable confirmation (German)
|
||||
|
||||
|
||||
@router.post("/seeds/crawl-status", response_model=CrawlStatusResponse)
|
||||
async def update_crawl_status(update: CrawlStatusUpdate):
|
||||
"""Update crawl status for a seed URL (called by edu-search-service)."""
|
||||
pool = await get_db_pool()
|
||||
async with pool.acquire() as conn:
|
||||
# Find the seed by URL
|
||||
seed = await conn.fetchrow(
|
||||
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
|
||||
update.seed_url
|
||||
)
|
||||
|
||||
if not seed:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Seed nicht gefunden: {update.seed_url}"
|
||||
)
|
||||
|
||||
# Update the seed with crawl status
|
||||
new_total = (seed["total_documents"] or 0) + update.documents_crawled
|
||||
|
||||
await conn.execute("""
|
||||
UPDATE edu_search_seeds
|
||||
SET
|
||||
last_crawled_at = NOW(),
|
||||
last_crawl_status = $2,
|
||||
last_crawl_docs = $3,
|
||||
total_documents = $4,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
""", seed["id"], update.status, update.documents_crawled, new_total)
|
||||
|
||||
logger.info(
|
||||
f"Crawl status updated: {update.seed_url} - "
|
||||
f"status={update.status}, docs={update.documents_crawled}, "
|
||||
f"duration={update.crawl_duration_seconds:.1f}s"
|
||||
)
|
||||
|
||||
return CrawlStatusResponse(
|
||||
success=True,
|
||||
seed_url=update.seed_url,
|
||||
message=f"Status aktualisiert: {update.documents_crawled} Dokumente gecrawlt"
|
||||
)
|
||||
|
||||
|
||||
class BulkCrawlStatusUpdate(BaseModel):
    """Bulk crawl status update: one entry per crawled seed."""
    updates: List[CrawlStatusUpdate]
|
||||
|
||||
|
||||
class BulkCrawlStatusResponse(BaseModel):
    """Response for bulk crawl status update."""
    updated: int
    failed: int
    errors: List[str]  # "<url>: <message>" or not-found notices
|
||||
|
||||
|
||||
@router.post("/seeds/crawl-status/bulk", response_model=BulkCrawlStatusResponse)
|
||||
async def bulk_update_crawl_status(request: BulkCrawlStatusUpdate):
|
||||
"""Bulk update crawl status for multiple seeds."""
|
||||
pool = await get_db_pool()
|
||||
updated = 0
|
||||
failed = 0
|
||||
errors = []
|
||||
|
||||
async with pool.acquire() as conn:
|
||||
for update in request.updates:
|
||||
try:
|
||||
seed = await conn.fetchrow(
|
||||
"SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
|
||||
update.seed_url
|
||||
)
|
||||
|
||||
if not seed:
|
||||
failed += 1
|
||||
errors.append(f"Seed nicht gefunden: {update.seed_url}")
|
||||
continue
|
||||
|
||||
new_total = (seed["total_documents"] or 0) + update.documents_crawled
|
||||
|
||||
await conn.execute("""
|
||||
UPDATE edu_search_seeds
|
||||
SET
|
||||
last_crawled_at = NOW(),
|
||||
last_crawl_status = $2,
|
||||
last_crawl_docs = $3,
|
||||
total_documents = $4,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
""", seed["id"], update.status, update.documents_crawled, new_total)
|
||||
|
||||
updated += 1
|
||||
|
||||
except Exception as e:
|
||||
failed += 1
|
||||
errors.append(f"{update.seed_url}: {str(e)}")
|
||||
|
||||
logger.info(f"Bulk crawl status update: {updated} updated, {failed} failed")
|
||||
|
||||
return BulkCrawlStatusResponse(
|
||||
updated=updated,
|
||||
failed=failed,
|
||||
errors=errors
|
||||
)
|
||||
127
backend/llm_gateway/routes/health.py
Normal file
127
backend/llm_gateway/routes/health.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""
|
||||
Health Check Route.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ..config import get_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["Health"])
|
||||
|
||||
|
||||
class ComponentStatus(BaseModel):
    """Status of a single checked component (gateway, ollama, vllm, ...)."""
    name: str
    status: str  # healthy, degraded, unhealthy
    message: str = ""  # human-readable detail, e.g. the error text
|
||||
|
||||
class HealthResponse(BaseModel):
    """Health check response: overall status plus per-component details."""
    status: str  # ok, degraded, error
    ts: str  # check timestamp
    version: str
    components: list[ComponentStatus]
|
||||
|
||||
|
||||
@router.get("/health", response_model=HealthResponse)
async def health_check():
    """
    Health check endpoint.

    Probes the status of all components:
    - the gateway itself
    - reachability of the configured LLM backends (Ollama, vLLM)
    - Anthropic (reported as configured, not actively probed)

    Returns:
        HealthResponse with per-component statuses and an overall verdict.
    """
    # Local import so the module-level imports stay unchanged.
    # datetime.utcnow() is deprecated since Python 3.12; we build an
    # aware UTC timestamp instead (same output format as before).
    from datetime import timezone

    config = get_config()
    components = []
    overall_status = "ok"

    # Gateway itself: trivially healthy if this handler is running.
    components.append(ComponentStatus(
        name="gateway",
        status="healthy",
        message="Gateway is running",
    ))

    # Ollama backend: probe its model-listing endpoint.
    if config.ollama and config.ollama.enabled:
        try:
            import httpx
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get(f"{config.ollama.base_url}/api/tags")
                if response.status_code == 200:
                    components.append(ComponentStatus(
                        name="ollama",
                        status="healthy",
                        message="Ollama is reachable",
                    ))
                else:
                    components.append(ComponentStatus(
                        name="ollama",
                        status="degraded",
                        message=f"Ollama returned status {response.status_code}",
                    ))
                    overall_status = "degraded"
        except Exception as e:
            components.append(ComponentStatus(
                name="ollama",
                status="unhealthy",
                message=f"Cannot reach Ollama: {str(e)}",
            ))
            # Not critical as long as another backend is available.
            if not (config.vllm and config.vllm.enabled) and not (config.anthropic and config.anthropic.enabled):
                overall_status = "error"

    # vLLM backend: probe the OpenAI-compatible /v1/models endpoint.
    if config.vllm and config.vllm.enabled:
        try:
            import httpx
            headers = {}
            if config.vllm.api_key:
                headers["Authorization"] = f"Bearer {config.vllm.api_key}"
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get(
                    f"{config.vllm.base_url}/v1/models",
                    headers=headers,
                )
                if response.status_code == 200:
                    components.append(ComponentStatus(
                        name="vllm",
                        status="healthy",
                        message="vLLM is reachable",
                    ))
                else:
                    components.append(ComponentStatus(
                        name="vllm",
                        status="degraded",
                        message=f"vLLM returned status {response.status_code}",
                    ))
                    overall_status = "degraded"
        except Exception as e:
            # NOTE(review): unlike the Ollama branch, an unreachable vLLM does
            # not downgrade overall_status — confirm this asymmetry is intended.
            components.append(ComponentStatus(
                name="vllm",
                status="unhealthy",
                message=f"Cannot reach vLLM: {str(e)}",
            ))

    # Anthropic: remote SaaS — only report that it is configured.
    if config.anthropic and config.anthropic.enabled:
        components.append(ComponentStatus(
            name="anthropic",
            status="healthy",
            message="Anthropic API configured (not checked)",
        ))

    return HealthResponse(
        status=overall_status,
        # Same string as the old datetime.utcnow().isoformat() + "Z",
        # produced via the non-deprecated timezone-aware API.
        ts=datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        version="0.1.0",
        components=components,
    )
|
||||
173
backend/llm_gateway/routes/legal_crawler.py
Normal file
173
backend/llm_gateway/routes/legal_crawler.py
Normal file
@@ -0,0 +1,173 @@
|
||||
"""
|
||||
Legal Crawler API Routes.
|
||||
|
||||
Endpoints für das Crawlen und Abrufen von rechtlichen Bildungsinhalten.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
from typing import List, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ..services.legal_crawler import get_legal_crawler, LegalCrawler
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/legal-crawler", tags=["legal-crawler"])
|
||||
|
||||
|
||||
class CrawlStatusResponse(BaseModel):
    """Response describing the state of the legal-seed crawl."""
    status: str
    message: str
    stats: Optional[dict] = None  # stats of the last completed run, if any
|
||||
|
||||
|
||||
class LegalDocumentResponse(BaseModel):
    """A single crawled legal document."""
    id: str
    url: str
    title: str
    law_name: Optional[str]
    state: Optional[str]  # federal-state code, e.g. "NW"
    paragraphs: Optional[list]
    last_crawled_at: Optional[str]
|
||||
|
||||
|
||||
class LegalReferenceFromDB(BaseModel):
    """Legal reference as stored in the database."""
    law: str
    url: str
    state: Optional[str]
    title: str
    paragraphs: list
|
||||
|
||||
|
||||
# Globaler Status für laufenden Crawl
|
||||
_crawl_status = {
|
||||
"running": False,
|
||||
"last_run": None,
|
||||
"last_stats": None,
|
||||
}
|
||||
|
||||
|
||||
async def _run_crawl(db_pool):
    """Execute one legal-seed crawl and record its outcome in _crawl_status."""
    _crawl_status["running"] = True
    try:
        crawler = get_legal_crawler()
        result_stats = await crawler.crawl_legal_seeds(db_pool)
    except Exception as e:
        # Keep the gateway alive; surface the failure via the status endpoint.
        logger.error(f"Crawl-Fehler: {e}")
        _crawl_status["last_run"] = f"error: {str(e)}"
    else:
        _crawl_status["last_stats"] = result_stats
        _crawl_status["last_run"] = "completed"
    finally:
        _crawl_status["running"] = False
|
||||
|
||||
|
||||
@router.post("/start", response_model=CrawlStatusResponse)
async def start_crawl(background_tasks: BackgroundTasks):
    """
    Start a new crawl over all legal seeds.

    The crawl is meant to run in the background; progress is reported by /status.

    NOTE(review): this handler currently only flips the status flags — no
    background task is scheduled yet (the DB pool is not wired in), so after
    one call the status reports "running" until the process restarts.
    TODO: schedule _run_crawl via background_tasks once the pool is available.
    """
    if _crawl_status["running"]:
        return CrawlStatusResponse(
            status="already_running",
            message="Ein Crawl läuft bereits. Bitte warten Sie, bis er abgeschlossen ist."
        )

    _crawl_status["running"] = True
    _crawl_status["last_run"] = "started"

    return CrawlStatusResponse(
        status="started",
        message="Crawl wurde gestartet. Nutzen Sie /status um den Fortschritt zu prüfen."
    )
|
||||
|
||||
|
||||
@router.get("/status", response_model=CrawlStatusResponse)
async def get_crawl_status():
    """Report the current crawl state and the most recent run's stats."""
    is_running = _crawl_status["running"]
    return CrawlStatusResponse(
        status="running" if is_running else "idle",
        message=_crawl_status.get("last_run") or "Noch nie gecrawlt",
        stats=_crawl_status.get("last_stats"),
    )
|
||||
|
||||
|
||||
@router.get("/documents", response_model=List[LegalDocumentResponse])
async def get_legal_documents(
    state: Optional[str] = None,
    doc_type: Optional[str] = None,
    limit: int = 50
):
    """
    Return crawled legal documents.

    Args:
        state: filter by federal state (e.g. "NW", "BY")
        doc_type: filter by document type (e.g. "schulgesetz")
        limit: maximum number of documents

    Returns:
        List of LegalDocumentResponse
    """
    # TODO: implement the DB query once a pool is available.
    # Until then this endpoint is a stub and always answers with an empty list.
    return []
|
||||
|
||||
|
||||
@router.get("/references/{state}")
async def get_legal_references_for_state(state: str):
    """
    Return legal references for one German federal state.

    This is the endpoint consumed by the Communication service.

    Args:
        state: state code (e.g. "NW", "BY", "BE")

    Returns:
        Dict with school-law information and paragraphs
        (placeholder payload until the first crawl has run).
    """
    # TODO: load from DB.
    # Normalise common aliases (e.g. "NRW") onto the codes stored in the DB.
    state_mapping = {
        "NRW": "NW",
        "NW": "NW",
        "BY": "BY",
        "BW": "BW",
        "BE": "BE",
        "BB": "BB",
        "HB": "HB",
        "HH": "HH",
        "HE": "HE",
        "MV": "MV",
        "NI": "NI",
        "RP": "RP",
        "SL": "SL",
        "SN": "SN",
        "ST": "ST",
        "SH": "SH",
        "TH": "TH",
    }

    db_state = state_mapping.get(state.upper(), state.upper())
    # NOTE(review): db_state is not used yet — it will feed the DB lookup
    # once that TODO is implemented; for now the response is a placeholder.

    return {
        "state": state,
        "documents": [],
        "message": "Dokumente werden nach dem ersten Crawl verfügbar sein"
    }
|
||||
96
backend/llm_gateway/routes/playbooks.py
Normal file
96
backend/llm_gateway/routes/playbooks.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""
|
||||
Playbooks Route - System Prompt Verwaltung.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
from fastapi import APIRouter, HTTPException, Depends
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ..services.playbook_service import get_playbook_service, Playbook
|
||||
from ..middleware.auth import verify_api_key
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/playbooks", tags=["Playbooks"])
|
||||
|
||||
|
||||
class PlaybookSummary(BaseModel):
    """Summary of a playbook (without the system prompt)."""
    id: str
    name: str
    description: str
    prompt_version: str
    recommended_models: list[str]
|
||||
|
||||
|
||||
class PlaybookDetail(BaseModel):
    """Complete playbook details, including the full system prompt."""
    id: str
    name: str
    description: str
    system_prompt: str
    prompt_version: str
    recommended_models: list[str]
    tool_policy: dict
    status: str
|
||||
|
||||
|
||||
class PlaybookListResponse(BaseModel):
    """Response envelope for the playbook list endpoint."""
    items: list[PlaybookSummary]
|
||||
|
||||
|
||||
@router.get("", response_model=PlaybookListResponse)
async def list_playbooks(
    status: Optional[str] = "published",
    _: str = Depends(verify_api_key),
):
    """
    List available playbooks.

    Playbooks are versioned system-prompt templates for specific school contexts.
    """
    service = get_playbook_service()
    summaries = [
        PlaybookSummary(
            id=entry.id,
            name=entry.name,
            description=entry.description,
            prompt_version=entry.prompt_version,
            recommended_models=entry.recommended_models,
        )
        for entry in service.list_playbooks(status=status)
    ]
    return PlaybookListResponse(items=summaries)
|
||||
|
||||
|
||||
@router.get("/{playbook_id}", response_model=PlaybookDetail)
async def get_playbook(
    playbook_id: str,
    _: str = Depends(verify_api_key),
):
    """
    Fetch the details of one playbook.

    Includes the full system prompt and tool policies.
    """
    found = get_playbook_service().get_playbook(playbook_id)
    if found is None:
        raise HTTPException(status_code=404, detail=f"Playbook {playbook_id} not found")

    return PlaybookDetail(
        id=found.id,
        name=found.name,
        description=found.description,
        system_prompt=found.system_prompt,
        prompt_version=found.prompt_version,
        recommended_models=found.recommended_models,
        tool_policy=found.tool_policy,
        status=found.status,
    )
|
||||
867
backend/llm_gateway/routes/schools.py
Normal file
867
backend/llm_gateway/routes/schools.py
Normal file
@@ -0,0 +1,867 @@
|
||||
"""
|
||||
Schools API Routes.
|
||||
|
||||
CRUD operations for managing German schools (~40,000 schools).
|
||||
Direct database access to PostgreSQL.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel, Field
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/schools", tags=["schools"])
|
||||
|
||||
# Database connection pool
|
||||
_pool: Optional[asyncpg.Pool] = None
|
||||
|
||||
|
||||
async def get_db_pool() -> asyncpg.Pool:
    """Return the shared asyncpg pool, creating it lazily on first use."""
    global _pool
    if _pool is None:
        # NOTE(review): not guarded against concurrent first calls — two
        # coroutines could race and each create a pool; confirm startup is
        # serial. The fallback DSN embeds dev credentials; production must
        # override it via the DATABASE_URL environment variable.
        dsn = os.environ.get(
            "DATABASE_URL",
            "postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db"
        )
        _pool = await asyncpg.create_pool(dsn, min_size=2, max_size=10)
    return _pool
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pydantic Models
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class SchoolTypeResponse(BaseModel):
    """School type response model."""
    id: str
    name: str
    name_short: Optional[str] = None
    category: Optional[str] = None
    description: Optional[str] = None
|
||||
|
||||
|
||||
class SchoolBase(BaseModel):
    """Base school model shared by create/update payloads."""
    # Identity
    name: str = Field(..., max_length=255)
    school_number: Optional[str] = Field(None, max_length=20)
    school_type_id: Optional[str] = None
    school_type_raw: Optional[str] = None  # free-text type as found by the crawler
    # Location
    state: str = Field(..., max_length=10)  # federal-state code, e.g. "NW"
    district: Optional[str] = None
    city: Optional[str] = None
    postal_code: Optional[str] = None
    street: Optional[str] = None
    address_full: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    # Contact
    website: Optional[str] = None
    email: Optional[str] = None
    phone: Optional[str] = None
    fax: Optional[str] = None
    # Leadership / administration
    principal_name: Optional[str] = None
    principal_title: Optional[str] = None
    principal_email: Optional[str] = None
    principal_phone: Optional[str] = None
    secretary_name: Optional[str] = None
    secretary_email: Optional[str] = None
    secretary_phone: Optional[str] = None
    # Size and history
    student_count: Optional[int] = None
    teacher_count: Optional[int] = None
    class_count: Optional[int] = None
    founded_year: Optional[int] = None
    # Attributes
    is_public: bool = True
    is_all_day: Optional[bool] = None
    has_inclusion: Optional[bool] = None
    languages: Optional[List[str]] = None
    specializations: Optional[List[str]] = None
    # Provenance
    source: Optional[str] = None
    source_url: Optional[str] = None
|
||||
|
||||
|
||||
class SchoolCreate(SchoolBase):
    """School creation model; identical to SchoolBase for now."""
    pass
|
||||
|
||||
|
||||
class SchoolUpdate(BaseModel):
    """School update model — every field optional (partial update)."""
    name: Optional[str] = Field(None, max_length=255)
    school_number: Optional[str] = None
    school_type_id: Optional[str] = None
    state: Optional[str] = None
    district: Optional[str] = None
    city: Optional[str] = None
    postal_code: Optional[str] = None
    street: Optional[str] = None
    website: Optional[str] = None
    email: Optional[str] = None
    phone: Optional[str] = None
    principal_name: Optional[str] = None
    student_count: Optional[int] = None
    teacher_count: Optional[int] = None
    is_active: Optional[bool] = None
|
||||
|
||||
|
||||
class SchoolResponse(BaseModel):
    """School response model returned by list/detail endpoints."""
    id: str
    name: str
    school_number: Optional[str] = None
    # Resolved from the school_types join
    school_type: Optional[str] = None
    school_type_short: Optional[str] = None
    school_category: Optional[str] = None
    # Location
    state: str
    district: Optional[str] = None
    city: Optional[str] = None
    postal_code: Optional[str] = None
    street: Optional[str] = None
    address_full: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    # Contact
    website: Optional[str] = None
    email: Optional[str] = None
    phone: Optional[str] = None
    fax: Optional[str] = None
    principal_name: Optional[str] = None
    principal_email: Optional[str] = None
    # Size
    student_count: Optional[int] = None
    teacher_count: Optional[int] = None
    is_public: bool = True
    is_all_day: Optional[bool] = None
    staff_count: int = 0  # computed subquery over school_staff
    # Provenance and bookkeeping
    source: Optional[str] = None
    crawled_at: Optional[datetime] = None
    is_active: bool = True
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class SchoolsListResponse(BaseModel):
    """List response with pagination info."""
    schools: List[SchoolResponse]
    total: int  # total matches before pagination
    page: int
    page_size: int
|
||||
|
||||
|
||||
class SchoolStaffBase(BaseModel):
    """Base school staff model."""
    first_name: Optional[str] = None
    last_name: str
    full_name: Optional[str] = None
    title: Optional[str] = None
    position: Optional[str] = None
    position_type: Optional[str] = None  # e.g. principal, vice_principal, secretary
    subjects: Optional[List[str]] = None
    email: Optional[str] = None
    phone: Optional[str] = None
|
||||
|
||||
|
||||
class SchoolStaffCreate(SchoolStaffBase):
    """School staff creation model — adds the owning school."""
    school_id: str
|
||||
|
||||
|
||||
class SchoolStaffResponse(SchoolStaffBase):
    """School staff response model."""
    id: str
    school_id: str
    school_name: Optional[str] = None  # joined from schools
    profile_url: Optional[str] = None
    photo_url: Optional[str] = None
    is_active: bool = True
    created_at: datetime
|
||||
|
||||
|
||||
class SchoolStaffListResponse(BaseModel):
    """Staff list response envelope."""
    staff: List[SchoolStaffResponse]
    total: int
|
||||
|
||||
|
||||
class SchoolStatsResponse(BaseModel):
    """Aggregate statistics across all active schools."""
    total_schools: int
    total_staff: int
    schools_by_state: dict  # state code -> count
    schools_by_type: dict   # type name -> count
    schools_with_website: int
    schools_with_email: int
    schools_with_principal: int
    total_students: int
    total_teachers: int
    last_crawl_time: Optional[datetime] = None
|
||||
|
||||
|
||||
class BulkImportRequest(BaseModel):
    """Bulk import request payload."""
    schools: List[SchoolCreate]
|
||||
|
||||
|
||||
class BulkImportResponse(BaseModel):
    """Bulk import outcome counts plus per-record error messages."""
    imported: int
    updated: int
    skipped: int
    errors: List[str]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Type Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/types", response_model=List[SchoolTypeResponse])
async def list_school_types():
    """List all school types, ordered by category then name."""
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch("""
            SELECT id, name, name_short, category, description
            FROM school_types
            ORDER BY category, name
        """)
        return [
            SchoolTypeResponse(
                id=str(record["id"]),
                name=record["name"],
                name_short=record["name_short"],
                category=record["category"],
                description=record["description"],
            )
            for record in records
        ]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("", response_model=SchoolsListResponse)
async def list_schools(
    state: Optional[str] = Query(None, description="Filter by state code (BW, BY, etc.)"),
    school_type: Optional[str] = Query(None, description="Filter by school type name"),
    city: Optional[str] = Query(None, description="Filter by city"),
    district: Optional[str] = Query(None, description="Filter by district"),
    postal_code: Optional[str] = Query(None, description="Filter by postal code prefix"),
    search: Optional[str] = Query(None, description="Search in name, city"),
    has_email: Optional[bool] = Query(None, description="Filter schools with email"),
    has_website: Optional[bool] = Query(None, description="Filter schools with website"),
    is_public: Optional[bool] = Query(None, description="Filter public/private schools"),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
):
    """List schools with optional filtering and pagination.

    Builds a dynamic WHERE clause from the supplied filters using asyncpg's
    positional $n placeholders; param_idx always points at the next free
    placeholder index and must stay in lockstep with params.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # Build WHERE clause — only active schools are ever returned.
        conditions = ["s.is_active = TRUE"]
        params = []
        param_idx = 1

        if state:
            conditions.append(f"s.state = ${param_idx}")
            params.append(state.upper())  # state codes are stored uppercase
            param_idx += 1

        if school_type:
            # Matches the joined school_types row, not the raw crawler text.
            conditions.append(f"st.name = ${param_idx}")
            params.append(school_type)
            param_idx += 1

        if city:
            conditions.append(f"LOWER(s.city) = LOWER(${param_idx})")
            params.append(city)
            param_idx += 1

        if district:
            # Substring match, case-insensitive.
            conditions.append(f"LOWER(s.district) LIKE LOWER(${param_idx})")
            params.append(f"%{district}%")
            param_idx += 1

        if postal_code:
            # Prefix match (e.g. "50" matches all of Cologne).
            conditions.append(f"s.postal_code LIKE ${param_idx}")
            params.append(f"{postal_code}%")
            param_idx += 1

        if search:
            # One parameter serves all three comparisons: the same $ index
            # is deliberately reused, so only one value is appended.
            conditions.append(f"""
                (LOWER(s.name) LIKE LOWER(${param_idx})
                 OR LOWER(s.city) LIKE LOWER(${param_idx})
                 OR LOWER(s.district) LIKE LOWER(${param_idx}))
            """)
            params.append(f"%{search}%")
            param_idx += 1

        # Boolean presence filters need no bind parameter.
        if has_email is not None:
            if has_email:
                conditions.append("s.email IS NOT NULL")
            else:
                conditions.append("s.email IS NULL")

        if has_website is not None:
            if has_website:
                conditions.append("s.website IS NOT NULL")
            else:
                conditions.append("s.website IS NULL")

        if is_public is not None:
            conditions.append(f"s.is_public = ${param_idx}")
            params.append(is_public)
            param_idx += 1

        where_clause = " AND ".join(conditions)

        # Count total matches (for the pagination envelope) with the same
        # filters; the join is needed because school_type filters reference st.
        count_query = f"""
            SELECT COUNT(*) FROM schools s
            LEFT JOIN school_types st ON s.school_type_id = st.id
            WHERE {where_clause}
        """
        total = await conn.fetchval(count_query, *params)

        # Fetch the requested page. LIMIT/OFFSET consume the final two
        # placeholder indices; params is extended to match.
        offset = (page - 1) * page_size
        query = f"""
            SELECT
                s.id, s.name, s.school_number, s.state, s.district, s.city,
                s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
                s.website, s.email, s.phone, s.fax,
                s.principal_name, s.principal_email,
                s.student_count, s.teacher_count,
                s.is_public, s.is_all_day, s.source, s.crawled_at,
                s.is_active, s.created_at, s.updated_at,
                st.name as school_type, st.name_short as school_type_short, st.category as school_category,
                (SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
            FROM schools s
            LEFT JOIN school_types st ON s.school_type_id = st.id
            WHERE {where_clause}
            ORDER BY s.state, s.city, s.name
            LIMIT ${param_idx} OFFSET ${param_idx + 1}
        """
        params.extend([page_size, offset])
        rows = await conn.fetch(query, *params)

        # Map DB rows onto the response model.
        schools = [
            SchoolResponse(
                id=str(row["id"]),
                name=row["name"],
                school_number=row["school_number"],
                school_type=row["school_type"],
                school_type_short=row["school_type_short"],
                school_category=row["school_category"],
                state=row["state"],
                district=row["district"],
                city=row["city"],
                postal_code=row["postal_code"],
                street=row["street"],
                address_full=row["address_full"],
                latitude=row["latitude"],
                longitude=row["longitude"],
                website=row["website"],
                email=row["email"],
                phone=row["phone"],
                fax=row["fax"],
                principal_name=row["principal_name"],
                principal_email=row["principal_email"],
                student_count=row["student_count"],
                teacher_count=row["teacher_count"],
                is_public=row["is_public"],
                is_all_day=row["is_all_day"],
                staff_count=row["staff_count"],
                source=row["source"],
                crawled_at=row["crawled_at"],
                is_active=row["is_active"],
                created_at=row["created_at"],
                updated_at=row["updated_at"],
            )
            for row in rows
        ]

        return SchoolsListResponse(
            schools=schools,
            total=total,
            page=page,
            page_size=page_size,
        )
|
||||
|
||||
|
||||
@router.get("/stats", response_model=SchoolStatsResponse)
async def get_school_stats():
    """Get aggregate school statistics.

    Runs three queries: one row of scalar totals (via correlated
    sub-selects), a per-state breakdown, and a per-type breakdown.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # Total schools and staff — all totals in a single round trip.
        totals = await conn.fetchrow("""
            SELECT
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE) as total_schools,
                (SELECT COUNT(*) FROM school_staff WHERE is_active = TRUE) as total_staff,
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND website IS NOT NULL) as with_website,
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND email IS NOT NULL) as with_email,
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND principal_name IS NOT NULL) as with_principal,
                (SELECT COALESCE(SUM(student_count), 0) FROM schools WHERE is_active = TRUE) as total_students,
                (SELECT COALESCE(SUM(teacher_count), 0) FROM schools WHERE is_active = TRUE) as total_teachers,
                (SELECT MAX(crawled_at) FROM schools) as last_crawl
        """)

        # By state
        state_rows = await conn.fetch("""
            SELECT state, COUNT(*) as count
            FROM schools
            WHERE is_active = TRUE
            GROUP BY state
            ORDER BY state
        """)
        schools_by_state = {row["state"]: row["count"] for row in state_rows}

        # By type — schools without a resolved type are bucketed as 'Unbekannt'.
        type_rows = await conn.fetch("""
            SELECT COALESCE(st.name, 'Unbekannt') as type_name, COUNT(*) as count
            FROM schools s
            LEFT JOIN school_types st ON s.school_type_id = st.id
            WHERE s.is_active = TRUE
            GROUP BY st.name
            ORDER BY count DESC
        """)
        schools_by_type = {row["type_name"]: row["count"] for row in type_rows}

        return SchoolStatsResponse(
            total_schools=totals["total_schools"],
            total_staff=totals["total_staff"],
            schools_by_state=schools_by_state,
            schools_by_type=schools_by_type,
            schools_with_website=totals["with_website"],
            schools_with_email=totals["with_email"],
            schools_with_principal=totals["with_principal"],
            total_students=totals["total_students"],
            total_teachers=totals["total_teachers"],
            last_crawl_time=totals["last_crawl"],
        )
|
||||
|
||||
|
||||
@router.get("/{school_id}", response_model=SchoolResponse)
async def get_school(school_id: str):
    """Get a single school by ID.

    Raises:
        HTTPException 404: when no school with that ID exists.

    NOTE(review): school_id is passed straight into "WHERE s.id = $1" — if
    the column is a UUID, a malformed ID will raise a driver error (HTTP 500)
    rather than 404; confirm whether pre-validation is wanted. Also, unlike
    the list endpoint, inactive schools are returned here (no is_active
    filter) — confirm intended.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT
                s.id, s.name, s.school_number, s.state, s.district, s.city,
                s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
                s.website, s.email, s.phone, s.fax,
                s.principal_name, s.principal_email,
                s.student_count, s.teacher_count,
                s.is_public, s.is_all_day, s.source, s.crawled_at,
                s.is_active, s.created_at, s.updated_at,
                st.name as school_type, st.name_short as school_type_short, st.category as school_category,
                (SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
            FROM schools s
            LEFT JOIN school_types st ON s.school_type_id = st.id
            WHERE s.id = $1
        """, school_id)

        if not row:
            raise HTTPException(status_code=404, detail="School not found")

        # Map the DB row onto the response model.
        return SchoolResponse(
            id=str(row["id"]),
            name=row["name"],
            school_number=row["school_number"],
            school_type=row["school_type"],
            school_type_short=row["school_type_short"],
            school_category=row["school_category"],
            state=row["state"],
            district=row["district"],
            city=row["city"],
            postal_code=row["postal_code"],
            street=row["street"],
            address_full=row["address_full"],
            latitude=row["latitude"],
            longitude=row["longitude"],
            website=row["website"],
            email=row["email"],
            phone=row["phone"],
            fax=row["fax"],
            principal_name=row["principal_name"],
            principal_email=row["principal_email"],
            student_count=row["student_count"],
            teacher_count=row["teacher_count"],
            is_public=row["is_public"],
            is_all_day=row["is_all_day"],
            staff_count=row["staff_count"],
            source=row["source"],
            crawled_at=row["crawled_at"],
            is_active=row["is_active"],
            created_at=row["created_at"],
            updated_at=row["updated_at"],
        )
|
||||
|
||||
|
||||
@router.post("/bulk-import", response_model=BulkImportResponse)
async def bulk_import_schools(request: BulkImportRequest):
    """Bulk import schools. Updates existing schools based on school_number + state.

    Deduplication order: (school_number, state) first, then a
    case-insensitive (name, city, state) match. Matched schools are
    updated, all others inserted.

    NOTE(review): `skipped` is never incremented, so it is always 0 in
    the response. After >100 errors a truncation marker is appended and
    the loop stops, but `errors[:100]` below slices that marker off again
    — confirm the intended cap.
    """
    pool = await get_db_pool()
    imported = 0
    updated = 0
    skipped = 0
    errors = []

    async with pool.acquire() as conn:
        # Get school type mapping (lower-cased name -> id) once up front.
        type_rows = await conn.fetch("SELECT id, name FROM school_types")
        type_map = {row["name"].lower(): str(row["id"]) for row in type_rows}

        for school in request.schools:
            try:
                # Find school type ID from the raw crawler text, if any.
                school_type_id = None
                if school.school_type_raw:
                    school_type_id = type_map.get(school.school_type_raw.lower())

                # Check if school exists (by school_number + state, or by name + city + state)
                existing = None
                if school.school_number:
                    existing = await conn.fetchrow(
                        "SELECT id FROM schools WHERE school_number = $1 AND state = $2",
                        school.school_number, school.state
                    )
                if not existing and school.city:
                    existing = await conn.fetchrow(
                        "SELECT id FROM schools WHERE LOWER(name) = LOWER($1) AND LOWER(city) = LOWER($2) AND state = $3",
                        school.name, school.city, school.state
                    )

                if existing:
                    # Update existing school. COALESCE keeps the current value
                    # whenever the incoming field is NULL, so partial records
                    # never erase previously crawled data (name/is_public are
                    # always overwritten).
                    await conn.execute("""
                        UPDATE schools SET
                            name = $2,
                            school_type_id = COALESCE($3, school_type_id),
                            school_type_raw = COALESCE($4, school_type_raw),
                            district = COALESCE($5, district),
                            city = COALESCE($6, city),
                            postal_code = COALESCE($7, postal_code),
                            street = COALESCE($8, street),
                            address_full = COALESCE($9, address_full),
                            latitude = COALESCE($10, latitude),
                            longitude = COALESCE($11, longitude),
                            website = COALESCE($12, website),
                            email = COALESCE($13, email),
                            phone = COALESCE($14, phone),
                            fax = COALESCE($15, fax),
                            principal_name = COALESCE($16, principal_name),
                            principal_title = COALESCE($17, principal_title),
                            principal_email = COALESCE($18, principal_email),
                            principal_phone = COALESCE($19, principal_phone),
                            student_count = COALESCE($20, student_count),
                            teacher_count = COALESCE($21, teacher_count),
                            is_public = $22,
                            source = COALESCE($23, source),
                            source_url = COALESCE($24, source_url),
                            updated_at = NOW()
                        WHERE id = $1
                    """,
                        existing["id"],
                        school.name,
                        school_type_id,
                        school.school_type_raw,
                        school.district,
                        school.city,
                        school.postal_code,
                        school.street,
                        school.address_full,
                        school.latitude,
                        school.longitude,
                        school.website,
                        school.email,
                        school.phone,
                        school.fax,
                        school.principal_name,
                        school.principal_title,
                        school.principal_email,
                        school.principal_phone,
                        school.student_count,
                        school.teacher_count,
                        school.is_public,
                        school.source,
                        school.source_url,
                    )
                    updated += 1
                else:
                    # Insert new school; crawled_at records this import run.
                    await conn.execute("""
                        INSERT INTO schools (
                            name, school_number, school_type_id, school_type_raw,
                            state, district, city, postal_code, street, address_full,
                            latitude, longitude, website, email, phone, fax,
                            principal_name, principal_title, principal_email, principal_phone,
                            student_count, teacher_count, is_public,
                            source, source_url, crawled_at
                        ) VALUES (
                            $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
                            $11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
                            $21, $22, $23, $24, $25, NOW()
                        )
                    """,
                        school.name,
                        school.school_number,
                        school_type_id,
                        school.school_type_raw,
                        school.state,
                        school.district,
                        school.city,
                        school.postal_code,
                        school.street,
                        school.address_full,
                        school.latitude,
                        school.longitude,
                        school.website,
                        school.email,
                        school.phone,
                        school.fax,
                        school.principal_name,
                        school.principal_title,
                        school.principal_email,
                        school.principal_phone,
                        school.student_count,
                        school.teacher_count,
                        school.is_public,
                        school.source,
                        school.source_url,
                    )
                    imported += 1

            except Exception as e:
                # Per-record errors are collected, not fatal; bail out after
                # the error list grows past 100 entries.
                errors.append(f"Error importing {school.name}: {str(e)}")
                if len(errors) > 100:
                    errors.append("... (more errors truncated)")
                    break

    return BulkImportResponse(
        imported=imported,
        updated=updated,
        skipped=skipped,
        errors=errors[:100],
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# School Staff Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/{school_id}/staff", response_model=SchoolStaffListResponse)
async def get_school_staff(school_id: str):
    """Return the active staff roster of one school.

    Results are ordered by role importance (principal, vice principal,
    secretary, everyone else) and then alphabetically by last name.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch("""
            SELECT
                ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
                ss.title, ss.position, ss.position_type, ss.subjects,
                ss.email, ss.phone, ss.profile_url, ss.photo_url,
                ss.is_active, ss.created_at,
                s.name as school_name
            FROM school_staff ss
            JOIN schools s ON ss.school_id = s.id
            WHERE ss.school_id = $1 AND ss.is_active = TRUE
            ORDER BY
                CASE ss.position_type
                    WHEN 'principal' THEN 1
                    WHEN 'vice_principal' THEN 2
                    WHEN 'secretary' THEN 3
                    ELSE 4
                END,
                ss.last_name
        """, school_id)

    # asyncpg records are fully materialized, so mapping can happen
    # after the connection has been released back to the pool.
    members = []
    for record in records:
        members.append(
            SchoolStaffResponse(
                id=str(record["id"]),
                school_id=str(record["school_id"]),
                school_name=record["school_name"],
                first_name=record["first_name"],
                last_name=record["last_name"],
                full_name=record["full_name"],
                title=record["title"],
                position=record["position"],
                position_type=record["position_type"],
                subjects=record["subjects"],
                email=record["email"],
                phone=record["phone"],
                profile_url=record["profile_url"],
                photo_url=record["photo_url"],
                is_active=record["is_active"],
                created_at=record["created_at"],
            )
        )

    return SchoolStaffListResponse(staff=members, total=len(members))
|
||||
|
||||
|
||||
@router.post("/{school_id}/staff", response_model=SchoolStaffResponse)
async def create_school_staff(school_id: str, staff: SchoolStaffBase):
    """Add a staff member to a school.

    Raises a 404 if the school does not exist. When no explicit
    ``full_name`` is supplied it is assembled from title, first name
    and last name (skipping the parts that are empty).
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # The school must exist before we attach staff to it.
        school = await conn.fetchrow("SELECT name FROM schools WHERE id = $1", school_id)
        if not school:
            raise HTTPException(status_code=404, detail="School not found")

        display_name = staff.full_name
        if not display_name:
            # Only title and first name are optional; last name always present.
            name_parts = [part for part in (staff.title, staff.first_name) if part]
            name_parts.append(staff.last_name)
            display_name = " ".join(name_parts)

        row = await conn.fetchrow("""
            INSERT INTO school_staff (
                school_id, first_name, last_name, full_name, title,
                position, position_type, subjects, email, phone
            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
            RETURNING id, created_at
        """,
            school_id,
            staff.first_name,
            staff.last_name,
            display_name,
            staff.title,
            staff.position,
            staff.position_type,
            staff.subjects,
            staff.email,
            staff.phone,
        )

    return SchoolStaffResponse(
        id=str(row["id"]),
        school_id=school_id,
        school_name=school["name"],
        first_name=staff.first_name,
        last_name=staff.last_name,
        full_name=display_name,
        title=staff.title,
        position=staff.position,
        position_type=staff.position_type,
        subjects=staff.subjects,
        email=staff.email,
        phone=staff.phone,
        is_active=True,
        created_at=row["created_at"],
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Search Endpoints
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@router.get("/search/staff", response_model=SchoolStaffListResponse)
async def search_school_staff(
    q: Optional[str] = Query(None, description="Search query"),
    state: Optional[str] = Query(None, description="Filter by state"),
    position_type: Optional[str] = Query(None, description="Filter by position type"),
    has_email: Optional[bool] = Query(None, description="Only staff with email"),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
):
    """Search school staff across all schools.

    Supports free-text matching on staff name and school name plus
    optional filters, with page/page_size pagination.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # Base filters: only active staff at active schools.
        filters = ["ss.is_active = TRUE", "s.is_active = TRUE"]
        params: list = []

        if q:
            # The same positional parameter is reused for all three LIKEs.
            idx = len(params) + 1
            filters.append(f"""
                (LOWER(ss.full_name) LIKE LOWER(${idx})
                 OR LOWER(ss.last_name) LIKE LOWER(${idx})
                 OR LOWER(s.name) LIKE LOWER(${idx}))
            """)
            params.append(f"%{q}%")

        if state:
            filters.append(f"s.state = ${len(params) + 1}")
            params.append(state.upper())

        if position_type:
            filters.append(f"ss.position_type = ${len(params) + 1}")
            params.append(position_type)

        if has_email:
            filters.append("ss.email IS NOT NULL")

        where_clause = " AND ".join(filters)

        # Total row count for the pagination header.
        total = await conn.fetchval(f"""
            SELECT COUNT(*) FROM school_staff ss
            JOIN schools s ON ss.school_id = s.id
            WHERE {where_clause}
        """, *params)

        offset = (page - 1) * page_size
        records = await conn.fetch(f"""
            SELECT
                ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
                ss.title, ss.position, ss.position_type, ss.subjects,
                ss.email, ss.phone, ss.profile_url, ss.photo_url,
                ss.is_active, ss.created_at,
                s.name as school_name
            FROM school_staff ss
            JOIN schools s ON ss.school_id = s.id
            WHERE {where_clause}
            ORDER BY ss.last_name, ss.first_name
            LIMIT ${len(params) + 1} OFFSET ${len(params) + 2}
        """, *params, page_size, offset)

    matches = []
    for record in records:
        matches.append(
            SchoolStaffResponse(
                id=str(record["id"]),
                school_id=str(record["school_id"]),
                school_name=record["school_name"],
                first_name=record["first_name"],
                last_name=record["last_name"],
                full_name=record["full_name"],
                title=record["title"],
                position=record["position"],
                position_type=record["position_type"],
                subjects=record["subjects"],
                email=record["email"],
                phone=record["phone"],
                profile_url=record["profile_url"],
                photo_url=record["photo_url"],
                is_active=record["is_active"],
                created_at=record["created_at"],
            )
        )

    return SchoolStaffListResponse(staff=matches, total=total)
|
||||
174
backend/llm_gateway/routes/tools.py
Normal file
174
backend/llm_gateway/routes/tools.py
Normal file
@@ -0,0 +1,174 @@
|
||||
"""
|
||||
Tool Routes für LLM Gateway.
|
||||
|
||||
Bietet API-Endpoints für externe Tools wie Web Search.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..middleware.auth import verify_api_key
|
||||
from ..services.tool_gateway import (
|
||||
ToolGateway,
|
||||
get_tool_gateway,
|
||||
SearchDepth,
|
||||
TavilyError,
|
||||
ToolGatewayError,
|
||||
)
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# Request/Response Models
|
||||
class SearchRequest(BaseModel):
    """Payload for a web-search request."""

    # Free-text search query (required, bounded length).
    query: str = Field(..., min_length=1, max_length=1000, description="Suchanfrage")
    # Depth of the search; provider default is used when omitted.
    search_depth: Optional[SearchDepth] = Field(
        default=None, description="Suchtiefe: basic (schnell) oder advanced (gründlich)"
    )
    # Cap on the number of returned results (provider default when omitted).
    max_results: Optional[int] = Field(
        default=None, ge=1, le=20, description="Maximale Anzahl Ergebnisse (1-20)"
    )
    # Optional domain allow-list.
    include_domains: Optional[list[str]] = Field(
        default=None, description="Nur diese Domains durchsuchen"
    )
    # Optional domain deny-list.
    exclude_domains: Optional[list[str]] = Field(
        default=None, description="Diese Domains ausschließen"
    )
|
||||
|
||||
|
||||
class SearchResultItem(BaseModel):
    """A single web-search hit."""

    title: str          # page title
    url: str            # page URL
    content: str        # snippet/extract of the page
    score: float        # provider relevance score
    published_date: Optional[str] = None  # publication date when known
|
||||
|
||||
|
||||
class SearchResponse(BaseModel):
    """Response envelope for a web-search request."""

    # The query as submitted by the caller.
    query: str
    # Query after PII redaction; only set when PII was found.
    redacted_query: Optional[str] = Field(
        default=None, description="Redaktierte Query (nur wenn PII gefunden)"
    )
    # Individual search hits.
    results: list[SearchResultItem]
    # Optional AI-generated summary of the hits.
    answer: Optional[str] = Field(
        default=None, description="KI-generierte Zusammenfassung der Ergebnisse"
    )
    # Whether PII was detected (and redacted) in the query.
    pii_detected: bool = Field(
        default=False,
        description="True wenn PII in der Anfrage erkannt und redaktiert wurde",
    )
    # Which PII categories were detected.
    pii_types: list[str] = Field(
        default_factory=list, description="Liste der erkannten PII-Typen"
    )
    # End-to-end latency of the search call.
    response_time_ms: int = Field(default=0, description="Antwortzeit in Millisekunden")
|
||||
|
||||
|
||||
class ToolsHealthResponse(BaseModel):
    """Health status of the external tool services."""

    tavily: dict         # web-search provider status
    pii_redaction: dict  # PII-redaction filter status
|
||||
|
||||
|
||||
@router.post("/search", response_model=SearchResponse)
async def web_search(
    request: SearchRequest,
    _: str = Depends(verify_api_key),
    tool_gateway: ToolGateway = Depends(get_tool_gateway),
):
    """
    Perform a web search.

    The query is automatically screened for personally identifiable
    information (PII). Any PII found is redacted before the query is
    sent to the search provider, to keep the request GDPR-compliant.

    **PII detection covers:**
    - e-mail addresses
    - phone numbers
    - IBAN/bank accounts
    - credit card numbers
    - social security numbers
    - IP addresses
    - dates of birth

    **Example:**
    ```
    POST /llm/tools/search
    {
        "query": "Schulrecht Bayern Datenschutz",
        "max_results": 5
    }
    ```

    Raises:
        HTTPException 502: the search provider returned an error.
        HTTPException 503: the tool gateway itself is unavailable.
    """
    # Keep the try body minimal: only the gateway call can raise the
    # tool errors we translate; response construction stays outside.
    try:
        result = await tool_gateway.search(
            query=request.query,
            search_depth=request.search_depth,
            max_results=request.max_results,
            include_domains=request.include_domains,
            exclude_domains=request.exclude_domains,
        )
    except TavilyError as e:
        # TavilyError first (more specific, inherits from ToolGatewayError).
        # Chain the cause so the original error survives in tracebacks.
        raise HTTPException(
            status_code=502,
            detail=f"Search service error: {e}",
        ) from e
    except ToolGatewayError as e:
        raise HTTPException(
            status_code=503,
            detail=f"Tool service unavailable: {e}",
        ) from e

    return SearchResponse(
        query=result.query,
        redacted_query=result.redacted_query,
        results=[
            SearchResultItem(
                title=r.title,
                url=r.url,
                content=r.content,
                score=r.score,
                published_date=r.published_date,
            )
            for r in result.results
        ],
        answer=result.answer,
        pii_detected=result.pii_detected,
        pii_types=result.pii_types,
        response_time_ms=result.response_time_ms,
    )
|
||||
|
||||
|
||||
@router.get("/health", response_model=ToolsHealthResponse)
async def tools_health(
    _: str = Depends(verify_api_key),
    tool_gateway: ToolGateway = Depends(get_tool_gateway),
):
    """Report the health of the tool services.

    One status entry per configured tool service:
    - Tavily: web search
    - PII redaction: privacy filter
    """
    report = await tool_gateway.health_check()
    return ToolsHealthResponse(**report)
|
||||
Reference in New Issue
Block a user