Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
113 lines
3.7 KiB
Python
113 lines
3.7 KiB
Python
"""
|
|
Chat Completions Route - OpenAI-kompatible API.
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
from typing import AsyncIterator
|
|
from fastapi import APIRouter, HTTPException, Depends
|
|
from fastapi.responses import StreamingResponse
|
|
|
|
from ..models.chat import (
|
|
ChatCompletionRequest,
|
|
ChatCompletionResponse,
|
|
ChatMessage,
|
|
ModelListResponse,
|
|
)
|
|
from ..services.inference import get_inference_service, InferenceService
|
|
from ..services.playbook_service import get_playbook_service, PlaybookService
|
|
from ..middleware.auth import verify_api_key
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
router = APIRouter(tags=["LLM"])
|
|
|
|
|
|
def get_services():
    """Resolve the service singletons used by the chat endpoints.

    Returns the pair ``(inference_service, playbook_service)``.
    """
    inference = get_inference_service()
    playbooks = get_playbook_service()
    return inference, playbooks
|
|
|
|
|
|
@router.post("/chat/completions", response_model=ChatCompletionResponse)
|
|
async def chat_completions(
|
|
request: ChatCompletionRequest,
|
|
_: str = Depends(verify_api_key),
|
|
):
|
|
"""
|
|
OpenAI-kompatible Chat Completions.
|
|
|
|
Unterstützt:
|
|
- Streaming (stream=true)
|
|
- Playbook-basierte System Prompts (metadata.playbook_id)
|
|
- Multiple Models (breakpilot-teacher-8b, claude-3-5-sonnet, etc.)
|
|
"""
|
|
inference_service, playbook_service = get_services()
|
|
|
|
# Playbook System Prompt injizieren
|
|
if request.metadata and request.metadata.playbook_id:
|
|
playbook = playbook_service.get_playbook(request.metadata.playbook_id)
|
|
if playbook:
|
|
# System Prompt an den Anfang der Messages einfügen
|
|
system_msg = ChatMessage(role="system", content=playbook.system_prompt)
|
|
# Prüfen ob bereits ein System Prompt existiert
|
|
has_system = any(m.role == "system" for m in request.messages)
|
|
if not has_system:
|
|
request.messages.insert(0, system_msg)
|
|
else:
|
|
# Playbook Prompt vor bestehenden System Prompt setzen
|
|
for i, msg in enumerate(request.messages):
|
|
if msg.role == "system":
|
|
msg.content = f"{playbook.system_prompt}\n\n{msg.content}"
|
|
break
|
|
|
|
try:
|
|
if request.stream:
|
|
return StreamingResponse(
|
|
stream_response(request, inference_service),
|
|
media_type="text/event-stream",
|
|
headers={
|
|
"Cache-Control": "no-cache",
|
|
"Connection": "keep-alive",
|
|
"X-Accel-Buffering": "no",
|
|
},
|
|
)
|
|
else:
|
|
response = await inference_service.complete(request)
|
|
return response
|
|
|
|
except ValueError as e:
|
|
logger.error(f"Chat completion error: {e}")
|
|
raise HTTPException(status_code=400, detail=str(e))
|
|
except Exception as e:
|
|
logger.exception(f"Chat completion failed: {e}")
|
|
raise HTTPException(status_code=500, detail="Internal server error")
|
|
|
|
|
|
async def stream_response(
    request: "ChatCompletionRequest",
    inference_service: "InferenceService",
) -> AsyncIterator[str]:
    """Generate Server-Sent Events frames for a streaming chat completion.

    Emits one ``data: <chunk-json>`` frame per chunk from the inference
    service, followed by the OpenAI-style ``data: [DONE]`` terminator.
    On failure, a final ``data: {"error": ...}`` frame is emitted instead
    of the terminator (the HTTP status is already sent, so errors can only
    be reported in-band).

    Args:
        request: The chat completion request to stream.
        inference_service: Backend providing ``stream(request)`` chunks.

    Yields:
        SSE-formatted strings, each terminated by a blank line.
    """
    try:
        async for chunk in inference_service.stream(request):
            yield f"data: {chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as e:  # boundary: report the error to the client in-band
        logger.exception("Streaming error: %s", e)
        error_data = json.dumps({"error": str(e)})
        yield f"data: {error_data}\n\n"
|
|
|
|
|
|
@router.get("/models", response_model=ModelListResponse)
|
|
async def list_models(
|
|
_: str = Depends(verify_api_key),
|
|
):
|
|
"""
|
|
Liste verfügbarer Modelle.
|
|
|
|
Gibt alle konfigurierten Modelle zurück, die aktuell verfügbar sind.
|
|
"""
|
|
inference_service = get_inference_service()
|
|
return await inference_service.list_models()
|