""" Chat Completions Route - OpenAI-kompatible API. """ import logging import json from typing import AsyncIterator from fastapi import APIRouter, HTTPException, Depends from fastapi.responses import StreamingResponse from ..models.chat import ( ChatCompletionRequest, ChatCompletionResponse, ChatMessage, ModelListResponse, ) from ..services.inference import get_inference_service, InferenceService from ..services.playbook_service import get_playbook_service, PlaybookService from ..middleware.auth import verify_api_key logger = logging.getLogger(__name__) router = APIRouter(tags=["LLM"]) def get_services(): """Dependency für Services.""" return get_inference_service(), get_playbook_service() @router.post("/chat/completions", response_model=ChatCompletionResponse) async def chat_completions( request: ChatCompletionRequest, _: str = Depends(verify_api_key), ): """ OpenAI-kompatible Chat Completions. Unterstützt: - Streaming (stream=true) - Playbook-basierte System Prompts (metadata.playbook_id) - Multiple Models (breakpilot-teacher-8b, claude-3-5-sonnet, etc.) """ inference_service, playbook_service = get_services() # Playbook System Prompt injizieren if request.metadata and request.metadata.playbook_id: playbook = playbook_service.get_playbook(request.metadata.playbook_id) if playbook: # System Prompt an den Anfang der Messages einfügen system_msg = ChatMessage(role="system", content=playbook.system_prompt) # Prüfen ob bereits ein System Prompt existiert has_system = any(m.role == "system" for m in request.messages) if not has_system: request.messages.insert(0, system_msg) else: # Playbook Prompt vor bestehenden System Prompt setzen for i, msg in enumerate(request.messages): if msg.role == "system": msg.content = f"{playbook.system_prompt}\n\n{msg.content}" break try: if request.stream: return StreamingResponse( stream_response(request, inference_service), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no", }, ) else: response = await inference_service.complete(request) return response except ValueError as e: logger.error(f"Chat completion error: {e}") raise HTTPException(status_code=400, detail=str(e)) except Exception as e: logger.exception(f"Chat completion failed: {e}") raise HTTPException(status_code=500, detail="Internal server error") async def stream_response( request: ChatCompletionRequest, inference_service: InferenceService, ) -> AsyncIterator[str]: """Generator für SSE Streaming.""" try: async for chunk in inference_service.stream(request): data = chunk.model_dump_json() yield f"data: {data}\n\n" yield "data: [DONE]\n\n" except Exception as e: logger.exception(f"Streaming error: {e}") error_data = json.dumps({"error": str(e)}) yield f"data: {error_data}\n\n" @router.get("/models", response_model=ModelListResponse) async def list_models( _: str = Depends(verify_api_key), ): """ Liste verfügbarer Modelle. Gibt alle konfigurierten Modelle zurück, die aktuell verfügbar sind. """ inference_service = get_inference_service() return await inference_service.list_models()