All services: admin-v2, studio-v2, website, ai-compliance-sdk, consent-service, klausur-service, voice-service, and infrastructure. Large PDFs and compiled binaries excluded via .gitignore.
113 lines
3.7 KiB
Python
"""
|
|
Chat Completions Route - OpenAI-kompatible API.
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
from typing import AsyncIterator
|
|
from fastapi import APIRouter, HTTPException, Depends
|
|
from fastapi.responses import StreamingResponse
|
|
|
|
from ..models.chat import (
|
|
ChatCompletionRequest,
|
|
ChatCompletionResponse,
|
|
ChatMessage,
|
|
ModelListResponse,
|
|
)
|
|
from ..services.inference import get_inference_service, InferenceService
|
|
from ..services.playbook_service import get_playbook_service, PlaybookService
|
|
from ..middleware.auth import verify_api_key
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
router = APIRouter(tags=["LLM"])
|
|
|
|
|
|
def get_services():
|
|
"""Dependency für Services."""
|
|
return get_inference_service(), get_playbook_service()
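
# Note: the handlers below call get_services() directly rather than wiring it
# through Depends; both service getters are assumed to return shared
# singletons.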


@router.post("/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(
    request: ChatCompletionRequest,
    _: str = Depends(verify_api_key),
):
    """
    OpenAI-compatible chat completions.

    Supports:
    - Streaming (stream=true)
    - Playbook-based system prompts (metadata.playbook_id)
    - Multiple models (breakpilot-teacher-8b, claude-3-5-sonnet, etc.)

    A sample request is sketched in the comment block below this handler.
    """
    inference_service, playbook_service = get_services()

    # Inject the playbook system prompt
    if request.metadata and request.metadata.playbook_id:
        playbook = playbook_service.get_playbook(request.metadata.playbook_id)
        if playbook:
            # Insert the system prompt at the start of the messages
            system_msg = ChatMessage(role="system", content=playbook.system_prompt)
            # Check whether a system prompt already exists
            has_system = any(m.role == "system" for m in request.messages)
            if not has_system:
                request.messages.insert(0, system_msg)
            else:
                # Prepend the playbook prompt to the existing system prompt
                for msg in request.messages:
                    if msg.role == "system":
                        msg.content = f"{playbook.system_prompt}\n\n{msg.content}"
                        break
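
    # Example (hypothetical values): if the playbook's system prompt is
    # "You are a patient tutor." and the request already carries the system
    # message "Answer concisely.", the merged system message becomes
    # "You are a patient tutor.\n\nAnswer concisely."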

    try:
        if request.stream:
            return StreamingResponse(
                stream_response(request, inference_service),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "X-Accel-Buffering": "no",
                },
            )
        else:
            response = await inference_service.complete(request)
            return response

    except ValueError as e:
        logger.error(f"Chat completion error: {e}")
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.exception(f"Chat completion failed: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
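

# A sample request against this route (a sketch: "math-tutor" is a
# hypothetical playbook_id, and the Bearer scheme is an assumption about
# verify_api_key in ..middleware.auth):
#
#   POST /chat/completions
#   Authorization: Bearer <api-key>
#   {
#     "model": "breakpilot-teacher-8b",
#     "messages": [{"role": "user", "content": "Explain quadratic equations."}],
#     "stream": true,
#     "metadata": {"playbook_id": "math-tutor"}
#   }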


async def stream_response(
    request: ChatCompletionRequest,
    inference_service: InferenceService,
) -> AsyncIterator[str]:
    """Generator for SSE streaming."""
    try:
        async for chunk in inference_service.stream(request):
            data = chunk.model_dump_json()
            yield f"data: {data}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as e:
        logger.exception(f"Streaming error: {e}")
        error_data = json.dumps({"error": str(e)})
        yield f"data: {error_data}\n\n"
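
# On the wire this is standard server-sent events: one JSON chunk per
# "data:" event, closed by a [DONE] sentinel as in the OpenAI streaming
# format. The exact chunk fields come from ..models.chat; the shape below
# is an assumption for illustration:
#
#   data: {"id": "...", "choices": [{"delta": {"content": "Hel"}}], ...}
#   data: {"id": "...", "choices": [{"delta": {"content": "lo"}}], ...}
#   data: [DONE]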


@router.get("/models", response_model=ModelListResponse)
async def list_models(
    _: str = Depends(verify_api_key),
):
    """
    List available models.

    Returns all configured models that are currently available.
    """
    inference_service = get_inference_service()
    return await inference_service.list_models()
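
# A matching client call (a sketch; the Bearer scheme is again an
# assumption about verify_api_key):
#
#   curl -H "Authorization: Bearer <api-key>" https://<host>/models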