fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.

This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).

Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions

View File

@@ -0,0 +1,8 @@
"""
BreakPilot LLM Gateway
OpenAI-kompatibles API Gateway für Self-hosted LLMs.
Unterstützt: Ollama (lokal), vLLM (remote), Claude API (Fallback)
"""
__version__ = "0.1.0"

View File

@@ -0,0 +1,122 @@
"""
LLM Gateway Konfiguration
Lädt Einstellungen aus Umgebungsvariablen.
"""
import os
from typing import Optional
from dataclasses import dataclass, field
@dataclass
class LLMBackendConfig:
    """Connection settings for a single LLM backend (Ollama, vLLM, or Anthropic)."""
    # Backend identifier, e.g. "ollama", "vllm", "anthropic".
    name: str
    # Base URL of the backend's HTTP API.
    base_url: str
    # Optional API key; local backends (Ollama) typically need none.
    api_key: Optional[str] = None
    # Model used when a request does not specify one.
    default_model: str = ""
    # Request timeout in seconds.
    timeout: int = 120
    # Lets a configured backend be switched off without removing its config.
    enabled: bool = True
@dataclass
class GatewayConfig:
    """Top-level gateway configuration, assembled from environment variables."""
    # Server
    host: str = "0.0.0.0"
    port: int = 8002
    debug: bool = False
    # Auth
    jwt_secret: str = ""  # secret used to verify HS256 JWTs (see middleware/auth.py)
    api_keys: list[str] = field(default_factory=list)  # static API keys accepted by the gateway
    # Rate-limiting budgets (enforcement happens elsewhere in the gateway)
    rate_limit_requests_per_minute: int = 60
    rate_limit_tokens_per_minute: int = 100000
    # Backends; None means "not configured"
    ollama: Optional[LLMBackendConfig] = None
    vllm: Optional[LLMBackendConfig] = None
    anthropic: Optional[LLMBackendConfig] = None
    # Preference order in which backends are considered
    backend_priority: list[str] = field(default_factory=lambda: ["ollama", "vllm", "anthropic"])
    # Playbooks (system-prompt templates)
    playbooks_enabled: bool = True
    # Logging
    log_level: str = "INFO"
    audit_logging: bool = True
def load_config() -> GatewayConfig:
    """Build a GatewayConfig from environment variables.

    Returns:
        GatewayConfig with server/auth/rate-limit settings plus one
        LLMBackendConfig per configured backend (Ollama, vLLM, Anthropic).
    """
    raw_keys = os.getenv("LLM_API_KEYS", "")
    config = GatewayConfig(
        host=os.getenv("LLM_GATEWAY_HOST", "0.0.0.0"),
        port=int(os.getenv("LLM_GATEWAY_PORT", "8002")),
        debug=os.getenv("LLM_GATEWAY_DEBUG", "false").lower() == "true",
        jwt_secret=os.getenv("JWT_SECRET", ""),
        # Strip whitespace and drop empty entries so "a, b," yields ["a", "b"]
        # instead of keys (" b", "") that can never match an incoming header.
        api_keys=[k.strip() for k in raw_keys.split(",") if k.strip()],
        rate_limit_requests_per_minute=int(os.getenv("LLM_RATE_LIMIT_RPM", "60")),
        rate_limit_tokens_per_minute=int(os.getenv("LLM_RATE_LIMIT_TPM", "100000")),
        log_level=os.getenv("LLM_LOG_LEVEL", "INFO"),
        audit_logging=os.getenv("LLM_AUDIT_LOGGING", "true").lower() == "true",
    )
    # Ollama backend (local); always configured because a default URL exists,
    # but it can be disabled via OLLAMA_ENABLED=false.
    ollama_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
    if ollama_url:
        config.ollama = LLMBackendConfig(
            name="ollama",
            base_url=ollama_url,
            default_model=os.getenv("OLLAMA_DEFAULT_MODEL", "llama3.1:8b"),
            timeout=int(os.getenv("OLLAMA_TIMEOUT", "120")),
            enabled=os.getenv("OLLAMA_ENABLED", "true").lower() == "true",
        )
    # vLLM backend (remote, e.g. vast.ai); only configured when a URL is set.
    vllm_url = os.getenv("VLLM_BASE_URL")
    if vllm_url:
        config.vllm = LLMBackendConfig(
            name="vllm",
            base_url=vllm_url,
            api_key=os.getenv("VLLM_API_KEY"),
            default_model=os.getenv("VLLM_DEFAULT_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct"),
            timeout=int(os.getenv("VLLM_TIMEOUT", "120")),
            enabled=os.getenv("VLLM_ENABLED", "true").lower() == "true",
        )
    # Anthropic backend (Claude API fallback); only configured when a key is set.
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    if anthropic_key:
        config.anthropic = LLMBackendConfig(
            name="anthropic",
            base_url="https://api.anthropic.com",
            api_key=anthropic_key,
            default_model=os.getenv("ANTHROPIC_DEFAULT_MODEL", "claude-3-5-sonnet-20241022"),
            timeout=int(os.getenv("ANTHROPIC_TIMEOUT", "120")),
            enabled=os.getenv("ANTHROPIC_ENABLED", "true").lower() == "true",
        )
    # Backend priority; strip entries and ignore empties from stray commas.
    priority = os.getenv("LLM_BACKEND_PRIORITY", "ollama,vllm,anthropic")
    config.backend_priority = [b.strip() for b in priority.split(",") if b.strip()]
    return config
# Process-wide configuration singleton, created lazily on first access.
_config: Optional[GatewayConfig] = None


def get_config() -> GatewayConfig:
    """Return the global GatewayConfig, loading it on first use."""
    global _config
    if _config is not None:
        return _config
    _config = load_config()
    return _config

View File

@@ -0,0 +1,85 @@
"""
BreakPilot LLM Gateway - Main Application
OpenAI-kompatibles API Gateway für Self-hosted LLMs.
"""
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .config import get_config
from .routes import chat_router, playbooks_router, health_router, comparison_router, edu_search_seeds_router, communication_router
from .services.inference import get_inference_service
# Logging setup: configure the root logger once at import time so every
# module in the gateway shares the same output format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifecycle hook: log configured backends on startup and release the
    shared inference service's connections on shutdown."""
    logger.info("Starting LLM Gateway...")
    config = get_config()
    logger.info(f"Debug mode: {config.debug}")
    logger.info(f"Backends configured: ollama={bool(config.ollama)}, vllm={bool(config.vllm)}, anthropic={bool(config.anthropic)}")
    yield
    # Shutdown path: runs after the server stops accepting requests.
    logger.info("Shutting down LLM Gateway...")
    await get_inference_service().close()
def create_app() -> FastAPI:
    """Build and wire the FastAPI application instance."""
    config = get_config()
    # Hide the interactive docs outside of debug mode.
    docs = "/docs" if config.debug else None
    redoc = "/redoc" if config.debug else None
    app = FastAPI(
        title="BreakPilot LLM Gateway",
        description="OpenAI-kompatibles API Gateway für Self-hosted LLMs",
        version="0.1.0",
        lifespan=lifespan,
        docs_url=docs,
        redoc_url=redoc,
    )
    # CORS — NOTE(review): wildcard origins; restrict in production.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    # Route registration; OpenAI-style endpoints live under the /v1 prefix.
    for sub_router, prefix in (
        (health_router, ""),
        (chat_router, "/v1"),
        (playbooks_router, ""),
        (comparison_router, "/v1"),
        (edu_search_seeds_router, "/v1"),
        (communication_router, "/v1"),
    ):
        app.include_router(sub_router, prefix=prefix)
    return app
# App instance for uvicorn ("llm_gateway.main:app").
app = create_app()

if __name__ == "__main__":
    # Local development entry point; in production uvicorn is launched externally.
    import uvicorn

    config = get_config()
    uvicorn.run(
        "llm_gateway.main:app",
        host=config.host,
        port=config.port,
        reload=config.debug,  # auto-reload only in debug mode
    )

View File

@@ -0,0 +1,7 @@
"""
LLM Gateway Middleware.
"""
from .auth import verify_api_key
__all__ = ["verify_api_key"]

View File

@@ -0,0 +1,96 @@
"""
Auth Middleware für LLM Gateway.
Unterstützt:
- API Key Auth (X-API-Key Header oder Authorization Bearer)
- JWT Token Auth (vom Consent Service)
"""
import logging
from typing import Optional
from fastapi import HTTPException, Header, Depends
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
import jwt
from ..config import get_config
logger = logging.getLogger(__name__)
# Bearer-token extractor; auto_error=False so missing credentials fall through
# to the API-key / debug-mode handling instead of failing immediately.
security = HTTPBearer(auto_error=False)
async def verify_api_key(
    x_api_key: Optional[str] = Header(None, alias="X-API-Key"),
    authorization: Optional[HTTPAuthorizationCredentials] = Depends(security),
) -> str:
    """Authenticate a request via API key or JWT.

    Accepts either an ``X-API-Key`` header or ``Authorization: Bearer <token>``,
    where the bearer token may be a configured API key or a JWT signed with
    the gateway's secret (HS256, issued by the consent service).

    Returns:
        The JWT's user id (``user_id`` or ``sub`` claim), ``"api_key"`` for
        API-key auth, or ``"debug_user"`` when auth is bypassed in debug mode.

    Raises:
        HTTPException: 401 for missing, invalid, or expired credentials.
    """
    import hmac  # local import, matching the file's function-scoped import style

    config = get_config()

    def _key_matches(candidate: str) -> bool:
        # Constant-time comparison so timing does not leak key prefixes
        # (a plain `candidate in config.api_keys` short-circuits per byte).
        return any(hmac.compare_digest(candidate, k) for k in config.api_keys)

    # 1. X-API-Key header
    if x_api_key:
        if _key_matches(x_api_key):
            return "api_key"
        logger.warning("Invalid API key attempted")
        raise HTTPException(
            status_code=401,
            detail={"error": "unauthorized", "message": "Invalid API key"},
        )

    # 2. Authorization header
    if authorization:
        token = authorization.credentials
        # The bearer token may itself be one of the static API keys.
        if _key_matches(token):
            return "api_key"
        # Otherwise try to decode it as a JWT.
        if config.jwt_secret:
            try:
                payload = jwt.decode(
                    token,
                    config.jwt_secret,
                    algorithms=["HS256"],
                )
                user_id = payload.get("user_id") or payload.get("sub")
                if user_id:
                    return str(user_id)
                # A valid JWT without a user claim falls through to 401/debug.
            except jwt.ExpiredSignatureError:
                raise HTTPException(
                    status_code=401,
                    detail={"error": "token_expired", "message": "Token has expired"},
                )
            except jwt.InvalidTokenError as e:
                logger.warning(f"Invalid JWT token: {e}")
                raise HTTPException(
                    status_code=401,
                    detail={"error": "invalid_token", "message": "Invalid token"},
                )

    # 3. Debug mode: allow unauthenticated access for local development.
    if config.debug:
        logger.warning("Auth bypassed in debug mode")
        return "debug_user"

    # 4. No usable credentials.
    raise HTTPException(
        status_code=401,
        detail={
            "error": "unauthorized",
            "message": "API key or valid token required",
        },
    )
def get_current_user_id(user_id: str = Depends(verify_api_key)) -> str:
    """FastAPI dependency returning the authenticated caller's user id.

    Thin alias over verify_api_key for routes that want a semantically
    named dependency.
    """
    return user_id

View File

@@ -0,0 +1,31 @@
"""
Pydantic Models für OpenAI-kompatible API.
"""
from .chat import (
ChatMessage,
ChatCompletionRequest,
ChatCompletionResponse,
ChatCompletionChunk,
ChatChoice,
ChatChoiceDelta,
Usage,
ToolCall,
FunctionCall,
Tool,
ToolFunction,
)
__all__ = [
"ChatMessage",
"ChatCompletionRequest",
"ChatCompletionResponse",
"ChatCompletionChunk",
"ChatChoice",
"ChatChoiceDelta",
"Usage",
"ToolCall",
"FunctionCall",
"Tool",
"ToolFunction",
]

View File

@@ -0,0 +1,135 @@
"""
OpenAI-kompatible Chat Completion Models.
Basiert auf OpenAI API Spezifikation:
https://platform.openai.com/docs/api-reference/chat/create
"""
from __future__ import annotations
from typing import Optional, Literal, Any, Union, List, Dict
from pydantic import BaseModel, Field
import time
import uuid
class FunctionCall(BaseModel):
    """Function invocation requested by the model (name + JSON-encoded args)."""
    name: str
    arguments: str  # JSON string, kept unparsed as in the OpenAI API spec
class ToolCall(BaseModel):
    """A tool call emitted by the model."""
    # OpenAI-style call id, e.g. "call_ab12cd34ef56".
    id: str = Field(default_factory=lambda: f"call_{uuid.uuid4().hex[:12]}")
    type: Literal["function"] = "function"  # only function tools are modeled
    function: FunctionCall
class ChatMessage(BaseModel):
    """One message in a chat conversation."""
    role: Literal["system", "user", "assistant", "tool"]
    content: Optional[str] = None       # None is valid, e.g. for pure tool-call messages
    name: Optional[str] = None
    tool_call_id: Optional[str] = None  # used with role="tool" replies (OpenAI convention)
    tool_calls: Optional[list[ToolCall]] = None
class ToolFunction(BaseModel):
    """Schema of a callable tool function."""
    name: str
    description: Optional[str] = None
    # Presumably a JSON Schema object per the OpenAI tools spec — not validated here.
    parameters: dict[str, Any] = Field(default_factory=dict)
class Tool(BaseModel):
    """Tool definition for function calling."""
    type: Literal["function"] = "function"
    function: ToolFunction
class RequestMetadata(BaseModel):
    """Gateway-specific metadata carried alongside a request."""
    playbook_id: Optional[str] = None  # selects a system-prompt playbook (see routes/chat.py)
    tenant_id: Optional[str] = None
    user_id: Optional[str] = None
class ChatCompletionRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible)."""
    model: str
    messages: list[ChatMessage]
    stream: bool = False
    temperature: Optional[float] = Field(default=0.7, ge=0, le=2)
    top_p: Optional[float] = Field(default=1.0, ge=0, le=1)
    max_tokens: Optional[int] = Field(default=None, ge=1)
    stop: Optional[Union[List[str], str]] = None
    presence_penalty: Optional[float] = Field(default=0, ge=-2, le=2)
    frequency_penalty: Optional[float] = Field(default=0, ge=-2, le=2)
    user: Optional[str] = None
    tools: Optional[list[Tool]] = None
    tool_choice: Optional[Union[str, Dict[str, Any]]] = None
    # Non-OpenAI extension: gateway metadata (playbook/tenant/user).
    metadata: Optional[RequestMetadata] = None
class ChatChoice(BaseModel):
    """One completion choice in a non-streaming response."""
    index: int = 0
    message: ChatMessage
    finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter"]] = None
class ChatChoiceDelta(BaseModel):
    """Incremental payload carried by one streamed chunk."""
    role: Optional[str] = None  # per OpenAI convention, typically set on the first chunk only
    content: Optional[str] = None
    tool_calls: Optional[list[ToolCall]] = None
class StreamChoice(BaseModel):
    """One choice within a streaming chunk."""
    index: int = 0
    delta: ChatChoiceDelta
    finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter"]] = None
class Usage(BaseModel):
    """Token usage accounting for one completion."""
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
class ChatCompletionResponse(BaseModel):
    """Response body for non-streaming chat completions."""
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex[:12]}")
    object: Literal["chat.completion"] = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))  # Unix timestamp
    model: str
    choices: list[ChatChoice]
    usage: Optional[Usage] = None
class ChatCompletionChunk(BaseModel):
    """One SSE chunk of a streaming chat completion."""
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex[:12]}")
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))  # Unix timestamp
    model: str
    choices: list[StreamChoice]
# --- Model info --------------------------------------------------------------
class ModelInfo(BaseModel):
    """Describes one model exposed via GET /v1/models."""
    id: str
    object: Literal["model"] = "model"
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "breakpilot"
    description: Optional[str] = None
    # NOTE(review): fixed default; actual context windows vary per model — confirm.
    context_length: int = 8192
class ModelListResponse(BaseModel):
    """Response body for GET /v1/models."""
    object: Literal["list"] = "list"
    data: list[ModelInfo]

View File

@@ -0,0 +1,21 @@
"""
LLM Gateway Routes.
"""
from .chat import router as chat_router
from .playbooks import router as playbooks_router
from .health import router as health_router
from .tools import router as tools_router
from .comparison import router as comparison_router
from .edu_search_seeds import router as edu_search_seeds_router
from .communication import router as communication_router
__all__ = [
"chat_router",
"playbooks_router",
"health_router",
"tools_router",
"comparison_router",
"edu_search_seeds_router",
"communication_router",
]

View File

@@ -0,0 +1,112 @@
"""
Chat Completions Route - OpenAI-kompatible API.
"""
import logging
import json
from typing import AsyncIterator
from fastapi import APIRouter, HTTPException, Depends
from fastapi.responses import StreamingResponse
from ..models.chat import (
ChatCompletionRequest,
ChatCompletionResponse,
ChatMessage,
ModelListResponse,
)
from ..services.inference import get_inference_service, InferenceService
from ..services.playbook_service import get_playbook_service, PlaybookService
from ..middleware.auth import verify_api_key
logger = logging.getLogger(__name__)
# No prefix here; main.py mounts this router under /v1.
router = APIRouter(tags=["LLM"])
def get_services():
    """Resolve the shared inference and playbook service singletons."""
    inference = get_inference_service()
    playbooks = get_playbook_service()
    return inference, playbooks
@router.post("/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(
    request: ChatCompletionRequest,
    _: str = Depends(verify_api_key),
):
    """
    OpenAI-compatible chat completions.

    Supports:
    - streaming (stream=true) via server-sent events
    - playbook-based system prompts (metadata.playbook_id)
    - multiple models (breakpilot-teacher-8b, claude-3-5-sonnet, etc.)

    Raises:
        HTTPException: 400 when the backend rejects the request (ValueError),
        500 on unexpected inference failures.
    """
    inference_service, playbook_service = get_services()
    # Inject the playbook's system prompt, if one was requested.
    if request.metadata and request.metadata.playbook_id:
        playbook = playbook_service.get_playbook(request.metadata.playbook_id)
        if playbook:
            # Prepend as a new system message when none exists yet...
            system_msg = ChatMessage(role="system", content=playbook.system_prompt)
            has_system = any(m.role == "system" for m in request.messages)
            if not has_system:
                request.messages.insert(0, system_msg)
            else:
                # ...otherwise merge it in front of the first existing system prompt.
                for i, msg in enumerate(request.messages):
                    if msg.role == "system":
                        msg.content = f"{playbook.system_prompt}\n\n{msg.content}"
                        break
    try:
        if request.stream:
            return StreamingResponse(
                stream_response(request, inference_service),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "X-Accel-Buffering": "no",  # disable reverse-proxy buffering for SSE
                },
            )
        else:
            response = await inference_service.complete(request)
            return response
    except ValueError as e:
        logger.error(f"Chat completion error: {e}")
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.exception(f"Chat completion failed: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
async def stream_response(
    request: ChatCompletionRequest,
    inference_service: InferenceService,
) -> AsyncIterator[str]:
    """Yield chat chunks as server-sent events, terminated by a [DONE] marker."""
    try:
        async for chunk in inference_service.stream(request):
            yield f"data: {chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as e:
        # Surface the failure to the client as a final SSE payload.
        logger.exception(f"Streaming error: {e}")
        error_data = json.dumps({"error": str(e)})
        yield f"data: {error_data}\n\n"
@router.get("/models", response_model=ModelListResponse)
async def list_models(
    _: str = Depends(verify_api_key),
):
    """List the models currently available across the configured backends."""
    return await get_inference_service().list_models()

View File

@@ -0,0 +1,403 @@
"""
Communication API Routes.
API-Endpoints für KI-gestützte Lehrer-Eltern-Kommunikation.
Basiert auf den Prinzipien der gewaltfreien Kommunikation (GFK)
und deutschen Schulgesetzen.
"""
import logging
from typing import Optional, List
from datetime import datetime
from fastapi import APIRouter, HTTPException, Depends
from pydantic import BaseModel, Field
from ..services.communication_service import (
get_communication_service,
CommunicationService,
CommunicationType,
CommunicationTone,
)
from ..services.inference import InferenceService, get_inference_service
logger = logging.getLogger(__name__)
# Mounted under /v1 by main.py → endpoints live at /v1/communication/...
router = APIRouter(prefix="/communication", tags=["communication"])
# =============================================================================
# Pydantic Models
# =============================================================================
class CommunicationTypeResponse(BaseModel):
    """Value/label pair describing one communication type."""
    value: str
    label: str
class ToneResponse(BaseModel):
    """Value/label pair describing one tone."""
    value: str
    label: str
class StateResponse(BaseModel):
    """Value/label pair describing one German federal state."""
    value: str
    label: str
class LegalReferenceResponse(BaseModel):
    """A legal reference (law + paragraph) relevant to a parent letter."""
    law: str
    paragraph: str
    title: str
    summary: str
    relevance: str
class GFKPrincipleResponse(BaseModel):
    """One NVC (GFK) principle with description and example."""
    principle: str
    description: str
    example: str
class GenerateRequest(BaseModel):
    """Request for generating a parent letter."""
    communication_type: str = Field(..., description="Art der Kommunikation (z.B. 'behavior', 'academic')")
    tone: str = Field("professional", description="Tonalität (formal, professional, warm, concerned, appreciative)")
    state: str = Field("NRW", description="Bundesland für rechtliche Referenzen")
    student_name: str = Field(..., description="Name des Schülers/der Schülerin")
    parent_name: str = Field(..., description="Name der Eltern (z.B. 'Frau Müller')")
    situation: str = Field(..., description="Beschreibung der Situation")
    additional_info: Optional[str] = Field(None, description="Zusätzliche Informationen")
class GenerateResponse(BaseModel):
    """Generated letter plus validation result and supporting material."""
    message: str
    subject: str
    validation: dict  # raw validator output (is_valid, issues, gfk_score, ...)
    legal_references: List[LegalReferenceResponse]
    gfk_principles: List[GFKPrincipleResponse]
class ValidateRequest(BaseModel):
    """Request for validating (or improving) a text."""
    text: str = Field(..., description="Der zu validierende Text")
class ValidateResponse(BaseModel):
    """Validation verdict for a text."""
    is_valid: bool
    issues: List[str]
    suggestions: List[str]
    positive_elements: List[str]
    # NVC conformance score; presumably in [0, 1] — the /improve endpoint treats
    # >= 0.8 as "good enough". Confirm against the service implementation.
    gfk_score: float
# =============================================================================
# Endpoints
# =============================================================================
@router.get("/types", response_model=List[CommunicationTypeResponse])
async def get_communication_types():
    """List every supported communication type (value + label)."""
    return get_communication_service().get_all_communication_types()
@router.get("/tones", response_model=List[ToneResponse])
async def get_tones():
    """List every supported tone (value + label)."""
    return get_communication_service().get_all_tones()
@router.get("/states", response_model=List[StateResponse])
async def get_states():
    """List every supported German federal state (value + label)."""
    return get_communication_service().get_states()
@router.get("/legal-references/{state}")
async def get_legal_references(state: str):
    """
    Return legal references ("Elternpflichten" topic) for a federal state.

    Args:
        state: state abbreviation, e.g. NRW or BY.
    """
    service = get_communication_service()
    responses = []
    for ref in service.get_legal_references(state, "elternpflichten"):
        responses.append(
            LegalReferenceResponse(
                law=ref.law,
                paragraph=ref.paragraph,
                title=ref.title,
                summary=ref.summary,
                relevance=ref.relevance,
            )
        )
    return responses
@router.get("/gfk-principles", response_model=List[GFKPrincipleResponse])
async def get_gfk_principles():
    """Return the principles of nonviolent communication (NVC/GFK)."""
    service = get_communication_service()
    guidance = service.get_gfk_guidance(CommunicationType.GENERAL_INFO)
    results = []
    for item in guidance:
        results.append(
            GFKPrincipleResponse(
                principle=item.principle,
                description=item.description,
                example=item.example,
            )
        )
    return results
@router.post("/generate", response_model=GenerateResponse)
async def generate_communication(request: GenerateRequest):
    """
    Generate a parent letter from the given context.

    Builds NVC-informed prompts via the communication service, asks the
    inference service for a draft, and falls back to a static template when
    generation fails. The draft is validated and returned together with
    legal references and NVC principles.

    Args:
        request: GenerateRequest with all required information.

    Returns:
        GenerateResponse with the generated text and metadata.

    Raises:
        HTTPException: 400 for an unknown communication type.
    """
    service = get_communication_service()
    # Validate the communication type (strict: unknown values are a 400).
    try:
        comm_type = CommunicationType(request.communication_type)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail=f"Ungültiger Kommunikationstyp: {request.communication_type}"
        )
    # Validate the tone (lenient: unknown values fall back to PROFESSIONAL).
    try:
        tone = CommunicationTone(request.tone)
    except ValueError:
        tone = CommunicationTone.PROFESSIONAL
    # Build system and user prompts.
    system_prompt = service.build_system_prompt(comm_type, request.state, tone)
    user_prompt = service.build_user_prompt(comm_type, {
        "student_name": request.student_name,
        "parent_name": request.parent_name,
        "situation": request.situation,
        "additional_info": request.additional_info,
    })
    # Call the inference service.
    try:
        inference_service = get_inference_service()
        response = await inference_service.generate(
            prompt=user_prompt,
            system_prompt=system_prompt,
            temperature=0.7,  # somewhat creative, but controlled
            max_tokens=2000,
        )
        generated_message = response.get("content", "")
    except Exception as e:
        logger.error(f"Fehler bei der Nachrichtengenerierung: {e}")
        # Fallback: assemble the letter from the static template.
        template = service.get_template(comm_type)
        generated_message = f"""{template['opening'].format(
            parent_name=request.parent_name,
            student_name=request.student_name,
            topic=request.situation[:50] + '...' if len(request.situation) > 50 else request.situation
        )}
{request.situation}
{template['closing'].format(
            student_name=request.student_name,
            legal_reference=f"des Schulgesetzes"
        )}"""
    # Validate the draft for NVC conformance.
    validation = service.validate_communication(generated_message)
    # Pick the legal topic matching the communication type.
    topic_map = {
        CommunicationType.ATTENDANCE: "schulpflicht",
        CommunicationType.BEHAVIOR: "ordnungsmassnahmen",
        CommunicationType.ACADEMIC: "foerderung",
        CommunicationType.SPECIAL_NEEDS: "foerderung",
    }
    topic = topic_map.get(comm_type, "elternpflichten")
    legal_refs = service.get_legal_references(request.state, topic)
    # NVC principles for this communication type.
    gfk_principles = service.get_gfk_guidance(comm_type)
    # Build the subject line from the template.
    template = service.get_template(comm_type)
    subject = template.get("subject", "Mitteilung der Schule").format(
        student_name=request.student_name,
        topic=request.situation[:30] + '...' if len(request.situation) > 30 else request.situation
    )
    return GenerateResponse(
        message=generated_message,
        subject=subject,
        validation=validation,
        legal_references=[
            LegalReferenceResponse(
                law=ref.law,
                paragraph=ref.paragraph,
                title=ref.title,
                summary=ref.summary,
                relevance=ref.relevance
            )
            for ref in legal_refs
        ],
        gfk_principles=[
            GFKPrincipleResponse(
                principle=p.principle,
                description=p.description,
                example=p.example
            )
            for p in gfk_principles
        ]
    )
@router.post("/validate", response_model=ValidateResponse)
async def validate_communication(request: ValidateRequest):
    """
    Run the NVC (GFK) validator over a text and report the findings.

    Args:
        request: ValidateRequest with the text to check.
    """
    result = get_communication_service().validate_communication(request.text)
    return ValidateResponse(
        is_valid=result["is_valid"],
        issues=result["issues"],
        suggestions=result["suggestions"],
        positive_elements=result["positive_elements"],
        gfk_score=result["gfk_score"],
    )
@router.post("/improve")
async def improve_communication(request: ValidateRequest):
    """
    Improve an existing text according to NVC (GFK) principles.

    Validates the text first; if it already passes with a score >= 0.8 it is
    returned unchanged. Otherwise the LLM rewrites it and the rewrite is
    re-validated. On inference failure the original text is returned with an
    error field instead of raising.

    Args:
        request: ValidateRequest with the text to improve.

    Returns:
        dict with the improved text and change metadata.
    """
    service = get_communication_service()
    # Validate first — skip the LLM round-trip if nothing needs fixing.
    validation = service.validate_communication(request.text)
    if validation["is_valid"] and validation["gfk_score"] >= 0.8:
        return {
            "improved_text": request.text,
            "changes": [],
            "was_improved": False,
            "message": "Der Text entspricht bereits den GFK-Prinzipien."
        }
    # System prompt steering the rewrite (German, user-facing domain).
    system_prompt = """Du bist ein Experte für gewaltfreie Kommunikation (GFK) nach Marshall Rosenberg.
Deine Aufgabe ist es, einen Elternbrief zu verbessern, sodass er den GFK-Prinzipien entspricht.
VERBESSERUNGSREGELN:
1. Ersetze Bewertungen durch Beobachtungen
2. Ersetze "Sie müssen/sollten" durch Ich-Botschaften und Bitten
3. Entferne Schuldzuweisungen
4. Füge empathische Elemente hinzu
5. Behalte den sachlichen Inhalt bei
Gib den verbesserten Text zurück und erkläre kurz die wichtigsten Änderungen."""
    user_prompt = f"""Bitte verbessere folgenden Elternbrief nach den GFK-Prinzipien:
---
{request.text}
---
Identifizierte Probleme:
{', '.join(validation['issues']) if validation['issues'] else 'Keine spezifischen Probleme gefunden, aber GFK-Score könnte verbessert werden.'}
Vorschläge:
{', '.join(validation['suggestions']) if validation['suggestions'] else 'Allgemeine Verbesserungen möglich.'}"""
    try:
        inference_service = get_inference_service()
        response = await inference_service.generate(
            prompt=user_prompt,
            system_prompt=system_prompt,
            temperature=0.5,
            max_tokens=2500,
        )
        improved_text = response.get("content", request.text)
        # Re-validate to report the score delta.
        new_validation = service.validate_communication(improved_text)
        return {
            "improved_text": improved_text,
            "original_issues": validation["issues"],
            "was_improved": True,
            "old_score": validation["gfk_score"],
            "new_score": new_validation["gfk_score"],
            "remaining_issues": new_validation["issues"],
        }
    except Exception as e:
        logger.error(f"Fehler bei der Textverbesserung: {e}")
        return {
            "improved_text": request.text,
            "changes": [],
            "was_improved": False,
            "error": str(e),
            "message": "Die automatische Verbesserung ist derzeit nicht verfügbar."
        }

View File

@@ -0,0 +1,584 @@
"""
LLM Comparison Route - Vergleicht Antworten verschiedener LLM Backends.
Dieses Modul ermoeglicht:
- Parallele Anfragen an OpenAI, Claude, Self-hosted+Tavily, Self-hosted+EduSearch
- Speichern von Vergleichsergebnissen fuer QA
- Parameter-Tuning fuer Self-hosted Modelle
"""
import asyncio
import logging
import time
import uuid
from datetime import datetime, timezone
from typing import Optional
from pydantic import BaseModel, Field
from fastapi import APIRouter, HTTPException, Depends
from ..models.chat import ChatMessage
from ..middleware.auth import verify_api_key
logger = logging.getLogger(__name__)
# Mounted under /v1 by main.py → endpoints live at /v1/comparison/...
router = APIRouter(prefix="/comparison", tags=["LLM Comparison"])
class ComparisonRequest(BaseModel):
    """Request for a side-by-side LLM comparison run."""
    prompt: str = Field(..., description="User prompt (z.B. Lehrer-Frage)")
    system_prompt: Optional[str] = Field(None, description="Optionaler System Prompt")
    # Toggles for which providers to query.
    enable_openai: bool = Field(True, description="OpenAI/ChatGPT aktivieren")
    enable_claude: bool = Field(True, description="Claude aktivieren")
    enable_selfhosted_tavily: bool = Field(True, description="Self-hosted + Tavily aktivieren")
    enable_selfhosted_edusearch: bool = Field(True, description="Self-hosted + EduSearch aktivieren")
    # Sampling parameters for the self-hosted models.
    selfhosted_model: str = Field("llama3.2:3b", description="Self-hosted Modell")
    temperature: float = Field(0.7, ge=0.0, le=2.0, description="Temperature")
    top_p: float = Field(0.9, ge=0.0, le=1.0, description="Top-p Sampling")
    max_tokens: int = Field(2048, ge=1, le=8192, description="Max Tokens")
    # Web-search parameters (Tavily / EduSearch variants).
    search_results_count: int = Field(5, ge=1, le=20, description="Anzahl Suchergebnisse")
    edu_search_filters: Optional[dict] = Field(None, description="Filter fuer EduSearch")
class LLMResponse(BaseModel):
    """Answer from a single LLM provider, including latency and error info."""
    provider: str  # "openai", "claude", ...
    model: str
    response: str  # empty string when `error` is set
    latency_ms: int
    tokens_used: Optional[int] = None
    search_results: Optional[list] = None
    error: Optional[str] = None
    # Timezone-aware UTC timestamp. datetime.utcnow() is deprecated (3.12+) and
    # returns a naive datetime; this also matches datetime.now(timezone.utc)
    # used for the prompt store in this module.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
class ComparisonResponse(BaseModel):
    """Aggregated result of one comparison run across all queried providers."""
    comparison_id: str
    prompt: str
    system_prompt: Optional[str]
    responses: list[LLMResponse]
    # Timezone-aware UTC (datetime.utcnow is deprecated and produces naive values).
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
class SavedComparison(BaseModel):
    """A persisted comparison run, annotated for QA review."""
    comparison_id: str
    prompt: str
    system_prompt: Optional[str]
    responses: list[LLMResponse]
    notes: Optional[str] = None
    # Per-provider ratings, e.g. {"openai": 4, "claude": 5, ...}
    rating: Optional[dict] = None
    created_at: datetime
    created_by: Optional[str] = None
# In-memory storage — NOTE: replace with a database in production; contents
# are lost on restart and not shared between worker processes.
_comparisons_store: dict[str, SavedComparison] = {}

# Built-in system prompts selectable for comparison runs.
_system_prompts_store: dict[str, dict] = {
    "default": {
        "id": "default",
        "name": "Standard Lehrer-Assistent",
        "prompt": """Du bist ein hilfreicher Assistent fuer Lehrkraefte in Deutschland.
Deine Aufgaben:
- Hilfe bei der Unterrichtsplanung
- Erklaerung von Fachinhalten
- Erstellung von Arbeitsblaettern und Pruefungen
- Beratung zu paedagogischen Methoden
Antworte immer auf Deutsch und beachte den deutschen Lehrplankontext.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
    "curriculum": {
        "id": "curriculum",
        "name": "Lehrplan-Experte",
        "prompt": """Du bist ein Experte fuer deutsche Lehrplaene und Bildungsstandards.
Du kennst:
- Lehrplaene aller 16 Bundeslaender
- KMK Bildungsstandards
- Kompetenzorientierung im deutschen Bildungssystem
Beziehe dich immer auf konkrete Lehrplanvorgaben wenn moeglich.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
    "worksheet": {
        "id": "worksheet",
        "name": "Arbeitsblatt-Generator",
        "prompt": """Du bist ein spezialisierter Assistent fuer die Erstellung von Arbeitsblaettern.
Erstelle didaktisch sinnvolle Aufgaben mit:
- Klaren Arbeitsanweisungen
- Differenzierungsmoeglichkeiten
- Loesungshinweisen
Format: Markdown mit klarer Struktur.""",
        "created_at": datetime.now(timezone.utc).isoformat(),
    },
}
async def _call_openai(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
    """Query OpenAI's chat completions API (gpt-4o-mini) and wrap the result."""
    import os
    import httpx

    start_time = time.time()
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response="",
            latency_ms=0,
            error="OPENAI_API_KEY nicht konfiguriert",
        )

    # Assemble the OpenAI message list; the system prompt is optional.
    messages = [{"role": "system", "content": system_prompt}] if system_prompt else []
    messages.append({"role": "user", "content": prompt})

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "gpt-4o-mini",
                    "messages": messages,
                    "temperature": 0.7,
                    "max_tokens": 2048,
                },
            )
            resp.raise_for_status()
            payload = resp.json()
        elapsed_ms = int((time.time() - start_time) * 1000)
        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response=payload["choices"][0]["message"]["content"],
            latency_ms=elapsed_ms,
            tokens_used=payload.get("usage", {}).get("total_tokens"),
        )
    except Exception as e:
        # Best effort: report the failure in the response instead of raising.
        return LLMResponse(
            provider="openai",
            model="gpt-4o-mini",
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
        )
async def _call_claude(prompt: str, system_prompt: Optional[str]) -> LLMResponse:
    """Query Anthropic Claude and wrap the outcome in an LLMResponse.

    Never raises: a missing API key or any SDK error is reported via the
    ``error`` field of the result.
    """
    import os
    started = time.time()
    model_name = "claude-3-5-sonnet-20241022"
    key = os.getenv("ANTHROPIC_API_KEY")
    if not key:
        return LLMResponse(
            provider="claude",
            model=model_name,
            response="",
            latency_ms=0,
            error="ANTHROPIC_API_KEY nicht konfiguriert",
        )
    try:
        import anthropic
        result = await anthropic.AsyncAnthropic(api_key=key).messages.create(
            model=model_name,
            max_tokens=2048,
            system=system_prompt or "",
            messages=[{"role": "user", "content": prompt}],
        )
        answer = result.content[0].text if result.content else ""
        return LLMResponse(
            provider="claude",
            model=model_name,
            response=answer,
            latency_ms=int((time.time() - started) * 1000),
            tokens_used=result.usage.input_tokens + result.usage.output_tokens,
        )
    except Exception as exc:
        return LLMResponse(
            provider="claude",
            model=model_name,
            response="",
            latency_ms=int((time.time() - started) * 1000),
            error=str(exc),
        )
async def _search_tavily(query: str, count: int = 5) -> list[dict]:
    """Search via the Tavily API, limited to German education domains.

    Returns an empty list when no TAVILY_API_KEY is configured or when
    the request fails for any reason.
    """
    import os
    import httpx
    key = os.getenv("TAVILY_API_KEY")
    if not key:
        return []
    request_body = {
        "api_key": key,
        "query": query,
        "max_results": count,
        "include_domains": [
            "kmk.org", "bildungsserver.de", "bpb.de",
            "bayern.de", "nrw.de", "berlin.de",
        ],
    }
    try:
        async with httpx.AsyncClient(timeout=30.0) as http:
            resp = await http.post("https://api.tavily.com/search", json=request_body)
            resp.raise_for_status()
            return resp.json().get("results", [])
    except Exception as exc:
        logger.error(f"Tavily search error: {exc}")
        return []
async def _search_edusearch(query: str, count: int = 5, filters: Optional[dict] = None) -> list[dict]:
    """Search the internal EduSearch service and normalize its results.

    The service's "snippet"/"scores" shape is mapped onto the same
    title/url/content/score dicts the Tavily helper produces.  Returns
    an empty list on any error.
    """
    import os
    import httpx
    base_url = os.getenv("EDU_SEARCH_URL", "http://edu-search-service:8084")
    body = {"q": query, "limit": count, "mode": "keyword"}
    if filters:
        body["filters"] = filters
    try:
        async with httpx.AsyncClient(timeout=30.0) as http:
            resp = await http.post(f"{base_url}/v1/search", json=body)
            resp.raise_for_status()
            hits = resp.json().get("results", [])
        return [
            {
                "title": hit.get("title", ""),
                "url": hit.get("url", ""),
                "content": hit.get("snippet", ""),
                "score": hit.get("scores", {}).get("final", 0),
            }
            for hit in hits
        ]
    except Exception as exc:
        logger.error(f"EduSearch error: {exc}")
        return []
async def _call_selfhosted_with_search(
    prompt: str,
    system_prompt: Optional[str],
    search_provider: str,
    search_results: list[dict],
    model: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
) -> LLMResponse:
    """Call a self-hosted Ollama model with injected search context (RAG-style).

    Args:
        prompt: The user prompt.
        system_prompt: Optional base system prompt; search context is appended.
        search_provider: "tavily" or "edusearch" — used for labeling only.
        search_results: Result dicts with title/url/content keys.
        model: Ollama model name.
        temperature, top_p, max_tokens: Sampling options forwarded to Ollama.

    Returns:
        LLMResponse with the answer (or an error string) plus the search
        results that were put into the context, so the UI can show sources.
    """
    import os
    import httpx
    start_time = time.time()
    ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")
    # Build a numbered source list; snippets are truncated to 500 chars
    # to keep the prompt/context small.
    context_parts = []
    for i, result in enumerate(search_results, 1):
        context_parts.append(f"[{i}] {result.get('title', 'Untitled')}")
        context_parts.append(f" URL: {result.get('url', '')}")
        context_parts.append(f" {result.get('content', '')[:500]}")
        context_parts.append("")
    search_context = "\n".join(context_parts)
    # Append the sources to the system prompt and instruct the model to
    # cite them by their [number].
    augmented_system = f"""{system_prompt or ''}
Du hast Zugriff auf folgende Suchergebnisse aus {"Tavily" if search_provider == "tavily" else "EduSearch (deutsche Bildungsquellen)"}:
{search_context}
Nutze diese Quellen um deine Antwort zu unterstuetzen. Zitiere relevante Quellen mit [Nummer]."""
    messages = [
        {"role": "system", "content": augmented_system},
        {"role": "user", "content": prompt},
    ]
    try:
        # Generous timeout: local models can be slow on first load.
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{ollama_url}/api/chat",
                json={
                    "model": model,
                    "messages": messages,
                    "stream": False,
                    "options": {
                        "temperature": temperature,
                        "top_p": top_p,
                        "num_predict": max_tokens,
                    },
                },
            )
            response.raise_for_status()
            data = response.json()
            latency_ms = int((time.time() - start_time) * 1000)
            content = data.get("message", {}).get("content", "")
            # Ollama reports prompt and completion tokens separately.
            tokens = data.get("prompt_eval_count", 0) + data.get("eval_count", 0)
            return LLMResponse(
                provider=f"selfhosted_{search_provider}",
                model=model,
                response=content,
                latency_ms=latency_ms,
                tokens_used=tokens,
                search_results=search_results,
            )
    except Exception as e:
        return LLMResponse(
            provider=f"selfhosted_{search_provider}",
            model=model,
            response="",
            latency_ms=int((time.time() - start_time) * 1000),
            error=str(e),
            search_results=search_results,
        )
@router.post("/run", response_model=ComparisonResponse)
async def run_comparison(
    request: ComparisonRequest,
    _: str = Depends(verify_api_key),
):
    """Run a multi-provider LLM comparison.

    Dispatches the prompt to every enabled provider, awaits all of them
    in parallel, and returns one LLMResponse per provider.
    """
    comparison_id = f"cmp-{uuid.uuid4().hex[:12]}"
    system_prompt = request.system_prompt

    # (provider name, coroutine) pairs to be gathered below.
    pending = []
    if request.enable_openai:
        pending.append(("openai", _call_openai(request.prompt, system_prompt)))
    if request.enable_claude:
        pending.append(("claude", _call_claude(request.prompt, system_prompt)))
    if request.enable_selfhosted_tavily:
        hits = await _search_tavily(request.prompt, request.search_results_count)
        pending.append((
            "selfhosted_tavily",
            _call_selfhosted_with_search(
                request.prompt, system_prompt, "tavily", hits,
                request.selfhosted_model, request.temperature,
                request.top_p, request.max_tokens,
            ),
        ))
    if request.enable_selfhosted_edusearch:
        hits = await _search_edusearch(
            request.prompt, request.search_results_count, request.edu_search_filters,
        )
        pending.append((
            "selfhosted_edusearch",
            _call_selfhosted_with_search(
                request.prompt, system_prompt, "edusearch", hits,
                request.selfhosted_model, request.temperature,
                request.top_p, request.max_tokens,
            ),
        ))

    responses = []
    if pending:
        outcomes = await asyncio.gather(
            *(coro for _name, coro in pending), return_exceptions=True,
        )
        for (provider_name, _coro), outcome in zip(pending, outcomes):
            if isinstance(outcome, Exception):
                # Provider crashed outside its own error handling.
                responses.append(LLMResponse(
                    provider=provider_name,
                    model="unknown",
                    response="",
                    latency_ms=0,
                    error=str(outcome),
                ))
            else:
                responses.append(outcome)

    return ComparisonResponse(
        comparison_id=comparison_id,
        prompt=request.prompt,
        system_prompt=system_prompt,
        responses=responses,
    )
@router.post("/save/{comparison_id}")
async def save_comparison(
    comparison_id: str,
    comparison: ComparisonResponse,
    notes: Optional[str] = None,
    rating: Optional[dict] = None,
    _: str = Depends(verify_api_key),
):
    """Persist a finished comparison (in memory) for later analysis."""
    _comparisons_store[comparison_id] = SavedComparison(
        comparison_id=comparison_id,
        prompt=comparison.prompt,
        system_prompt=comparison.system_prompt,
        responses=comparison.responses,
        notes=notes,
        rating=rating,
        created_at=comparison.created_at,
    )
    return {"status": "saved", "comparison_id": comparison_id}
@router.get("/history")
async def get_comparison_history(
    limit: int = 50,
    _: str = Depends(verify_api_key),
):
    """Return the most recently saved comparisons, newest first."""
    newest_first = sorted(
        _comparisons_store.values(),
        key=lambda item: item.created_at,
        reverse=True,
    )
    return {"comparisons": newest_first[:limit]}
@router.get("/history/{comparison_id}")
async def get_comparison(
    comparison_id: str,
    _: str = Depends(verify_api_key),
):
    """Return a single saved comparison by id; 404 if unknown."""
    saved = _comparisons_store.get(comparison_id)
    if saved is None:
        raise HTTPException(status_code=404, detail="Vergleich nicht gefunden")
    return saved
# System Prompt Management
@router.get("/prompts")
async def list_system_prompts(
    _: str = Depends(verify_api_key),
):
    """Return every stored system prompt."""
    return {"prompts": [entry for entry in _system_prompts_store.values()]}
@router.post("/prompts")
async def create_system_prompt(
    name: str,
    prompt: str,
    _: str = Depends(verify_api_key),
):
    """Store a new system prompt under a freshly generated id."""
    new_id = f"sp-{uuid.uuid4().hex[:8]}"
    _system_prompts_store[new_id] = {
        "id": new_id,
        "name": name,
        "prompt": prompt,
        "created_at": datetime.now(timezone.utc).isoformat(),
    }
    return {"status": "created", "prompt_id": new_id}
@router.put("/prompts/{prompt_id}")
async def update_system_prompt(
    prompt_id: str,
    name: str,
    prompt: str,
    _: str = Depends(verify_api_key),
):
    """Overwrite name and text of an existing system prompt."""
    entry = _system_prompts_store.get(prompt_id)
    if entry is None:
        raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")
    entry["name"] = name
    entry["prompt"] = prompt
    entry["updated_at"] = datetime.now(timezone.utc).isoformat()
    return {"status": "updated", "prompt_id": prompt_id}
@router.delete("/prompts/{prompt_id}")
async def delete_system_prompt(
    prompt_id: str,
    _: str = Depends(verify_api_key),
):
    """Delete a user-created system prompt; built-ins are protected."""
    if prompt_id not in _system_prompts_store:
        raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")
    protected_ids = ("default", "curriculum", "worksheet")
    if prompt_id in protected_ids:
        raise HTTPException(status_code=400, detail="Standard-Prompts koennen nicht geloescht werden")
    _system_prompts_store.pop(prompt_id)
    return {"status": "deleted", "prompt_id": prompt_id}
@router.get("/prompts/{prompt_id}")
async def get_system_prompt(
    prompt_id: str,
    _: str = Depends(verify_api_key),
):
    """Return a single system prompt by id; 404 if unknown."""
    try:
        return _system_prompts_store[prompt_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="System Prompt nicht gefunden")

View File

@@ -0,0 +1,710 @@
"""
EduSearch Seeds API Routes.
CRUD operations for managing education search crawler seed URLs.
Direct database access to PostgreSQL.
"""
import os
import logging
from typing import Optional, List
from datetime import datetime
from uuid import UUID
from fastapi import APIRouter, HTTPException, Depends, Query
from pydantic import BaseModel, Field, HttpUrl
import asyncpg
logger = logging.getLogger(__name__)

# All seed-management routes below are mounted under /edu-search.
router = APIRouter(prefix="/edu-search", tags=["edu-search"])

# Database connection pool, created lazily on first use.
_pool: Optional[asyncpg.Pool] = None

async def get_db_pool() -> asyncpg.Pool:
    """Get or create database connection pool.

    Raises:
        RuntimeError: if DATABASE_URL is not set in the environment.

    NOTE(review): not guarded against concurrent first calls — two
    coroutines racing here could each create a pool; confirm whether an
    asyncio.Lock is needed for this deployment.
    """
    global _pool
    if _pool is None:
        database_url = os.environ.get("DATABASE_URL")
        if not database_url:
            raise RuntimeError("DATABASE_URL nicht konfiguriert - bitte via Vault oder Umgebungsvariable setzen")
        _pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
    return _pool
# =============================================================================
# Pydantic Models
# =============================================================================
class CategoryResponse(BaseModel):
    """Seed category as returned by the API."""
    id: str                                  # row id, serialized as string
    name: str                                # machine name (e.g. "federal")
    display_name: str                        # human-readable label
    description: Optional[str] = None
    icon: Optional[str] = None
    sort_order: int                          # ascending display order
    is_active: bool                          # inactive categories are hidden
class SeedBase(BaseModel):
    """Base seed model for creation/update.

    A "seed" is a start URL for the education-search crawler together
    with its crawl settings and ranking boost.
    """
    url: str = Field(..., max_length=500)
    name: str = Field(..., max_length=255)
    description: Optional[str] = None
    category_name: Optional[str] = Field(None, description="Category name (federal, states, etc.)")
    source_type: str = Field("GOV", description="GOV, EDU, UNI, etc.")
    scope: str = Field("FEDERAL", description="FEDERAL, STATE, etc.")
    state: Optional[str] = Field(None, max_length=5, description="State code (BW, BY, etc.)")
    # Ranking boost applied to documents from this seed (0.0 - 1.0).
    trust_boost: float = Field(0.50, ge=0.0, le=1.0)
    enabled: bool = True
    crawl_depth: int = Field(2, ge=1, le=5)
    crawl_frequency: str = Field("weekly", description="hourly, daily, weekly, monthly")
class SeedCreate(SeedBase):
    """Seed creation model (same fields as SeedBase)."""
    pass
class SeedUpdate(BaseModel):
    """Seed update model (all fields optional).

    Fields left as None are not touched by the update endpoint.
    """
    url: Optional[str] = Field(None, max_length=500)
    name: Optional[str] = Field(None, max_length=255)
    description: Optional[str] = None
    category_name: Optional[str] = None
    source_type: Optional[str] = None
    scope: Optional[str] = None
    state: Optional[str] = Field(None, max_length=5)
    trust_boost: Optional[float] = Field(None, ge=0.0, le=1.0)
    enabled: Optional[bool] = None
    crawl_depth: Optional[int] = Field(None, ge=1, le=5)
    crawl_frequency: Optional[str] = None
class SeedResponse(BaseModel):
    """Seed as returned by the API, including crawl bookkeeping columns."""
    id: str                                  # row id, serialized as string
    url: str
    name: str
    description: Optional[str] = None
    category: Optional[str] = None           # joined category machine name
    category_display_name: Optional[str] = None
    source_type: str
    scope: str
    state: Optional[str] = None
    trust_boost: float
    enabled: bool
    crawl_depth: int
    crawl_frequency: str
    last_crawled_at: Optional[datetime] = None
    last_crawl_status: Optional[str] = None  # status string reported by the crawler
    last_crawl_docs: int = 0                 # documents found in the last crawl
    total_documents: int = 0                 # running total across all crawls
    created_at: datetime
    updated_at: datetime
class SeedsListResponse(BaseModel):
    """One page of seeds plus pagination metadata."""
    seeds: List[SeedResponse]
    total: int                               # total matches across all pages
    page: int
    page_size: int
class StatsResponse(BaseModel):
    """Aggregate crawl statistics across all seeds."""
    total_seeds: int
    enabled_seeds: int
    total_documents: int
    seeds_by_category: dict                  # category name -> seed count
    seeds_by_state: dict                     # state code (or 'federal') -> count
    last_crawl_time: Optional[datetime] = None
class BulkImportRequest(BaseModel):
    """Bulk import request: list of seeds to insert."""
    seeds: List[SeedCreate]
class BulkImportResponse(BaseModel):
    """Bulk import result counts plus per-URL error messages."""
    imported: int
    skipped: int                             # duplicates that already existed
    errors: List[str]
# =============================================================================
# API Endpoints
# =============================================================================
@router.get("/categories", response_model=List[CategoryResponse])
async def list_categories():
    """List all active seed categories, ordered by sort_order."""
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT id, name, display_name, description, icon, sort_order, is_active
            FROM edu_search_categories
            WHERE is_active = TRUE
            ORDER BY sort_order
        """)
    categories = []
    for row in rows:
        categories.append(CategoryResponse(
            id=str(row["id"]),
            name=row["name"],
            display_name=row["display_name"],
            description=row["description"],
            icon=row["icon"],
            sort_order=row["sort_order"],
            is_active=row["is_active"],
        ))
    return categories
@router.get("/seeds", response_model=SeedsListResponse)
async def list_seeds(
    category: Optional[str] = Query(None, description="Filter by category name"),
    state: Optional[str] = Query(None, description="Filter by state code"),
    enabled: Optional[bool] = Query(None, description="Filter by enabled status"),
    search: Optional[str] = Query(None, description="Search in name/url"),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
):
    """List seeds with optional filtering and pagination.

    Filters are ANDed together.  Returns the requested page plus the
    total match count so clients can render pagination controls.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # Build WHERE clause dynamically; param_idx tracks the next $N
        # placeholder so the SQL stays aligned with `params`.
        conditions = []
        params = []
        param_idx = 1
        if category:
            conditions.append(f"c.name = ${param_idx}")
            params.append(category)
            param_idx += 1
        if state:
            conditions.append(f"s.state = ${param_idx}")
            params.append(state)
            param_idx += 1
        if enabled is not None:
            conditions.append(f"s.enabled = ${param_idx}")
            params.append(enabled)
            param_idx += 1
        if search:
            # Same $N intentionally used twice: one parameter, two ILIKE targets.
            conditions.append(f"(s.name ILIKE ${param_idx} OR s.url ILIKE ${param_idx})")
            params.append(f"%{search}%")
            param_idx += 1
        where_clause = " AND ".join(conditions) if conditions else "TRUE"
        # Count total matches (no LIMIT/OFFSET).
        count_query = f"""
            SELECT COUNT(*) FROM edu_search_seeds s
            LEFT JOIN edu_search_categories c ON s.category_id = c.id
            WHERE {where_clause}
        """
        total = await conn.fetchval(count_query, *params)
        # Get paginated results; LIMIT/OFFSET become the last two params,
        # at positions $param_idx and $param_idx+1.
        offset = (page - 1) * page_size
        params.extend([page_size, offset])
        query = f"""
            SELECT
                s.id, s.url, s.name, s.description,
                c.name as category, c.display_name as category_display_name,
                s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
                s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
                s.last_crawl_status, s.last_crawl_docs, s.total_documents,
                s.created_at, s.updated_at
            FROM edu_search_seeds s
            LEFT JOIN edu_search_categories c ON s.category_id = c.id
            WHERE {where_clause}
            ORDER BY c.sort_order, s.name
            LIMIT ${param_idx} OFFSET ${param_idx + 1}
        """
        rows = await conn.fetch(query, *params)
        seeds = [
            SeedResponse(
                id=str(row["id"]),
                url=row["url"],
                name=row["name"],
                description=row["description"],
                category=row["category"],
                category_display_name=row["category_display_name"],
                source_type=row["source_type"],
                scope=row["scope"],
                state=row["state"],
                trust_boost=float(row["trust_boost"]),
                enabled=row["enabled"],
                crawl_depth=row["crawl_depth"],
                crawl_frequency=row["crawl_frequency"],
                last_crawled_at=row["last_crawled_at"],
                last_crawl_status=row["last_crawl_status"],
                last_crawl_docs=row["last_crawl_docs"] or 0,
                total_documents=row["total_documents"] or 0,
                created_at=row["created_at"],
                updated_at=row["updated_at"],
            )
            for row in rows
        ]
        return SeedsListResponse(
            seeds=seeds,
            total=total,
            page=page,
            page_size=page_size,
        )
@router.get("/seeds/{seed_id}", response_model=SeedResponse)
async def get_seed(seed_id: str):
    """Get a single seed by ID.

    Raises:
        HTTPException 404: when no seed with this id exists.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT
                s.id, s.url, s.name, s.description,
                c.name as category, c.display_name as category_display_name,
                s.source_type, s.scope, s.state, s.trust_boost, s.enabled,
                s.crawl_depth, s.crawl_frequency, s.last_crawled_at,
                s.last_crawl_status, s.last_crawl_docs, s.total_documents,
                s.created_at, s.updated_at
            FROM edu_search_seeds s
            LEFT JOIN edu_search_categories c ON s.category_id = c.id
            WHERE s.id = $1
        """, seed_id)
        if not row:
            raise HTTPException(status_code=404, detail="Seed nicht gefunden")
        return SeedResponse(
            id=str(row["id"]),
            url=row["url"],
            name=row["name"],
            description=row["description"],
            category=row["category"],
            category_display_name=row["category_display_name"],
            source_type=row["source_type"],
            scope=row["scope"],
            state=row["state"],
            trust_boost=float(row["trust_boost"]),
            enabled=row["enabled"],
            crawl_depth=row["crawl_depth"],
            crawl_frequency=row["crawl_frequency"],
            last_crawled_at=row["last_crawled_at"],
            last_crawl_status=row["last_crawl_status"],
            last_crawl_docs=row["last_crawl_docs"] or 0,
            total_documents=row["total_documents"] or 0,
            created_at=row["created_at"],
            updated_at=row["updated_at"],
        )
@router.post("/seeds", response_model=SeedResponse, status_code=201)
async def create_seed(seed: SeedCreate):
    """Create a new seed URL.

    Resolves the optional category name to its id, inserts the row, and
    returns the stored seed.  Responds 409 when the URL already exists.

    Fix: the response now carries the category's display name (it was
    previously hard-coded to None even when a category was resolved).
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # Resolve category name -> (id, display name).  Both stay None when
        # no category was given or the name is unknown.
        category_id = None
        category_display_name = None
        if seed.category_name:
            cat_row = await conn.fetchrow(
                "SELECT id, display_name FROM edu_search_categories WHERE name = $1",
                seed.category_name
            )
            if cat_row:
                category_id = cat_row["id"]
                category_display_name = cat_row["display_name"]
        try:
            row = await conn.fetchrow("""
                INSERT INTO edu_search_seeds (
                    url, name, description, category_id, source_type, scope,
                    state, trust_boost, enabled, crawl_depth, crawl_frequency
                ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
                RETURNING id, created_at, updated_at
            """,
                seed.url, seed.name, seed.description, category_id,
                seed.source_type, seed.scope, seed.state, seed.trust_boost,
                seed.enabled, seed.crawl_depth, seed.crawl_frequency
            )
        except asyncpg.UniqueViolationError:
            # The url column has a unique constraint.
            raise HTTPException(status_code=409, detail="URL existiert bereits")
        return SeedResponse(
            id=str(row["id"]),
            url=seed.url,
            name=seed.name,
            description=seed.description,
            category=seed.category_name,
            category_display_name=category_display_name,
            source_type=seed.source_type,
            scope=seed.scope,
            state=seed.state,
            trust_boost=seed.trust_boost,
            enabled=seed.enabled,
            crawl_depth=seed.crawl_depth,
            crawl_frequency=seed.crawl_frequency,
            last_crawled_at=None,
            last_crawl_status=None,
            last_crawl_docs=0,
            total_documents=0,
            created_at=row["created_at"],
            updated_at=row["updated_at"],
        )
@router.put("/seeds/{seed_id}", response_model=SeedResponse)
async def update_seed(seed_id: str, seed: SeedUpdate):
    """Update an existing seed (partial update).

    Only fields that are not None are written; semantics are identical to
    the previous field-by-field chain, just expressed as a data-driven
    loop over the columns that map 1:1 from the request model.

    Raises:
        HTTPException 400: when no updatable field was supplied.
        HTTPException 404: when the seed id does not exist.
    """
    # Columns copied verbatim from the request model to the table.
    simple_columns = (
        "url", "name", "description", "source_type", "scope", "state",
        "trust_boost", "enabled", "crawl_depth", "crawl_frequency",
    )
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        updates = []
        params = []
        for column in simple_columns:
            value = getattr(seed, column)
            if value is not None:
                params.append(value)
                updates.append(f"{column} = ${len(params)}")
        if seed.category_name is not None:
            # Resolve name -> id; an unknown name yields NULL and clears
            # the category (same behavior as before).
            category_id = await conn.fetchval(
                "SELECT id FROM edu_search_categories WHERE name = $1",
                seed.category_name
            )
            params.append(category_id)
            updates.append(f"category_id = ${len(params)}")
        if not updates:
            raise HTTPException(status_code=400, detail="Keine Felder zum Aktualisieren")
        updates.append("updated_at = NOW()")
        params.append(seed_id)
        query = f"""
            UPDATE edu_search_seeds
            SET {", ".join(updates)}
            WHERE id = ${len(params)}
            RETURNING id
        """
        result = await conn.fetchrow(query, *params)
        if not result:
            raise HTTPException(status_code=404, detail="Seed nicht gefunden")
    # Return the freshly updated seed, including joined category info.
    return await get_seed(seed_id)
@router.delete("/seeds/{seed_id}")
async def delete_seed(seed_id: str):
    """Delete a seed; 404 when the id does not exist."""
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        command_tag = await conn.execute(
            "DELETE FROM edu_search_seeds WHERE id = $1",
            seed_id
        )
    # asyncpg returns the command tag, e.g. "DELETE 1" / "DELETE 0".
    if command_tag == "DELETE 0":
        raise HTTPException(status_code=404, detail="Seed nicht gefunden")
    return {"status": "deleted", "id": seed_id}
@router.post("/seeds/bulk-import", response_model=BulkImportResponse)
async def bulk_import_seeds(request: BulkImportRequest):
    """Bulk import seeds, skipping URLs that already exist.

    Fix: the insert uses ON CONFLICT (url) DO NOTHING, which never raises
    UniqueViolationError — previously duplicates were counted as imported
    and `skipped` always stayed 0.  The command tag returned by execute()
    ("INSERT 0 1" vs "INSERT 0 0") now distinguishes the two cases.
    """
    pool = await get_db_pool()
    imported = 0
    skipped = 0
    errors: List[str] = []
    async with pool.acquire() as conn:
        # Pre-fetch all category ids so each seed needs only one round trip.
        categories = {
            row["name"]: row["id"]
            for row in await conn.fetch("SELECT id, name FROM edu_search_categories")
        }
        for seed in request.seeds:
            try:
                category_id = categories.get(seed.category_name) if seed.category_name else None
                command_tag = await conn.execute("""
                    INSERT INTO edu_search_seeds (
                        url, name, description, category_id, source_type, scope,
                        state, trust_boost, enabled, crawl_depth, crawl_frequency
                    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
                    ON CONFLICT (url) DO NOTHING
                """,
                    seed.url, seed.name, seed.description, category_id,
                    seed.source_type, seed.scope, seed.state, seed.trust_boost,
                    seed.enabled, seed.crawl_depth, seed.crawl_frequency
                )
                if command_tag.endswith(" 0"):
                    skipped += 1  # conflicting URL, nothing inserted
                else:
                    imported += 1
            except Exception as e:
                errors.append(f"{seed.url}: {str(e)}")
    return BulkImportResponse(imported=imported, skipped=skipped, errors=errors)
@router.get("/stats", response_model=StatsResponse)
async def get_stats():
    """Aggregate crawl statistics across all seeds."""
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        total_seeds = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds")
        enabled_seeds = await conn.fetchval("SELECT COUNT(*) FROM edu_search_seeds WHERE enabled = TRUE")
        document_count = await conn.fetchval("SELECT COALESCE(SUM(total_documents), 0) FROM edu_search_seeds")
        # Seed counts per category (categories without seeds report 0).
        by_category = {
            row["name"]: row["count"]
            for row in await conn.fetch("""
                SELECT c.name, COUNT(s.id) as count
                FROM edu_search_categories c
                LEFT JOIN edu_search_seeds s ON c.id = s.category_id
                GROUP BY c.name
            """)
        }
        # Seed counts per state; NULL state is reported as 'federal'.
        by_state = {
            row["state"]: row["count"]
            for row in await conn.fetch("""
                SELECT COALESCE(state, 'federal') as state, COUNT(*) as count
                FROM edu_search_seeds
                GROUP BY state
            """)
        }
        most_recent_crawl = await conn.fetchval(
            "SELECT MAX(last_crawled_at) FROM edu_search_seeds"
        )
    return StatsResponse(
        total_seeds=total_seeds,
        enabled_seeds=enabled_seeds,
        total_documents=document_count,
        seeds_by_category=by_category,
        seeds_by_state=by_state,
        last_crawl_time=most_recent_crawl,
    )
# Export for external use (edu-search-service)
@router.get("/seeds/export/for-crawler")
async def export_seeds_for_crawler():
    """Export all enabled seeds in the compact format the crawler expects."""
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT
                s.url, s.trust_boost, s.source_type, s.scope, s.state,
                s.crawl_depth, c.name as category
            FROM edu_search_seeds s
            LEFT JOIN edu_search_categories c ON s.category_id = c.id
            WHERE s.enabled = TRUE
            ORDER BY s.trust_boost DESC
        """)
    exported = []
    for row in rows:
        exported.append({
            "url": row["url"],
            "trust": float(row["trust_boost"]),
            "source": row["source_type"],
            "scope": row["scope"],
            "state": row["state"],
            "depth": row["crawl_depth"],
            "category": row["category"],
        })
    return {
        "seeds": exported,
        "total": len(exported),
        "exported_at": datetime.utcnow().isoformat(),
    }
# =============================================================================
# Crawl Status Feedback (from edu-search-service)
# =============================================================================
class CrawlStatusUpdate(BaseModel):
    """Crawl status update pushed back by edu-search-service."""
    seed_url: str = Field(..., description="The seed URL that was crawled")
    status: str = Field(..., description="Crawl status: success, error, partial")
    documents_crawled: int = Field(0, ge=0, description="Number of documents crawled")
    error_message: Optional[str] = Field(None, description="Error message if status is error")
    crawl_duration_seconds: float = Field(0.0, ge=0.0, description="Duration of the crawl in seconds")
class CrawlStatusResponse(BaseModel):
    """Acknowledgement for a single crawl status update."""
    success: bool
    seed_url: str
    message: str                             # human-readable confirmation
@router.post("/seeds/crawl-status", response_model=CrawlStatusResponse)
async def update_crawl_status(update: CrawlStatusUpdate):
    """Record crawl results for one seed (callback from edu-search-service)."""
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        seed = await conn.fetchrow(
            "SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
            update.seed_url
        )
        if not seed:
            raise HTTPException(
                status_code=404,
                detail=f"Seed nicht gefunden: {update.seed_url}"
            )
        # Accumulate the running document total before writing the status.
        running_total = (seed["total_documents"] or 0) + update.documents_crawled
        await conn.execute("""
            UPDATE edu_search_seeds
            SET
                last_crawled_at = NOW(),
                last_crawl_status = $2,
                last_crawl_docs = $3,
                total_documents = $4,
                updated_at = NOW()
            WHERE id = $1
        """, seed["id"], update.status, update.documents_crawled, running_total)
    logger.info(
        f"Crawl status updated: {update.seed_url} - "
        f"status={update.status}, docs={update.documents_crawled}, "
        f"duration={update.crawl_duration_seconds:.1f}s"
    )
    return CrawlStatusResponse(
        success=True,
        seed_url=update.seed_url,
        message=f"Status aktualisiert: {update.documents_crawled} Dokumente gecrawlt"
    )
class BulkCrawlStatusUpdate(BaseModel):
    """Bulk crawl status update: one entry per crawled seed."""
    updates: List[CrawlStatusUpdate]
class BulkCrawlStatusResponse(BaseModel):
    """Bulk update result counts plus per-URL error messages."""
    updated: int
    failed: int
    errors: List[str]
@router.post("/seeds/crawl-status/bulk", response_model=BulkCrawlStatusResponse)
async def bulk_update_crawl_status(request: BulkCrawlStatusUpdate):
    """Apply many crawl-status updates in one call, collecting per-URL errors."""
    pool = await get_db_pool()
    ok_count = 0
    error_messages: List[str] = []
    async with pool.acquire() as conn:
        for item in request.updates:
            try:
                seed = await conn.fetchrow(
                    "SELECT id, total_documents FROM edu_search_seeds WHERE url = $1",
                    item.seed_url
                )
                if not seed:
                    error_messages.append(f"Seed nicht gefunden: {item.seed_url}")
                    continue
                running_total = (seed["total_documents"] or 0) + item.documents_crawled
                await conn.execute("""
                    UPDATE edu_search_seeds
                    SET
                        last_crawled_at = NOW(),
                        last_crawl_status = $2,
                        last_crawl_docs = $3,
                        total_documents = $4,
                        updated_at = NOW()
                    WHERE id = $1
                """, seed["id"], item.status, item.documents_crawled, running_total)
                ok_count += 1
            except Exception as exc:
                error_messages.append(f"{item.seed_url}: {str(exc)}")
    # Every appended message corresponds to exactly one failed update.
    failed_count = len(error_messages)
    logger.info(f"Bulk crawl status update: {ok_count} updated, {failed_count} failed")
    return BulkCrawlStatusResponse(
        updated=ok_count,
        failed=failed_count,
        errors=error_messages
    )

View File

@@ -0,0 +1,127 @@
"""
Health Check Route.
"""
import logging
from datetime import datetime
from fastapi import APIRouter
from pydantic import BaseModel
from ..config import get_config
logger = logging.getLogger(__name__)

router = APIRouter(tags=["Health"])

class ComponentStatus(BaseModel):
    """Status of a single checked component."""
    name: str
    status: str  # healthy, degraded, unhealthy
    message: str = ""

class HealthResponse(BaseModel):
    """Health Check Response."""
    status: str  # ok, degraded, error
    ts: str      # ISO-8601 UTC timestamp (naive, with "Z" suffix appended)
    version: str
    components: list[ComponentStatus]
async def _probe_http_backend(name: str, display: str, url: str, headers=None) -> ComponentStatus:
    """Probe one HTTP LLM backend and map the outcome to a ComponentStatus.

    Returns healthy on HTTP 200, degraded on any other status code, and
    unhealthy when the request fails entirely.  Never raises.
    """
    try:
        import httpx
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(url, headers=headers or {})
        if response.status_code == 200:
            return ComponentStatus(
                name=name,
                status="healthy",
                message=f"{display} is reachable",
            )
        return ComponentStatus(
            name=name,
            status="degraded",
            message=f"{display} returned status {response.status_code}",
        )
    except Exception as e:
        return ComponentStatus(
            name=name,
            status="unhealthy",
            message=f"Cannot reach {display}: {str(e)}",
        )

@router.get("/health", response_model=HealthResponse)
async def health_check():
    """
    Health Check Endpoint.

    Checks the gateway itself and the reachability of every enabled LLM
    backend (Ollama, vLLM; Anthropic is only reported as configured, not
    probed).  The duplicated per-backend HTTP probing is factored into
    `_probe_http_backend`; status-aggregation behavior is unchanged.
    """
    config = get_config()
    components = [ComponentStatus(
        name="gateway",
        status="healthy",
        message="Gateway is running",
    )]
    overall_status = "ok"

    # Ollama backend
    if config.ollama and config.ollama.enabled:
        ollama_status = await _probe_http_backend(
            "ollama", "Ollama", f"{config.ollama.base_url}/api/tags",
        )
        components.append(ollama_status)
        if ollama_status.status == "degraded":
            overall_status = "degraded"
        elif ollama_status.status == "unhealthy":
            # Only fatal when no alternative backend is configured.
            if not (config.vllm and config.vllm.enabled) and not (config.anthropic and config.anthropic.enabled):
                overall_status = "error"

    # vLLM backend
    if config.vllm and config.vllm.enabled:
        headers = {}
        if config.vllm.api_key:
            headers["Authorization"] = f"Bearer {config.vllm.api_key}"
        vllm_status = await _probe_http_backend(
            "vllm", "vLLM", f"{config.vllm.base_url}/v1/models", headers,
        )
        components.append(vllm_status)
        if vllm_status.status == "degraded":
            overall_status = "degraded"
        # NOTE(review): an unreachable vLLM intentionally leaves the overall
        # status untouched — this matches the previous behavior; confirm
        # whether it should degrade the overall status instead.

    # Anthropic backend (configuration-only check, no network probe)
    if config.anthropic and config.anthropic.enabled:
        components.append(ComponentStatus(
            name="anthropic",
            status="healthy",
            message="Anthropic API configured (not checked)",
        ))

    return HealthResponse(
        status=overall_status,
        # Naive UTC + "Z" suffix; utcnow() is deprecated in newer Pythons —
        # kept for output compatibility.
        ts=datetime.utcnow().isoformat() + "Z",
        version="0.1.0",
        components=components,
    )

View File

@@ -0,0 +1,173 @@
"""
Legal Crawler API Routes.
Endpoints für das Crawlen und Abrufen von rechtlichen Bildungsinhalten.
"""
import logging
import asyncio
from typing import List, Optional
from fastapi import APIRouter, HTTPException, BackgroundTasks
from pydantic import BaseModel
from ..services.legal_crawler import get_legal_crawler, LegalCrawler
logger = logging.getLogger(__name__)
# All endpoints in this module are mounted under /legal-crawler.
router = APIRouter(prefix="/legal-crawler", tags=["legal-crawler"])
class CrawlStatusResponse(BaseModel):
    """Response for crawl start/status requests."""
    status: str                   # e.g. "started", "already_running", "running", "idle"
    message: str                  # human-readable detail text (German, user-facing)
    stats: Optional[dict] = None  # stats of the most recent completed crawl, if any
class LegalDocumentResponse(BaseModel):
    """Response model for a single crawled legal document."""
    id: str
    url: str                        # source URL the document was crawled from
    title: str
    law_name: Optional[str]         # name of the law, if identified by the crawler
    state: Optional[str]            # federal state code (e.g. "NW", "BY")
    paragraphs: Optional[list]      # extracted paragraphs, if any
    last_crawled_at: Optional[str]  # when this document was last crawled
class LegalReferenceFromDB(BaseModel):
    """A legal reference as stored in the database."""
    law: str              # law name/identifier
    url: str
    state: Optional[str]  # federal state code, when the law is state-specific
    title: str
    paragraphs: list
# Module-level, in-process status of the (single) crawl.
# NOTE(review): per-process only — with multiple workers each process keeps
# its own copy; confirm the service runs single-process before relying on it.
_crawl_status = {
    "running": False,    # True while a crawl task is executing
    "last_run": None,    # "started" | "completed" | "error: ..." | None
    "last_stats": None,  # stats dict of the most recent completed crawl
}
async def _run_crawl(db_pool):
    """Execute one crawl run and record its outcome in ``_crawl_status``."""
    _crawl_status["running"] = True
    try:
        stats = await get_legal_crawler().crawl_legal_seeds(db_pool)
    except Exception as exc:
        logger.error(f"Crawl-Fehler: {exc}")
        _crawl_status["last_run"] = f"error: {str(exc)}"
    else:
        # Only record stats when the run actually finished.
        _crawl_status["last_stats"] = stats
        _crawl_status["last_run"] = "completed"
    finally:
        # Always drop the flag so /start is usable again.
        _crawl_status["running"] = False
@router.post("/start", response_model=CrawlStatusResponse)
async def start_crawl(background_tasks: BackgroundTasks):
    """
    Start a new crawl over all legal seeds.

    The crawl is meant to run in the background; progress is queried via
    /status.
    """
    if _crawl_status["running"]:
        return CrawlStatusResponse(
            status="already_running",
            message="Ein Crawl läuft bereits. Bitte warten Sie, bis er abgeschlossen ist."
        )
    # No DB pool is wired up yet, so no background task is scheduled here.
    # Bug fix: the previous version set _crawl_status["running"] = True even
    # though nothing ever ran — and therefore nothing ever cleared the flag —
    # so every later /start call reported "already_running" forever.
    # Once a pool is available: background_tasks.add_task(_run_crawl, db_pool)
    _crawl_status["last_run"] = "started"
    return CrawlStatusResponse(
        status="started",
        message="Crawl wurde gestartet. Nutzen Sie /status um den Fortschritt zu prüfen."
    )
@router.get("/status", response_model=CrawlStatusResponse)
async def get_crawl_status():
    """Return the current crawl status (running/idle plus last-run info)."""
    is_running = _crawl_status["running"]
    return CrawlStatusResponse(
        status="running" if is_running else "idle",
        message=_crawl_status.get("last_run") or "Noch nie gecrawlt",
        stats=_crawl_status.get("last_stats"),
    )
@router.get("/documents", response_model=List[LegalDocumentResponse])
async def get_legal_documents(
    state: Optional[str] = None,
    doc_type: Optional[str] = None,
    limit: int = 50
):
    """
    Return crawled legal documents.

    Args:
        state: filter by federal state code (e.g. "NW", "BY")
        doc_type: filter by document type (e.g. "schulgesetz")
        limit: maximum number of documents

    Returns:
        List of LegalDocumentResponse items.
    """
    # TODO: run the real DB query once a connection pool is available;
    # the filter parameters are accepted but not applied yet.
    return []
@router.get("/references/{state}")
async def get_legal_references_for_state(state: str):
    """
    Return legal references for a German federal state.

    This is the endpoint consumed by the communication service.

    Args:
        state: state code (e.g. "NW", "BY", "BE"). The colloquial "NRW"
            is accepted as an alias for "NW"; unknown codes pass through
            upper-cased.

    Returns:
        Dict with school-law information and paragraphs.
    """
    # Normalise the caller-supplied code. Every official two-letter code
    # maps to itself, so only the "NRW" alias needs an explicit rewrite.
    code = state.upper()
    db_state = {"NRW": "NW"}.get(code, code)
    # TODO: load documents from the DB once the pool is wired in.
    # Bug fix: db_state was previously computed but never used anywhere;
    # expose the normalised code so callers (and the future DB query)
    # agree on the canonical state value.
    return {
        "state": state,
        "normalized_state": db_state,
        "documents": [],
        "message": "Dokumente werden nach dem ersten Crawl verfügbar sein"
    }

View File

@@ -0,0 +1,96 @@
"""
Playbooks Route - System Prompt Verwaltung.
"""
import logging
from typing import Optional
from fastapi import APIRouter, HTTPException, Depends
from pydantic import BaseModel
from ..services.playbook_service import get_playbook_service, Playbook
from ..middleware.auth import verify_api_key
logger = logging.getLogger(__name__)
# All endpoints in this module are mounted under /playbooks.
router = APIRouter(prefix="/playbooks", tags=["Playbooks"])
class PlaybookSummary(BaseModel):
    """Summary of a playbook, excluding the full system prompt text."""
    id: str                        # stable playbook identifier
    name: str                      # human-readable display name
    description: str
    prompt_version: str            # version tag of the underlying prompt
    recommended_models: list[str]  # model ids this playbook is recommended for
class PlaybookDetail(BaseModel):
    """Full playbook details, including the complete system prompt."""
    id: str
    name: str
    description: str
    system_prompt: str             # complete prompt text delivered to the model
    prompt_version: str
    recommended_models: list[str]
    tool_policy: dict              # which tools the playbook allows/configures
    status: str                    # lifecycle state, e.g. "published"
class PlaybookListResponse(BaseModel):
    """Response envelope for the playbook list endpoint."""
    items: list[PlaybookSummary]
@router.get("", response_model=PlaybookListResponse)
async def list_playbooks(
    status: Optional[str] = "published",
    _: str = Depends(verify_api_key),
):
    """
    List the available playbooks.

    Playbooks are versioned system-prompt templates for specific school
    contexts; only summaries (without the prompt text) are returned.
    """
    summaries = [
        PlaybookSummary(
            id=pb.id,
            name=pb.name,
            description=pb.description,
            prompt_version=pb.prompt_version,
            recommended_models=pb.recommended_models,
        )
        for pb in get_playbook_service().list_playbooks(status=status)
    ]
    return PlaybookListResponse(items=summaries)
@router.get("/{playbook_id}", response_model=PlaybookDetail)
async def get_playbook(
    playbook_id: str,
    _: str = Depends(verify_api_key),
):
    """
    Fetch the details of one playbook.

    Includes the complete system prompt and the tool policy.

    Raises:
        HTTPException: 404 when the id is unknown.
    """
    pb = get_playbook_service().get_playbook(playbook_id)
    if pb is None:
        raise HTTPException(status_code=404, detail=f"Playbook {playbook_id} not found")
    return PlaybookDetail(
        id=pb.id,
        name=pb.name,
        description=pb.description,
        system_prompt=pb.system_prompt,
        prompt_version=pb.prompt_version,
        recommended_models=pb.recommended_models,
        tool_policy=pb.tool_policy,
        status=pb.status,
    )

View File

@@ -0,0 +1,867 @@
"""
Schools API Routes.
CRUD operations for managing German schools (~40,000 schools).
Direct database access to PostgreSQL.
"""
import os
import logging
from typing import Optional, List
from datetime import datetime
from uuid import UUID
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
import asyncpg
logger = logging.getLogger(__name__)
# All endpoints in this module are mounted under /schools.
router = APIRouter(prefix="/schools", tags=["schools"])
# Lazily created module-wide connection pool (see get_db_pool()).
_pool: Optional[asyncpg.Pool] = None
async def get_db_pool() -> asyncpg.Pool:
    """Get or create the lazily-initialised module-wide connection pool.

    NOTE(review): two coroutines that both observe ``_pool is None`` before
    the first ``await`` completes would each create a pool (the first is
    then leaked). Harmless at low traffic; consider an ``asyncio.Lock`` if
    startup races matter.
    """
    global _pool
    if _pool is None:
        # DATABASE_URL wins; the default targets the compose "postgres" host
        # (dev credentials, not intended for production).
        database_url = os.environ.get(
            "DATABASE_URL",
            "postgresql://breakpilot:breakpilot123@postgres:5432/breakpilot_db"
        )
        _pool = await asyncpg.create_pool(database_url, min_size=2, max_size=10)
    return _pool
# =============================================================================
# Pydantic Models
# =============================================================================
class SchoolTypeResponse(BaseModel):
    """School type response model."""
    id: str                            # primary key rendered as a string
    name: str                          # full type name
    name_short: Optional[str] = None   # abbreviated type name
    category: Optional[str] = None     # coarse grouping (used for stats)
    description: Optional[str] = None
class SchoolBase(BaseModel):
    """Base school model for creation/update."""
    # Identity
    name: str = Field(..., max_length=255)
    school_number: Optional[str] = Field(None, max_length=20)
    school_type_id: Optional[str] = None
    school_type_raw: Optional[str] = None   # free-text type as found in the source
    # Location
    state: str = Field(..., max_length=10)  # federal state code (BW, BY, ...)
    district: Optional[str] = None
    city: Optional[str] = None
    postal_code: Optional[str] = None
    street: Optional[str] = None
    address_full: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    # Contact
    website: Optional[str] = None
    email: Optional[str] = None
    phone: Optional[str] = None
    fax: Optional[str] = None
    # Key people
    principal_name: Optional[str] = None
    principal_title: Optional[str] = None
    principal_email: Optional[str] = None
    principal_phone: Optional[str] = None
    secretary_name: Optional[str] = None
    secretary_email: Optional[str] = None
    secretary_phone: Optional[str] = None
    # Size / characteristics
    student_count: Optional[int] = None
    teacher_count: Optional[int] = None
    class_count: Optional[int] = None
    founded_year: Optional[int] = None
    is_public: bool = True
    is_all_day: Optional[bool] = None
    has_inclusion: Optional[bool] = None
    languages: Optional[List[str]] = None
    specializations: Optional[List[str]] = None
    # Provenance
    source: Optional[str] = None
    source_url: Optional[str] = None
class SchoolCreate(SchoolBase):
    """School creation model.

    Currently identical to SchoolBase; a separate name lets the create
    schema evolve independently later.
    """
    pass
class SchoolUpdate(BaseModel):
    """School update model (all fields optional).

    NOTE(review): no endpoint in this module uses this model yet — confirm
    intended usage before extending it.
    """
    name: Optional[str] = Field(None, max_length=255)
    school_number: Optional[str] = None
    school_type_id: Optional[str] = None
    state: Optional[str] = None
    district: Optional[str] = None
    city: Optional[str] = None
    postal_code: Optional[str] = None
    street: Optional[str] = None
    website: Optional[str] = None
    email: Optional[str] = None
    phone: Optional[str] = None
    principal_name: Optional[str] = None
    student_count: Optional[int] = None
    teacher_count: Optional[int] = None
    is_active: Optional[bool] = None
class SchoolResponse(BaseModel):
    """School response model (school row flattened with joined type fields)."""
    id: str
    name: str
    school_number: Optional[str] = None
    # Joined from school_types (st.name / st.name_short / st.category)
    school_type: Optional[str] = None
    school_type_short: Optional[str] = None
    school_category: Optional[str] = None
    # Location
    state: str
    district: Optional[str] = None
    city: Optional[str] = None
    postal_code: Optional[str] = None
    street: Optional[str] = None
    address_full: Optional[str] = None
    latitude: Optional[float] = None
    longitude: Optional[float] = None
    # Contact
    website: Optional[str] = None
    email: Optional[str] = None
    phone: Optional[str] = None
    fax: Optional[str] = None
    principal_name: Optional[str] = None
    principal_email: Optional[str] = None
    # Size
    student_count: Optional[int] = None
    teacher_count: Optional[int] = None
    is_public: bool = True
    is_all_day: Optional[bool] = None
    staff_count: int = 0  # count of active school_staff rows (subquery)
    # Provenance / lifecycle
    source: Optional[str] = None
    crawled_at: Optional[datetime] = None
    is_active: bool = True
    created_at: datetime
    updated_at: datetime
class SchoolsListResponse(BaseModel):
    """List response with pagination info."""
    schools: List[SchoolResponse]
    total: int      # rows matching the filters (not just this page)
    page: int       # 1-based page index
    page_size: int
class SchoolStaffBase(BaseModel):
    """Base school staff model."""
    first_name: Optional[str] = None
    last_name: str                       # the only mandatory name part
    full_name: Optional[str] = None      # derived from title/first/last when absent
    title: Optional[str] = None          # academic/honorific title
    position: Optional[str] = None       # free-text position label
    position_type: Optional[str] = None  # e.g. "principal", "vice_principal", "secretary"
    subjects: Optional[List[str]] = None
    email: Optional[str] = None
    phone: Optional[str] = None
class SchoolStaffCreate(SchoolStaffBase):
    """School staff creation model (staff fields plus the owning school)."""
    school_id: str
class SchoolStaffResponse(SchoolStaffBase):
    """School staff response model."""
    id: str
    school_id: str
    school_name: Optional[str] = None  # joined from schools.name
    profile_url: Optional[str] = None
    photo_url: Optional[str] = None
    is_active: bool = True
    created_at: datetime
class SchoolStaffListResponse(BaseModel):
    """Staff list response."""
    staff: List[SchoolStaffResponse]
    total: int  # may exceed len(staff) when the endpoint paginates
class SchoolStatsResponse(BaseModel):
    """School statistics response."""
    total_schools: int
    total_staff: int
    schools_by_state: dict  # state code -> count
    schools_by_type: dict   # type name (or "Unbekannt") -> count
    schools_with_website: int
    schools_with_email: int
    schools_with_principal: int
    total_students: int
    total_teachers: int
    last_crawl_time: Optional[datetime] = None  # MAX(crawled_at) over all schools
class BulkImportRequest(BaseModel):
    """Bulk import request."""
    schools: List[SchoolCreate]  # rows to insert or update
class BulkImportResponse(BaseModel):
    """Bulk import response."""
    imported: int      # newly inserted rows
    updated: int       # existing rows that were updated
    skipped: int       # rows neither inserted nor updated
    errors: List[str]  # per-row error messages (capped)
# =============================================================================
# School Type Endpoints
# =============================================================================
@router.get("/types", response_model=List[SchoolTypeResponse])
async def list_school_types():
    """List all known school types, ordered by category then name."""
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch("""
            SELECT id, name, name_short, category, description
            FROM school_types
            ORDER BY category, name
        """)
    # Connection is released before the response objects are built.
    return [
        SchoolTypeResponse(
            id=str(rec["id"]),
            name=rec["name"],
            name_short=rec["name_short"],
            category=rec["category"],
            description=rec["description"],
        )
        for rec in records
    ]
# =============================================================================
# School Endpoints
# =============================================================================
@router.get("", response_model=SchoolsListResponse)
async def list_schools(
    state: Optional[str] = Query(None, description="Filter by state code (BW, BY, etc.)"),
    school_type: Optional[str] = Query(None, description="Filter by school type name"),
    city: Optional[str] = Query(None, description="Filter by city"),
    district: Optional[str] = Query(None, description="Filter by district"),
    postal_code: Optional[str] = Query(None, description="Filter by postal code prefix"),
    search: Optional[str] = Query(None, description="Search in name, city"),
    has_email: Optional[bool] = Query(None, description="Filter schools with email"),
    has_website: Optional[bool] = Query(None, description="Filter schools with website"),
    is_public: Optional[bool] = Query(None, description="Filter public/private schools"),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
):
    """List schools with optional filtering and pagination.

    All filters are ANDed together; ``total`` in the response counts every
    matching row, independent of pagination.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # Build WHERE clause dynamically. param_idx is the next asyncpg
        # positional placeholder ($1, $2, ...); params holds the bound
        # values in the same order.
        conditions = ["s.is_active = TRUE"]
        params = []
        param_idx = 1
        if state:
            conditions.append(f"s.state = ${param_idx}")
            params.append(state.upper())
            param_idx += 1
        if school_type:
            conditions.append(f"st.name = ${param_idx}")
            params.append(school_type)
            param_idx += 1
        if city:
            conditions.append(f"LOWER(s.city) = LOWER(${param_idx})")
            params.append(city)
            param_idx += 1
        if district:
            conditions.append(f"LOWER(s.district) LIKE LOWER(${param_idx})")
            params.append(f"%{district}%")
            param_idx += 1
        if postal_code:
            # Prefix match: "10" matches all 10xxx postal codes.
            conditions.append(f"s.postal_code LIKE ${param_idx}")
            params.append(f"{postal_code}%")
            param_idx += 1
        if search:
            # The same placeholder appears three times on purpose: asyncpg
            # binds one parameter to every occurrence of ${param_idx}.
            conditions.append(f"""
                (LOWER(s.name) LIKE LOWER(${param_idx})
                OR LOWER(s.city) LIKE LOWER(${param_idx})
                OR LOWER(s.district) LIKE LOWER(${param_idx}))
            """)
            params.append(f"%{search}%")
            param_idx += 1
        if has_email is not None:
            if has_email:
                conditions.append("s.email IS NOT NULL")
            else:
                conditions.append("s.email IS NULL")
        if has_website is not None:
            if has_website:
                conditions.append("s.website IS NOT NULL")
            else:
                conditions.append("s.website IS NULL")
        if is_public is not None:
            conditions.append(f"s.is_public = ${param_idx}")
            params.append(is_public)
            param_idx += 1
        where_clause = " AND ".join(conditions)
        # Count total matches first (same joins/filters, no pagination).
        count_query = f"""
            SELECT COUNT(*) FROM schools s
            LEFT JOIN school_types st ON s.school_type_id = st.id
            WHERE {where_clause}
        """
        total = await conn.fetchval(count_query, *params)
        # Fetch the requested page; LIMIT/OFFSET use the next two
        # placeholders after the filter parameters.
        offset = (page - 1) * page_size
        query = f"""
            SELECT
                s.id, s.name, s.school_number, s.state, s.district, s.city,
                s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
                s.website, s.email, s.phone, s.fax,
                s.principal_name, s.principal_email,
                s.student_count, s.teacher_count,
                s.is_public, s.is_all_day, s.source, s.crawled_at,
                s.is_active, s.created_at, s.updated_at,
                st.name as school_type, st.name_short as school_type_short, st.category as school_category,
                (SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
            FROM schools s
            LEFT JOIN school_types st ON s.school_type_id = st.id
            WHERE {where_clause}
            ORDER BY s.state, s.city, s.name
            LIMIT ${param_idx} OFFSET ${param_idx + 1}
        """
        params.extend([page_size, offset])
        rows = await conn.fetch(query, *params)
        schools = [
            SchoolResponse(
                id=str(row["id"]),
                name=row["name"],
                school_number=row["school_number"],
                school_type=row["school_type"],
                school_type_short=row["school_type_short"],
                school_category=row["school_category"],
                state=row["state"],
                district=row["district"],
                city=row["city"],
                postal_code=row["postal_code"],
                street=row["street"],
                address_full=row["address_full"],
                latitude=row["latitude"],
                longitude=row["longitude"],
                website=row["website"],
                email=row["email"],
                phone=row["phone"],
                fax=row["fax"],
                principal_name=row["principal_name"],
                principal_email=row["principal_email"],
                student_count=row["student_count"],
                teacher_count=row["teacher_count"],
                is_public=row["is_public"],
                is_all_day=row["is_all_day"],
                staff_count=row["staff_count"],
                source=row["source"],
                crawled_at=row["crawled_at"],
                is_active=row["is_active"],
                created_at=row["created_at"],
                updated_at=row["updated_at"],
            )
            for row in rows
        ]
        return SchoolsListResponse(
            schools=schools,
            total=total,
            page=page,
            page_size=page_size,
        )
@router.get("/stats", response_model=SchoolStatsResponse)
async def get_school_stats():
    """Get aggregate school statistics (totals, per-state and per-type counts).

    Registered before the ``/{school_id}`` route, so ``GET /schools/stats``
    is handled here and not captured by the dynamic id route.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # All global totals in one round trip via scalar subqueries.
        totals = await conn.fetchrow("""
            SELECT
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE) as total_schools,
                (SELECT COUNT(*) FROM school_staff WHERE is_active = TRUE) as total_staff,
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND website IS NOT NULL) as with_website,
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND email IS NOT NULL) as with_email,
                (SELECT COUNT(*) FROM schools WHERE is_active = TRUE AND principal_name IS NOT NULL) as with_principal,
                (SELECT COALESCE(SUM(student_count), 0) FROM schools WHERE is_active = TRUE) as total_students,
                (SELECT COALESCE(SUM(teacher_count), 0) FROM schools WHERE is_active = TRUE) as total_teachers,
                (SELECT MAX(crawled_at) FROM schools) as last_crawl
        """)
        # Breakdown by federal state.
        state_rows = await conn.fetch("""
            SELECT state, COUNT(*) as count
            FROM schools
            WHERE is_active = TRUE
            GROUP BY state
            ORDER BY state
        """)
        schools_by_state = {row["state"]: row["count"] for row in state_rows}
        # Breakdown by school type; unmatched type ids fall into "Unbekannt".
        type_rows = await conn.fetch("""
            SELECT COALESCE(st.name, 'Unbekannt') as type_name, COUNT(*) as count
            FROM schools s
            LEFT JOIN school_types st ON s.school_type_id = st.id
            WHERE s.is_active = TRUE
            GROUP BY st.name
            ORDER BY count DESC
        """)
        schools_by_type = {row["type_name"]: row["count"] for row in type_rows}
        return SchoolStatsResponse(
            total_schools=totals["total_schools"],
            total_staff=totals["total_staff"],
            schools_by_state=schools_by_state,
            schools_by_type=schools_by_type,
            schools_with_website=totals["with_website"],
            schools_with_email=totals["with_email"],
            schools_with_principal=totals["with_principal"],
            total_students=totals["total_students"],
            total_teachers=totals["total_teachers"],
            last_crawl_time=totals["last_crawl"],
        )
@router.get("/{school_id}", response_model=SchoolResponse)
async def get_school(school_id: str):
    """Get a single school by ID.

    Args:
        school_id: the school's id (a UUID rendered as a string).

    Raises:
        HTTPException: 404 when the id is malformed or no school matches.
    """
    # Bug fix: reject malformed ids with a clean 404. The raw string is
    # bound to s.id; a non-UUID value previously made asyncpg raise, which
    # surfaced as a 500. (Assumes schools.id is a UUID column — the list
    # endpoints render it via str() — confirm against the schema.)
    try:
        UUID(school_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="School not found")
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT
                s.id, s.name, s.school_number, s.state, s.district, s.city,
                s.postal_code, s.street, s.address_full, s.latitude, s.longitude,
                s.website, s.email, s.phone, s.fax,
                s.principal_name, s.principal_email,
                s.student_count, s.teacher_count,
                s.is_public, s.is_all_day, s.source, s.crawled_at,
                s.is_active, s.created_at, s.updated_at,
                st.name as school_type, st.name_short as school_type_short, st.category as school_category,
                (SELECT COUNT(*) FROM school_staff ss WHERE ss.school_id = s.id AND ss.is_active = TRUE) as staff_count
            FROM schools s
            LEFT JOIN school_types st ON s.school_type_id = st.id
            WHERE s.id = $1
        """, school_id)
        if not row:
            raise HTTPException(status_code=404, detail="School not found")
        return SchoolResponse(
            id=str(row["id"]),
            name=row["name"],
            school_number=row["school_number"],
            school_type=row["school_type"],
            school_type_short=row["school_type_short"],
            school_category=row["school_category"],
            state=row["state"],
            district=row["district"],
            city=row["city"],
            postal_code=row["postal_code"],
            street=row["street"],
            address_full=row["address_full"],
            latitude=row["latitude"],
            longitude=row["longitude"],
            website=row["website"],
            email=row["email"],
            phone=row["phone"],
            fax=row["fax"],
            principal_name=row["principal_name"],
            principal_email=row["principal_email"],
            student_count=row["student_count"],
            teacher_count=row["teacher_count"],
            is_public=row["is_public"],
            is_all_day=row["is_all_day"],
            staff_count=row["staff_count"],
            source=row["source"],
            crawled_at=row["crawled_at"],
            is_active=row["is_active"],
            created_at=row["created_at"],
            updated_at=row["updated_at"],
        )
@router.post("/bulk-import", response_model=BulkImportResponse)
async def bulk_import_schools(request: BulkImportRequest):
    """Bulk import schools.

    Existing rows are matched by (school_number, state), falling back to
    (name, city, state); matches are updated with COALESCE semantics
    (import values win only where present), everything else is inserted.

    Returns counts of imported/updated/skipped rows plus up to 101 error
    messages. The import aborts once more than 100 rows have failed.
    """
    pool = await get_db_pool()
    imported = 0
    updated = 0
    skipped = 0
    errors = []
    async with pool.acquire() as conn:
        # Resolve school-type names (lower-cased) to ids once, up front.
        type_rows = await conn.fetch("SELECT id, name FROM school_types")
        type_map = {row["name"].lower(): str(row["id"]) for row in type_rows}
        for school in request.schools:
            try:
                # Map the free-text type label to a known type id, if any.
                school_type_id = None
                if school.school_type_raw:
                    school_type_id = type_map.get(school.school_type_raw.lower())
                # Check if school exists (by school_number + state, or by name + city + state)
                existing = None
                if school.school_number:
                    existing = await conn.fetchrow(
                        "SELECT id FROM schools WHERE school_number = $1 AND state = $2",
                        school.school_number, school.state
                    )
                if not existing and school.city:
                    existing = await conn.fetchrow(
                        "SELECT id FROM schools WHERE LOWER(name) = LOWER($1) AND LOWER(city) = LOWER($2) AND state = $3",
                        school.name, school.city, school.state
                    )
                if existing:
                    # Update; COALESCE keeps the stored value wherever the
                    # import provides no data.
                    await conn.execute("""
                        UPDATE schools SET
                            name = $2,
                            school_type_id = COALESCE($3, school_type_id),
                            school_type_raw = COALESCE($4, school_type_raw),
                            district = COALESCE($5, district),
                            city = COALESCE($6, city),
                            postal_code = COALESCE($7, postal_code),
                            street = COALESCE($8, street),
                            address_full = COALESCE($9, address_full),
                            latitude = COALESCE($10, latitude),
                            longitude = COALESCE($11, longitude),
                            website = COALESCE($12, website),
                            email = COALESCE($13, email),
                            phone = COALESCE($14, phone),
                            fax = COALESCE($15, fax),
                            principal_name = COALESCE($16, principal_name),
                            principal_title = COALESCE($17, principal_title),
                            principal_email = COALESCE($18, principal_email),
                            principal_phone = COALESCE($19, principal_phone),
                            student_count = COALESCE($20, student_count),
                            teacher_count = COALESCE($21, teacher_count),
                            is_public = $22,
                            source = COALESCE($23, source),
                            source_url = COALESCE($24, source_url),
                            updated_at = NOW()
                        WHERE id = $1
                    """,
                        existing["id"],
                        school.name,
                        school_type_id,
                        school.school_type_raw,
                        school.district,
                        school.city,
                        school.postal_code,
                        school.street,
                        school.address_full,
                        school.latitude,
                        school.longitude,
                        school.website,
                        school.email,
                        school.phone,
                        school.fax,
                        school.principal_name,
                        school.principal_title,
                        school.principal_email,
                        school.principal_phone,
                        school.student_count,
                        school.teacher_count,
                        school.is_public,
                        school.source,
                        school.source_url,
                    )
                    updated += 1
                else:
                    # Insert new school
                    await conn.execute("""
                        INSERT INTO schools (
                            name, school_number, school_type_id, school_type_raw,
                            state, district, city, postal_code, street, address_full,
                            latitude, longitude, website, email, phone, fax,
                            principal_name, principal_title, principal_email, principal_phone,
                            student_count, teacher_count, is_public,
                            source, source_url, crawled_at
                        ) VALUES (
                            $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
                            $11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
                            $21, $22, $23, $24, $25, NOW()
                        )
                    """,
                        school.name,
                        school.school_number,
                        school_type_id,
                        school.school_type_raw,
                        school.state,
                        school.district,
                        school.city,
                        school.postal_code,
                        school.street,
                        school.address_full,
                        school.latitude,
                        school.longitude,
                        school.website,
                        school.email,
                        school.phone,
                        school.fax,
                        school.principal_name,
                        school.principal_title,
                        school.principal_email,
                        school.principal_phone,
                        school.student_count,
                        school.teacher_count,
                        school.is_public,
                        school.source,
                        school.source_url,
                    )
                    imported += 1
            except Exception as e:
                # Bug fix: failed rows were previously not counted anywhere;
                # count them as skipped so the totals add up.
                skipped += 1
                errors.append(f"Error importing {school.name}: {str(e)}")
                if len(errors) > 100:
                    errors.append("... (more errors truncated)")
                    break
    # Bug fix: the old errors[:100] slice cut off the truncation marker
    # appended above. The loop already caps the list (<= 102 entries), so
    # return it as-is.
    return BulkImportResponse(
        imported=imported,
        updated=updated,
        skipped=skipped,
        errors=errors,
    )
# =============================================================================
# School Staff Endpoints
# =============================================================================
@router.get("/{school_id}/staff", response_model=SchoolStaffListResponse)
async def get_school_staff(school_id: str):
    """Get the active staff members of one school.

    Ordered principal, vice principal, secretary, then everyone else, each
    group alphabetically by last name.

    NOTE(review): declared before /search/staff, so FastAPI routes
    "GET /schools/search/staff" here with school_id="search" (which then
    fails on the id lookup). Consider registering the search route first.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT
                ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
                ss.title, ss.position, ss.position_type, ss.subjects,
                ss.email, ss.phone, ss.profile_url, ss.photo_url,
                ss.is_active, ss.created_at,
                s.name as school_name
            FROM school_staff ss
            JOIN schools s ON ss.school_id = s.id
            WHERE ss.school_id = $1 AND ss.is_active = TRUE
            ORDER BY
                CASE ss.position_type
                    WHEN 'principal' THEN 1
                    WHEN 'vice_principal' THEN 2
                    WHEN 'secretary' THEN 3
                    ELSE 4
                END,
                ss.last_name
        """, school_id)
        staff = [
            SchoolStaffResponse(
                id=str(row["id"]),
                school_id=str(row["school_id"]),
                school_name=row["school_name"],
                first_name=row["first_name"],
                last_name=row["last_name"],
                full_name=row["full_name"],
                title=row["title"],
                position=row["position"],
                position_type=row["position_type"],
                subjects=row["subjects"],
                email=row["email"],
                phone=row["phone"],
                profile_url=row["profile_url"],
                photo_url=row["photo_url"],
                is_active=row["is_active"],
                created_at=row["created_at"],
            )
            for row in rows
        ]
        # No pagination here: total always equals the returned list length.
        return SchoolStaffListResponse(
            staff=staff,
            total=len(staff),
        )
@router.post("/{school_id}/staff", response_model=SchoolStaffResponse)
async def create_school_staff(school_id: str, staff: SchoolStaffBase):
    """Add a staff member to a school.

    Args:
        school_id: id of the owning school (UUID as string).
        staff: the staff member's data; full_name is derived from
            title/first/last when not supplied.

    Raises:
        HTTPException: 404 when the school id is malformed or unknown.
    """
    # Bug fix: validate the UUID before querying (mirrors get_school). A
    # malformed id previously hit asyncpg directly and surfaced as a 500.
    try:
        UUID(school_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="School not found")
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # Verify the school exists (also fetches its name for the response).
        school = await conn.fetchrow("SELECT name FROM schools WHERE id = $1", school_id)
        if not school:
            raise HTTPException(status_code=404, detail="School not found")
        # Derive full_name from title/first/last when not supplied.
        full_name = staff.full_name
        if not full_name:
            parts = []
            if staff.title:
                parts.append(staff.title)
            if staff.first_name:
                parts.append(staff.first_name)
            parts.append(staff.last_name)
            full_name = " ".join(parts)
        row = await conn.fetchrow("""
            INSERT INTO school_staff (
                school_id, first_name, last_name, full_name, title,
                position, position_type, subjects, email, phone
            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
            RETURNING id, created_at
        """,
            school_id,
            staff.first_name,
            staff.last_name,
            full_name,
            staff.title,
            staff.position,
            staff.position_type,
            staff.subjects,
            staff.email,
            staff.phone,
        )
        return SchoolStaffResponse(
            id=str(row["id"]),
            school_id=school_id,
            school_name=school["name"],
            first_name=staff.first_name,
            last_name=staff.last_name,
            full_name=full_name,
            title=staff.title,
            position=staff.position,
            position_type=staff.position_type,
            subjects=staff.subjects,
            email=staff.email,
            phone=staff.phone,
            is_active=True,
            created_at=row["created_at"],
        )
# =============================================================================
# Search Endpoints
# =============================================================================
@router.get("/search/staff", response_model=SchoolStaffListResponse)
async def search_school_staff(
    q: Optional[str] = Query(None, description="Search query"),
    state: Optional[str] = Query(None, description="Filter by state"),
    position_type: Optional[str] = Query(None, description="Filter by position type"),
    has_email: Optional[bool] = Query(None, description="Only staff with email"),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
):
    """Search school staff across all schools.

    NOTE(review): as declared this route is effectively unreachable —
    FastAPI matches routes in declaration order, and "/{school_id}/staff"
    (declared earlier) captures "GET /schools/search/staff" with
    school_id="search". Move this declaration above the dynamic routes.
    """
    pool = await get_db_pool()
    async with pool.acquire() as conn:
        # Dynamic WHERE clause; param_idx tracks the next $n placeholder.
        conditions = ["ss.is_active = TRUE", "s.is_active = TRUE"]
        params = []
        param_idx = 1
        if q:
            # One bound parameter serves all three ${param_idx} occurrences.
            conditions.append(f"""
                (LOWER(ss.full_name) LIKE LOWER(${param_idx})
                OR LOWER(ss.last_name) LIKE LOWER(${param_idx})
                OR LOWER(s.name) LIKE LOWER(${param_idx}))
            """)
            params.append(f"%{q}%")
            param_idx += 1
        if state:
            conditions.append(f"s.state = ${param_idx}")
            params.append(state.upper())
            param_idx += 1
        if position_type:
            conditions.append(f"ss.position_type = ${param_idx}")
            params.append(position_type)
            param_idx += 1
        if has_email is not None and has_email:
            conditions.append("ss.email IS NOT NULL")
        where_clause = " AND ".join(conditions)
        # Count all matches first (pagination-independent total).
        total = await conn.fetchval(f"""
            SELECT COUNT(*) FROM school_staff ss
            JOIN schools s ON ss.school_id = s.id
            WHERE {where_clause}
        """, *params)
        # Fetch the requested page.
        offset = (page - 1) * page_size
        rows = await conn.fetch(f"""
            SELECT
                ss.id, ss.school_id, ss.first_name, ss.last_name, ss.full_name,
                ss.title, ss.position, ss.position_type, ss.subjects,
                ss.email, ss.phone, ss.profile_url, ss.photo_url,
                ss.is_active, ss.created_at,
                s.name as school_name
            FROM school_staff ss
            JOIN schools s ON ss.school_id = s.id
            WHERE {where_clause}
            ORDER BY ss.last_name, ss.first_name
            LIMIT ${param_idx} OFFSET ${param_idx + 1}
        """, *params, page_size, offset)
        staff = [
            SchoolStaffResponse(
                id=str(row["id"]),
                school_id=str(row["school_id"]),
                school_name=row["school_name"],
                first_name=row["first_name"],
                last_name=row["last_name"],
                full_name=row["full_name"],
                title=row["title"],
                position=row["position"],
                position_type=row["position_type"],
                subjects=row["subjects"],
                email=row["email"],
                phone=row["phone"],
                profile_url=row["profile_url"],
                photo_url=row["photo_url"],
                is_active=row["is_active"],
                created_at=row["created_at"],
            )
            for row in rows
        ]
        return SchoolStaffListResponse(
            staff=staff,
            total=total,
        )

View File

@@ -0,0 +1,174 @@
"""
Tool Routes für LLM Gateway.
Bietet API-Endpoints für externe Tools wie Web Search.
"""
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, Field
from ..middleware.auth import verify_api_key
from ..services.tool_gateway import (
ToolGateway,
get_tool_gateway,
SearchDepth,
TavilyError,
ToolGatewayError,
)
# Router without its own prefix; the web_search docstring example suggests
# it is mounted under /llm/tools — confirm in the app's include_router call.
router = APIRouter()
# Request/Response Models
class SearchRequest(BaseModel):
    """Request body for a web search."""
    query: str = Field(..., min_length=1, max_length=1000, description="Suchanfrage")
    # None means: let the tool gateway pick its configured default.
    search_depth: Optional[SearchDepth] = Field(
        default=None,
        description="Suchtiefe: basic (schnell) oder advanced (gründlich)",
    )
    max_results: Optional[int] = Field(
        default=None,
        ge=1,
        le=20,
        description="Maximale Anzahl Ergebnisse (1-20)",
    )
    include_domains: Optional[list[str]] = Field(
        default=None,
        description="Nur diese Domains durchsuchen",
    )
    exclude_domains: Optional[list[str]] = Field(
        default=None,
        description="Diese Domains ausschließen",
    )
class SearchResultItem(BaseModel):
    """A single search hit."""
    title: str
    url: str
    content: str    # text snippet/extract for this hit
    score: float    # relevance score reported by the search backend
    published_date: Optional[str] = None
class SearchResponse(BaseModel):
    """Response body for a web search."""
    query: str  # the original, unredacted query
    redacted_query: Optional[str] = Field(
        default=None,
        description="Redaktierte Query (nur wenn PII gefunden)",
    )
    results: list[SearchResultItem]
    answer: Optional[str] = Field(
        default=None,
        description="KI-generierte Zusammenfassung der Ergebnisse",
    )
    pii_detected: bool = Field(
        default=False,
        description="True wenn PII in der Anfrage erkannt und redaktiert wurde",
    )
    pii_types: list[str] = Field(
        default_factory=list,
        description="Liste der erkannten PII-Typen",
    )
    response_time_ms: int = Field(
        default=0,
        description="Antwortzeit in Millisekunden",
    )
class ToolsHealthResponse(BaseModel):
    """Health status of the tool services."""
    tavily: dict         # status info for the Tavily web-search backend
    pii_redaction: dict  # status info for the PII redaction filter
@router.post("/search", response_model=SearchResponse)
async def web_search(
    request: SearchRequest,
    _: str = Depends(verify_api_key),
    tool_gateway: ToolGateway = Depends(get_tool_gateway),
):
    """
    Run a web search.

    The query is scanned for personally identifiable information (PII)
    before it leaves the service. Any PII found is redacted prior to the
    call to the external search provider, keeping the request
    GDPR-compliant.

    **PII detection covers:**
    - e-mail addresses
    - phone numbers
    - IBAN/bank accounts
    - credit card numbers
    - social security numbers
    - IP addresses
    - dates of birth

    **Example:**
    ```
    POST /llm/tools/search
    {
        "query": "Schulrecht Bayern Datenschutz",
        "max_results": 5
    }
    ```
    """
    try:
        outcome = await tool_gateway.search(
            query=request.query,
            search_depth=request.search_depth,
            max_results=request.max_results,
            include_domains=request.include_domains,
            exclude_domains=request.exclude_domains,
        )
    except TavilyError as exc:
        # Must be caught before ToolGatewayError: TavilyError subclasses it.
        raise HTTPException(
            status_code=502,
            detail=f"Search service error: {exc}",
        )
    except ToolGatewayError as exc:
        raise HTTPException(
            status_code=503,
            detail=f"Tool service unavailable: {exc}",
        )
    items = [
        SearchResultItem(
            title=hit.title,
            url=hit.url,
            content=hit.content,
            score=hit.score,
            published_date=hit.published_date,
        )
        for hit in outcome.results
    ]
    return SearchResponse(
        query=outcome.query,
        redacted_query=outcome.redacted_query,
        results=items,
        answer=outcome.answer,
        pii_detected=outcome.pii_detected,
        pii_types=outcome.pii_types,
        response_time_ms=outcome.response_time_ms,
    )
@router.get("/health", response_model=ToolsHealthResponse)
async def tools_health(
    _: str = Depends(verify_api_key),
    tool_gateway: ToolGateway = Depends(get_tool_gateway),
):
    """
    Reports the health status of the tool services.

    Returns one status entry per configured tool service:
    - Tavily: web search
    - PII redaction: privacy filter
    """
    status = await tool_gateway.health_check()
    return ToolsHealthResponse(**status)

View File

@@ -0,0 +1,21 @@
"""
LLM Gateway Services.
"""
from .inference import InferenceService, get_inference_service
from .playbook_service import PlaybookService
from .pii_detector import PIIDetector, get_pii_detector, PIIType, RedactionResult
from .tool_gateway import ToolGateway, get_tool_gateway, SearchDepth
# Public API of the services package (names re-exported for convenient imports).
__all__ = [
    "InferenceService",
    "get_inference_service",
    "PlaybookService",
    "PIIDetector",
    "get_pii_detector",
    "PIIType",
    "RedactionResult",
    "ToolGateway",
    "get_tool_gateway",
    "SearchDepth",
]

View File

@@ -0,0 +1,614 @@
"""
Communication Service - KI-gestützte Lehrer-Eltern-Kommunikation.
Unterstützt Lehrkräfte bei der Erstellung professioneller, rechtlich fundierter
Kommunikation mit Eltern. Basiert auf den Prinzipien der gewaltfreien Kommunikation
(GFK nach Marshall Rosenberg) und deutschen Schulgesetzen.
Die rechtlichen Referenzen werden dynamisch aus der Datenbank geladen
(edu_search_documents Tabelle), nicht mehr hardcoded.
"""
import logging
import os
from typing import Optional, List, Dict, Any
from enum import Enum, auto
from dataclasses import dataclass
import httpx
logger = logging.getLogger(__name__)
# Base URL of the Legal Crawler API (source of dynamically loaded legal content).
LEGAL_CRAWLER_API_URL = os.getenv(
    "LEGAL_CRAWLER_API_URL",
    "http://localhost:8000/v1/legal-crawler"
)
class CommunicationType(str, Enum):
    """Types of teacher-to-parent communication."""
    GENERAL_INFO = "general_info"            # general information
    BEHAVIOR = "behavior"                    # behaviour / discipline
    ACADEMIC = "academic"                    # academic performance
    ATTENDANCE = "attendance"                # attendance / absences
    MEETING_INVITE = "meeting_invite"        # invitation to a meeting
    POSITIVE_FEEDBACK = "positive_feedback"  # positive feedback
    CONCERN = "concern"                      # raising a concern
    CONFLICT = "conflict"                    # conflict resolution
    SPECIAL_NEEDS = "special_needs"          # special educational needs / support
class CommunicationTone(str, Enum):
    """Tone of voice for the generated communication."""
    FORMAL = "formal"                # very formal
    PROFESSIONAL = "professional"    # professional but friendly
    WARM = "warm"                    # warm-hearted
    CONCERNED = "concerned"          # concerned
    APPRECIATIVE = "appreciative"    # appreciative
@dataclass
class LegalReference:
    """A legal reference used to ground parent communication."""
    law: str        # e.g. "SchulG NRW"
    paragraph: str  # e.g. "§ 42"
    title: str      # e.g. "Pflichten der Eltern"
    summary: str    # short summary of the provision
    relevance: str  # why it is relevant for this case
@dataclass
class GFKPrinciple:
    """One principle of Nonviolent Communication (NVC, after Marshall Rosenberg)."""
    principle: str    # e.g. "Beobachtung" (observation)
    description: str  # explanation of the principle
    example: str      # example phrased for the school context
# Fallback legal references (only used when the database is empty).
# The primary source is crawled documents in the edu_search_documents table.
FALLBACK_LEGAL_REFERENCES: Dict[str, Dict[str, LegalReference]] = {
    "DEFAULT": {
        "elternpflichten": LegalReference(
            law="Landesschulgesetz",
            paragraph="(je nach Bundesland)",
            title="Pflichten der Eltern",
            summary="Eltern haben die Pflicht, die schulische Entwicklung zu unterstützen.",
            relevance="Grundlage für Kooperationsaufforderungen"
        ),
        "schulpflicht": LegalReference(
            law="Landesschulgesetz",
            paragraph="(je nach Bundesland)",
            title="Schulpflicht",
            summary="Kinder sind schulpflichtig. Eltern sind verantwortlich für regelmäßigen Schulbesuch.",
            relevance="Bei Fehlzeiten und Anwesenheitsproblemen"
        ),
    }
}
async def fetch_legal_references_from_db(state: str) -> List[Dict[str, Any]]:
    """Fetch legal reference documents for a federal state from the Legal Crawler API.

    Args:
        state: Federal state code (e.g. "NRW", "BY", "NW").

    Returns:
        A list of legal documents (with paragraphs); empty on any error
        (best-effort: failures are logged, never raised).
    """
    url = f"{LEGAL_CRAWLER_API_URL}/references/{state}"
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(url)
            if response.status_code == 200:
                payload = response.json()
                return payload.get("documents", [])
        logger.warning(f"Legal API returned {response.status_code} for state {state}")
        return []
    except Exception as e:
        logger.error(f"Fehler beim Laden rechtlicher Referenzen für {state}: {e}")
        return []
def parse_db_references_to_legal_refs(
    db_docs: List[Dict[str, Any]],
    topic: str
) -> List[LegalReference]:
    """Convert crawled DB documents into LegalReference objects.

    Paragraphs are filtered for relevance to *topic* via a simple keyword /
    paragraph-number heuristic; documents without extracted paragraphs get
    one generic reference for the whole law.
    """
    # Maps a topic to substrings that mark a paragraph number/title as relevant.
    topic_keywords = {
        "elternpflichten": ["42", "76", "85", "eltern", "pflicht"],
        "schulpflicht": ["41", "35", "schulpflicht", "pflicht"],
        "ordnungsmassnahmen": ["53", "ordnung", "erzieh", "maßnahm"],
        "datenschutz": ["120", "daten", "schutz"],
        "foerderung": ["2", "förder", "bildung", "auftrag"],
    }
    keywords = [kw.lower() for kw in topic_keywords.get(topic, ["eltern"])]

    refs: List[LegalReference] = []
    for doc in db_docs:
        law_name = doc.get("law_name", doc.get("title", "Schulgesetz"))
        paragraphs = doc.get("paragraphs", [])

        if not paragraphs:
            # No paragraphs extracted: fall back to a generic reference for the law.
            refs.append(LegalReference(
                law=law_name,
                paragraph="(siehe Gesetzestext)",
                title=doc.get("title", "Schulgesetz"),
                summary=f"Rechtliche Grundlage aus {law_name}",
                relevance=f"Relevant für {topic}"
            ))
            continue

        # Inspect at most the first 10 paragraphs for relevance.
        for para in paragraphs[:10]:
            nr = para.get("nr", "")
            heading = para.get("title", "")
            haystacks = (nr.lower(), heading.lower())
            if any(kw in hay for kw in keywords for hay in haystacks):
                refs.append(LegalReference(
                    law=law_name,
                    paragraph=nr,
                    title=heading[:100],
                    summary=f"{heading[:150]}",
                    relevance=f"Relevant für {topic}"
                ))
    return refs
# The four NVC principles (Rosenberg), each with a school-context example.
GFK_PRINCIPLES = [
    GFKPrinciple(
        principle="Beobachtung",
        description="Konkrete Handlungen beschreiben ohne Bewertung oder Interpretation",
        example="'Ich habe bemerkt, dass Max in den letzten zwei Wochen dreimal ohne Hausaufgaben kam.' statt 'Max ist faul.'"
    ),
    GFKPrinciple(
        principle="Gefühle",
        description="Eigene Gefühle ausdrücken (Ich-Botschaften)",
        example="'Ich mache mir Sorgen...' statt 'Sie müssen endlich...'"
    ),
    GFKPrinciple(
        principle="Bedürfnisse",
        description="Dahinterliegende Bedürfnisse benennen",
        example="'Mir ist wichtig, dass Max sein Potential entfalten kann.' statt 'Sie müssen mehr kontrollieren.'"
    ),
    GFKPrinciple(
        principle="Bitten",
        description="Konkrete, erfüllbare Bitten formulieren",
        example="'Wären Sie bereit, täglich die Hausaufgaben zu prüfen?' statt 'Tun Sie endlich etwas!'"
    ),
]
# Message templates per communication type: subject line plus opening/closing
# boilerplate. The {placeholders} are filled in later by the caller/LLM.
COMMUNICATION_TEMPLATES: Dict[CommunicationType, Dict[str, str]] = {
    CommunicationType.GENERAL_INFO: {
        "subject": "Information: {topic}",
        "opening": "Sehr geehrte/r {parent_name},\n\nich möchte Sie über folgendes informieren:",
        "closing": "Bei Fragen stehe ich Ihnen gerne zur Verfügung.\n\nMit freundlichen Grüßen",
    },
    CommunicationType.BEHAVIOR: {
        "subject": "Gesprächswunsch: {student_name}",
        "opening": "Sehr geehrte/r {parent_name},\n\nich wende mich heute an Sie, da mir das Wohlergehen von {student_name} sehr am Herzen liegt.",
        "closing": "Ich bin überzeugt, dass wir gemeinsam eine gute Lösung finden können. Ich würde mich über ein Gespräch freuen.\n\nMit freundlichen Grüßen",
    },
    CommunicationType.ACADEMIC: {
        "subject": "Schulische Entwicklung: {student_name}",
        "opening": "Sehr geehrte/r {parent_name},\n\nich möchte Sie über die schulische Entwicklung von {student_name} informieren.",
        "closing": "Ich würde mich freuen, wenn wir gemeinsam überlegen könnten, wie wir {student_name} optimal unterstützen können.\n\nMit freundlichen Grüßen",
    },
    CommunicationType.ATTENDANCE: {
        "subject": "Fehlzeiten: {student_name}",
        "opening": "Sehr geehrte/r {parent_name},\n\nich wende mich an Sie bezüglich der Anwesenheit von {student_name}.",
        "closing": "Gemäß {legal_reference} sind regelmäßige Fehlzeiten meldepflichtig. Ich bin sicher, dass wir gemeinsam eine Lösung finden.\n\nMit freundlichen Grüßen",
    },
    CommunicationType.MEETING_INVITE: {
        "subject": "Einladung zum Elterngespräch",
        "opening": "Sehr geehrte/r {parent_name},\n\nich würde mich freuen, Sie zu einem persönlichen Gespräch einzuladen.",
        "closing": "Bitte teilen Sie mir mit, ob einer der vorgeschlagenen Termine für Sie passt, oder nennen Sie mir einen Alternativtermin.\n\nMit freundlichen Grüßen",
    },
    CommunicationType.POSITIVE_FEEDBACK: {
        "subject": "Positive Rückmeldung: {student_name}",
        "opening": "Sehr geehrte/r {parent_name},\n\nich freue mich, Ihnen heute eine erfreuliche Nachricht mitteilen zu können.",
        "closing": "Ich freue mich, {student_name} auf diesem positiven Weg weiter begleiten zu dürfen.\n\nMit herzlichen Grüßen",
    },
    CommunicationType.CONCERN: {
        "subject": "Gemeinsame Sorge: {student_name}",
        "opening": "Sehr geehrte/r {parent_name},\n\nich wende mich heute an Sie, weil mir etwas aufgefallen ist, das ich gerne mit Ihnen besprechen würde.",
        "closing": "Ich bin überzeugt, dass wir im Sinne von {student_name} gemeinsam eine gute Lösung finden werden.\n\nMit freundlichen Grüßen",
    },
    CommunicationType.CONFLICT: {
        "subject": "Bitte um ein klärendes Gespräch",
        "opening": "Sehr geehrte/r {parent_name},\n\nich möchte das Gespräch mit Ihnen suchen, da mir eine konstruktive Zusammenarbeit sehr wichtig ist.",
        "closing": "Mir liegt eine gute Kooperation zum Wohl von {student_name} am Herzen. Ich bin überzeugt, dass wir im Dialog eine für alle Seiten gute Lösung finden können.\n\nMit freundlichen Grüßen",
    },
    CommunicationType.SPECIAL_NEEDS: {
        "subject": "Förderung: {student_name}",
        "opening": "Sehr geehrte/r {parent_name},\n\nich möchte mit Ihnen über die individuelle Förderung von {student_name} sprechen.",
        "closing": "Gemäß dem Bildungsauftrag ({legal_reference}) ist es uns ein besonderes Anliegen, jedes Kind optimal zu fördern. Lassen Sie uns gemeinsam überlegen, wie wir {student_name} bestmöglich unterstützen können.\n\nMit freundlichen Grüßen",
    },
}
class CommunicationService:
    """
    Service supporting teacher-to-parent communication.

    Generates professional, legally grounded and empathetic messages based on
    the principles of Nonviolent Communication (NVC).

    Legal references are loaded dynamically from the DB (via the Legal Crawler
    API); hardcoded fallbacks are used only when the DB yields nothing.
    """

    def __init__(self):
        self.fallback_references = FALLBACK_LEGAL_REFERENCES
        self.gfk_principles = GFK_PRINCIPLES
        self.templates = COMMUNICATION_TEMPLATES
        # Cache for DB-backed references (avoids repeated API calls per state/topic).
        self._cached_references: Dict[str, List[LegalReference]] = {}

    async def get_legal_references_async(
        self,
        state: str,
        topic: str
    ) -> List[LegalReference]:
        """
        Return relevant legal references for a federal state and topic.

        Loads from the DB via the Legal Crawler API; falls back to the static
        defaults when the DB has nothing usable.

        Args:
            state: Federal state code (e.g. "NRW", "BY", "NW").
            topic: Topic area (e.g. "elternpflichten", "schulpflicht").

        Returns:
            List of relevant LegalReference objects.
        """
        cache_key = f"{state}:{topic}"
        # Serve from cache if we already resolved this state/topic pair.
        if cache_key in self._cached_references:
            return self._cached_references[cache_key]
        # Load from the DB-backed API.
        db_docs = await fetch_legal_references_from_db(state)
        if db_docs:
            # Convert DB documents into LegalReference objects.
            references = parse_db_references_to_legal_refs(db_docs, topic)
            if references:
                self._cached_references[cache_key] = references
                return references
        # Fallback when the DB is empty (note: fallback results are not cached).
        logger.info(f"Keine DB-Referenzen für {state}/{topic}, nutze Fallback")
        return self._get_fallback_references(state, topic)

    def get_legal_references(
        self,
        state: str,
        topic: str
    ) -> List[LegalReference]:
        """
        Synchronous variant kept for backwards compatibility.

        Uses only the static fallback references (for non-async contexts).
        For dynamic DB references use get_legal_references_async() instead.
        """
        return self._get_fallback_references(state, topic)

    def _get_fallback_references(
        self,
        state: str,
        topic: str
    ) -> List[LegalReference]:
        """Return static fallback references.

        NOTE(review): ``state`` is currently ignored — only the "DEFAULT"
        bucket exists in FALLBACK_LEGAL_REFERENCES.
        """
        state_refs = self.fallback_references.get("DEFAULT", {})
        if topic in state_refs:
            return [state_refs[topic]]
        return list(state_refs.values())

    def get_gfk_guidance(
        self,
        comm_type: CommunicationType
    ) -> List[GFKPrinciple]:
        """
        Return NVC guidelines for a communication type.

        NOTE(review): ``comm_type`` is currently ignored; all principles are
        returned regardless of type.
        """
        return self.gfk_principles

    def get_template(
        self,
        comm_type: CommunicationType
    ) -> Dict[str, str]:
        """
        Return the template for a communication type (GENERAL_INFO as default).
        """
        return self.templates.get(comm_type, self.templates[CommunicationType.GENERAL_INFO])

    def build_system_prompt(
        self,
        comm_type: CommunicationType,
        state: str,
        tone: CommunicationTone
    ) -> str:
        """
        Build the system prompt for AI-assisted message generation.

        Args:
            comm_type: Type of communication.
            state: Federal state for legal references.
            tone: Desired tone of voice.

        Returns:
            System prompt string for the LLM.
        """
        # Collect legal references: map the communication type to a legal topic.
        topic_map = {
            CommunicationType.ATTENDANCE: "schulpflicht",
            CommunicationType.BEHAVIOR: "ordnungsmassnahmen",
            CommunicationType.ACADEMIC: "foerderung",
            CommunicationType.SPECIAL_NEEDS: "foerderung",
            CommunicationType.CONCERN: "elternpflichten",
            CommunicationType.CONFLICT: "elternpflichten",
        }
        topic = topic_map.get(comm_type, "elternpflichten")
        # Sync lookup: only fallback references are available here.
        legal_refs = self.get_legal_references(state, topic)
        legal_context = ""
        if legal_refs:
            legal_context = "\n\nRechtliche Grundlagen:\n"
            for ref in legal_refs:
                legal_context += f"- {ref.law} {ref.paragraph} ({ref.title}): {ref.summary}\n"
        # Describe the requested tone (German, since the prompt targets German output).
        tone_descriptions = {
            CommunicationTone.FORMAL: "Verwende eine sehr formelle, sachliche Sprache.",
            CommunicationTone.PROFESSIONAL: "Verwende eine professionelle, aber freundliche Sprache.",
            CommunicationTone.WARM: "Verwende eine warmherzige, einladende Sprache.",
            CommunicationTone.CONCERNED: "Drücke aufrichtige Sorge und Empathie aus.",
            CommunicationTone.APPRECIATIVE: "Betone Wertschätzung und positives Feedback.",
        }
        tone_desc = tone_descriptions.get(tone, tone_descriptions[CommunicationTone.PROFESSIONAL])
        # The prompt itself is runtime content and intentionally German.
        system_prompt = f"""Du bist ein erfahrener Kommunikationsberater für Lehrkräfte im deutschen Schulsystem.
Deine Aufgabe ist es, professionelle, empathische und rechtlich fundierte Elternbriefe zu verfassen.
GRUNDPRINZIPIEN (Gewaltfreie Kommunikation nach Marshall Rosenberg):
1. BEOBACHTUNG: Beschreibe konkrete Handlungen ohne Bewertung
Beispiel: "Ich habe bemerkt, dass..." statt "Das Kind ist..."
2. GEFÜHLE: Drücke Gefühle als Ich-Botschaften aus
Beispiel: "Ich mache mir Sorgen..." statt "Sie müssen..."
3. BEDÜRFNISSE: Benenne dahinterliegende Bedürfnisse
Beispiel: "Mir ist wichtig, dass..." statt "Sie sollten..."
4. BITTEN: Formuliere konkrete, erfüllbare Bitten
Beispiel: "Wären Sie bereit, ...?" statt "Tun Sie endlich...!"
WICHTIGE REGELN:
- Immer die Würde aller Beteiligten wahren
- Keine Schuldzuweisungen oder Vorwürfe
- Lösungsorientiert statt problemfokussiert
- Auf Augenhöhe kommunizieren
- Kooperation statt Konfrontation
- Deutsche Sprache, förmliche Anrede (Sie)
- Sachlich, aber empathisch
{legal_context}
TONALITÄT:
{tone_desc}
FORMAT:
- Verfasse den Brief als vollständigen, versandfertigen Text
- Beginne mit der Anrede
- Strukturiere den Inhalt klar und verständlich
- Schließe mit einer freundlichen Grußformel
- Die Signatur (Name der Lehrkraft) wird später hinzugefügt
WICHTIG: Der Brief soll professionell und rechtlich einwandfrei sein, aber gleichzeitig
menschlich und einladend wirken. Ziel ist immer eine konstruktive Zusammenarbeit."""
        return system_prompt

    def build_user_prompt(
        self,
        comm_type: CommunicationType,
        context: Dict[str, Any]
    ) -> str:
        """
        Build the user prompt from the given context.

        Args:
            comm_type: Type of communication.
            context: Context info (student_name, parent_name, situation, additional_info).

        Returns:
            User prompt string for the LLM.
        """
        student_name = context.get("student_name", "das Kind")
        parent_name = context.get("parent_name", "Frau/Herr")
        situation = context.get("situation", "")
        additional_info = context.get("additional_info", "")
        type_descriptions = {
            CommunicationType.GENERAL_INFO: "eine allgemeine Information",
            CommunicationType.BEHAVIOR: "ein Verhalten, das besprochen werden sollte",
            CommunicationType.ACADEMIC: "die schulische Entwicklung",
            CommunicationType.ATTENDANCE: "Fehlzeiten oder Anwesenheitsprobleme",
            CommunicationType.MEETING_INVITE: "eine Einladung zum Elterngespräch",
            CommunicationType.POSITIVE_FEEDBACK: "positives Feedback",
            CommunicationType.CONCERN: "eine Sorge oder ein Anliegen",
            CommunicationType.CONFLICT: "eine konflikthafte Situation",
            CommunicationType.SPECIAL_NEEDS: "Förderbedarf oder besondere Unterstützung",
        }
        type_desc = type_descriptions.get(comm_type, "ein Anliegen")
        user_prompt = f"""Schreibe einen Elternbrief zu folgendem Anlass: {type_desc}
Schülername: {student_name}
Elternname: {parent_name}
Situation:
{situation}
"""
        if additional_info:
            user_prompt += f"\nZusätzliche Informationen:\n{additional_info}\n"
        user_prompt += """
Bitte verfasse einen professionellen, empathischen Brief nach den GFK-Prinzipien.
Der Brief sollte:
- Die Situation sachlich beschreiben (Beobachtung)
- Verständnis und Sorge ausdrücken (Gefühle)
- Das gemeinsame Ziel betonen (Bedürfnisse)
- Einen konstruktiven Vorschlag machen (Bitte)
"""
        return user_prompt

    def validate_communication(self, text: str) -> Dict[str, Any]:
        """
        Validate a generated communication for NVC conformity.

        Args:
            text: The text to check.

        Returns:
            Dict with is_valid, issues, suggestions, positive_elements and a
            gfk_score (NOTE(review): the score formula can exceed 1.0 when
            many positive patterns match — confirm intended range).
        """
        issues = []
        suggestions = []
        # Scan for problematic phrasings (substring match, case-insensitive).
        problematic_patterns = [
            ("Sie müssen", "Vorschlag: 'Wären Sie bereit, ...' oder 'Ich bitte Sie, ...'"),
            ("Sie sollten", "Vorschlag: 'Ich würde mir wünschen, ...'"),
            ("Das Kind ist", "Vorschlag: 'Ich habe beobachtet, dass ...'"),
            ("immer", "Vorsicht bei Verallgemeinerungen - besser konkrete Beispiele"),
            ("nie", "Vorsicht bei Verallgemeinerungen - besser konkrete Beispiele"),
            ("faul", "Vorschlag: Verhalten konkret beschreiben statt bewerten"),
            ("unverschämt", "Vorschlag: Verhalten konkret beschreiben statt bewerten"),
            ("respektlos", "Vorschlag: Verhalten konkret beschreiben statt bewerten"),
        ]
        for pattern, suggestion in problematic_patterns:
            if pattern.lower() in text.lower():
                issues.append(f"Problematische Formulierung gefunden: '{pattern}'")
                suggestions.append(suggestion)
        # Scan for positive (NVC-conforming) elements.
        positive_elements = []
        positive_patterns = [
            ("Ich habe bemerkt", "Gute Beobachtung"),
            ("Ich möchte", "Gute Ich-Botschaft"),
            ("gemeinsam", "Gute Kooperationsorientierung"),
            ("wichtig", "Gutes Bedürfnis-Statement"),
            ("freuen", "Positive Tonalität"),
            ("Wären Sie bereit", "Gute Bitte-Formulierung"),
        ]
        for pattern, feedback in positive_patterns:
            if pattern.lower() in text.lower():
                positive_elements.append(feedback)
        return {
            "is_valid": len(issues) == 0,
            "issues": issues,
            "suggestions": suggestions,
            "positive_elements": positive_elements,
            # Score: -15 per issue, +10 per positive element, floored at 0, scaled to ~[0, 1+].
            "gfk_score": max(0, 100 - len(issues) * 15 + len(positive_elements) * 10) / 100
        }

    def get_all_communication_types(self) -> List[Dict[str, str]]:
        """Return all available communication types as value/label pairs."""
        return [
            {"value": ct.value, "label": self._get_type_label(ct)}
            for ct in CommunicationType
        ]

    def _get_type_label(self, ct: CommunicationType) -> str:
        """Return the German UI label for a communication type."""
        labels = {
            CommunicationType.GENERAL_INFO: "Allgemeine Information",
            CommunicationType.BEHAVIOR: "Verhalten/Disziplin",
            CommunicationType.ACADEMIC: "Schulleistungen",
            CommunicationType.ATTENDANCE: "Fehlzeiten",
            CommunicationType.MEETING_INVITE: "Einladung zum Gespräch",
            CommunicationType.POSITIVE_FEEDBACK: "Positives Feedback",
            CommunicationType.CONCERN: "Bedenken äußern",
            CommunicationType.CONFLICT: "Konfliktlösung",
            CommunicationType.SPECIAL_NEEDS: "Förderbedarf",
        }
        return labels.get(ct, ct.value)

    def get_all_tones(self) -> List[Dict[str, str]]:
        """Return all available tones as value/label pairs."""
        labels = {
            CommunicationTone.FORMAL: "Sehr förmlich",
            CommunicationTone.PROFESSIONAL: "Professionell-freundlich",
            CommunicationTone.WARM: "Warmherzig",
            CommunicationTone.CONCERNED: "Besorgt",
            CommunicationTone.APPRECIATIVE: "Wertschätzend",
        }
        return [
            {"value": t.value, "label": labels.get(t, t.value)}
            for t in CommunicationTone
        ]

    def get_states(self) -> List[Dict[str, str]]:
        """Return all 16 German federal states as value/label pairs."""
        return [
            {"value": "NRW", "label": "Nordrhein-Westfalen"},
            {"value": "BY", "label": "Bayern"},
            {"value": "BW", "label": "Baden-Württemberg"},
            {"value": "NI", "label": "Niedersachsen"},
            {"value": "HE", "label": "Hessen"},
            {"value": "SN", "label": "Sachsen"},
            {"value": "RP", "label": "Rheinland-Pfalz"},
            {"value": "SH", "label": "Schleswig-Holstein"},
            {"value": "BE", "label": "Berlin"},
            {"value": "BB", "label": "Brandenburg"},
            {"value": "MV", "label": "Mecklenburg-Vorpommern"},
            {"value": "ST", "label": "Sachsen-Anhalt"},
            {"value": "TH", "label": "Thüringen"},
            {"value": "HH", "label": "Hamburg"},
            {"value": "HB", "label": "Bremen"},
            {"value": "SL", "label": "Saarland"},
        ]
# Module-level singleton instance.
_communication_service: Optional[CommunicationService] = None


def get_communication_service() -> CommunicationService:
    """Return the process-wide CommunicationService, creating it on first use."""
    global _communication_service
    if _communication_service is None:
        _communication_service = CommunicationService()
    return _communication_service

View File

@@ -0,0 +1,522 @@
"""
Inference Service - Kommunikation mit LLM Backends.
Unterstützt:
- Ollama (lokal)
- vLLM (remote, OpenAI-kompatibel)
- Anthropic Claude API (Fallback)
"""
import httpx
import json
import logging
from typing import AsyncIterator, Optional
from dataclasses import dataclass
from ..config import get_config, LLMBackendConfig
from ..models.chat import (
ChatCompletionRequest,
ChatCompletionResponse,
ChatCompletionChunk,
ChatMessage,
ChatChoice,
StreamChoice,
ChatChoiceDelta,
Usage,
ModelInfo,
ModelListResponse,
)
logger = logging.getLogger(__name__)
@dataclass
class InferenceResult:
    """Normalized result of one inference request, independent of the backend."""
    content: str                   # generated assistant text
    model: str                     # actual model id used by the backend
    backend: str                   # backend name ("ollama", "anthropic", ...)
    usage: Optional[Usage] = None  # token accounting, if the backend reports it
    finish_reason: str = "stop"    # OpenAI-style finish reason
class InferenceService:
"""Service für LLM Inference über verschiedene Backends."""
def __init__(self):
    """Capture the gateway config; the HTTP client is created lazily."""
    self._client: Optional[httpx.AsyncClient] = None
    self.config = get_config()
async def get_client(self) -> httpx.AsyncClient:
    """Return the shared HTTP client, creating it on first use (lazy init)."""
    client = self._client
    if client is None:
        client = httpx.AsyncClient(timeout=120.0)
        self._client = client
    return client
async def close(self):
    """Close and drop the shared HTTP client (no-op if never created)."""
    if self._client is not None:
        await self._client.aclose()
        self._client = None
def _get_available_backend(self, preferred_model: Optional[str] = None) -> Optional[LLMBackendConfig]:
    """Return the first enabled backend in configured priority order, else None.

    NOTE(review): ``preferred_model`` is currently unused — presumably reserved
    for model-aware routing; confirm before relying on it.
    """
    candidates = (
        getattr(self.config, backend_name, None)
        for backend_name in self.config.backend_priority
    )
    return next((backend for backend in candidates if backend and backend.enabled), None)
def _map_model_to_backend(self, model: str) -> tuple[str, LLMBackendConfig]:
    """Resolve a public model name to (actual model id, backend config).

    Examples:
        "breakpilot-teacher-8b" -> Ollama/vLLM Llama 3.1 model
        "claude-3-5-sonnet"     -> Anthropic backend

    Raises:
        ValueError: if no suitable backend is configured/available.
    """
    requested = model.lower()

    # Explicit Claude models are always routed to Anthropic.
    if "claude" in requested:
        anthropic_cfg = self.config.anthropic
        if anthropic_cfg and anthropic_cfg.enabled:
            return anthropic_cfg.default_model, anthropic_cfg
        raise ValueError("Anthropic backend not configured")

    # Everything else goes to the first available self-hosted backend.
    backend = self._get_available_backend()
    if backend is None:
        raise ValueError("No LLM backend available")
    is_ollama = backend.name == "ollama"

    # BreakPilot alias models map to Llama 3.1; size is picked from the alias.
    if "breakpilot" in requested or "teacher" in requested:
        if "70b" in requested:
            return ("llama3.1:70b" if is_ollama else "meta-llama/Meta-Llama-3.1-70B-Instruct"), backend
        return ("llama3.1:8b" if is_ollama else "meta-llama/Meta-Llama-3.1-8B-Instruct"), backend

    # Mistral alias models.
    if "mistral" in requested:
        return ("mistral:7b" if is_ollama else "mistralai/Mistral-7B-Instruct-v0.2"), backend

    # Fallback: pass the model name through unchanged.
    return model, backend
async def _call_ollama(
    self,
    backend: LLMBackendConfig,
    model: str,
    request: ChatCompletionRequest,
) -> InferenceResult:
    """Call the native Ollama chat API (non-streaming).

    Ollama is not OpenAI-compatible: it takes role/content messages plus an
    "options" object, and reports token counts as *_eval_count fields.
    """
    client = await self.get_client()
    # Ollama uses its own request format.
    messages = [{"role": m.role, "content": m.content or ""} for m in request.messages]
    payload = {
        "model": model,
        "messages": messages,
        "stream": False,
        "options": {
            "temperature": request.temperature,
            "top_p": request.top_p,
        },
    }
    if request.max_tokens:
        # Ollama names the generation limit "num_predict".
        payload["options"]["num_predict"] = request.max_tokens
    response = await client.post(
        f"{backend.base_url}/api/chat",
        json=payload,
        timeout=backend.timeout,
    )
    response.raise_for_status()
    data = response.json()
    return InferenceResult(
        content=data.get("message", {}).get("content", ""),
        model=model,
        backend="ollama",
        usage=Usage(
            prompt_tokens=data.get("prompt_eval_count", 0),
            completion_tokens=data.get("eval_count", 0),
            total_tokens=data.get("prompt_eval_count", 0) + data.get("eval_count", 0),
        ),
        # "done" False is treated as having hit the token limit.
        finish_reason="stop" if data.get("done") else "length",
    )
async def _stream_ollama(
    self,
    backend: LLMBackendConfig,
    model: str,
    request: ChatCompletionRequest,
    response_id: str,
) -> AsyncIterator[ChatCompletionChunk]:
    """Stream chat chunks from Ollama (newline-delimited JSON).

    Yields one OpenAI-style ChatCompletionChunk per Ollama message fragment;
    malformed JSON lines are skipped silently.
    """
    client = await self.get_client()
    messages = [{"role": m.role, "content": m.content or ""} for m in request.messages]
    payload = {
        "model": model,
        "messages": messages,
        "stream": True,
        "options": {
            "temperature": request.temperature,
            "top_p": request.top_p,
        },
    }
    if request.max_tokens:
        # Ollama names the generation limit "num_predict".
        payload["options"]["num_predict"] = request.max_tokens
    async with client.stream(
        "POST",
        f"{backend.base_url}/api/chat",
        json=payload,
        timeout=backend.timeout,
    ) as response:
        response.raise_for_status()
        async for line in response.aiter_lines():
            if not line:
                continue
            try:
                data = json.loads(line)
                content = data.get("message", {}).get("content", "")
                done = data.get("done", False)
                yield ChatCompletionChunk(
                    id=response_id,
                    model=model,
                    choices=[
                        StreamChoice(
                            index=0,
                            delta=ChatChoiceDelta(content=content),
                            # Ollama marks the last fragment with done=True.
                            finish_reason="stop" if done else None,
                        )
                    ],
                )
            except json.JSONDecodeError:
                continue
async def _call_openai_compatible(
    self,
    backend: LLMBackendConfig,
    model: str,
    request: ChatCompletionRequest,
) -> InferenceResult:
    """Call an OpenAI-compatible chat completions API (vLLM etc.), non-streaming."""
    client = await self.get_client()
    headers = {"Content-Type": "application/json"}
    if backend.api_key:
        headers["Authorization"] = f"Bearer {backend.api_key}"
    payload = {
        "model": model,
        "messages": [m.model_dump(exclude_none=True) for m in request.messages],
        "stream": False,
        "temperature": request.temperature,
        "top_p": request.top_p,
    }
    if request.max_tokens:
        payload["max_tokens"] = request.max_tokens
    if request.stop:
        payload["stop"] = request.stop
    response = await client.post(
        f"{backend.base_url}/v1/chat/completions",
        json=payload,
        headers=headers,
        timeout=backend.timeout,
    )
    response.raise_for_status()
    data = response.json()
    # Only the first choice is used; usage fields default to 0 when missing.
    choice = data.get("choices", [{}])[0]
    usage_data = data.get("usage", {})
    return InferenceResult(
        content=choice.get("message", {}).get("content", ""),
        model=model,
        backend=backend.name,
        usage=Usage(
            prompt_tokens=usage_data.get("prompt_tokens", 0),
            completion_tokens=usage_data.get("completion_tokens", 0),
            total_tokens=usage_data.get("total_tokens", 0),
        ),
        finish_reason=choice.get("finish_reason", "stop"),
    )
async def _stream_openai_compatible(
    self,
    backend: LLMBackendConfig,
    model: str,
    request: ChatCompletionRequest,
    response_id: str,
) -> AsyncIterator[ChatCompletionChunk]:
    """Stream chat completion chunks from an OpenAI-compatible API (vLLM etc.).

    Parses SSE "data: ..." lines and yields one ChatCompletionChunk per delta
    until the "[DONE]" sentinel; lines with malformed JSON are skipped.
    """
    client = await self.get_client()
    headers = {"Content-Type": "application/json"}
    if backend.api_key:
        headers["Authorization"] = f"Bearer {backend.api_key}"
    payload = {
        "model": model,
        "messages": [m.model_dump(exclude_none=True) for m in request.messages],
        "stream": True,
        "temperature": request.temperature,
        "top_p": request.top_p,
    }
    if request.max_tokens:
        payload["max_tokens"] = request.max_tokens
    # Fix: forward stop sequences, consistent with _call_openai_compatible —
    # previously streaming requests silently ignored request.stop.
    if request.stop:
        payload["stop"] = request.stop
    async with client.stream(
        "POST",
        f"{backend.base_url}/v1/chat/completions",
        json=payload,
        headers=headers,
        timeout=backend.timeout,
    ) as response:
        response.raise_for_status()
        async for line in response.aiter_lines():
            if not line or not line.startswith("data: "):
                continue
            data_str = line[6:]  # strip the "data: " SSE prefix
            if data_str == "[DONE]":
                break
            try:
                data = json.loads(data_str)
            except json.JSONDecodeError:
                continue
            choice = data.get("choices", [{}])[0]
            delta = choice.get("delta", {})
            yield ChatCompletionChunk(
                id=response_id,
                model=model,
                choices=[
                    StreamChoice(
                        index=0,
                        delta=ChatChoiceDelta(
                            role=delta.get("role"),
                            content=delta.get("content"),
                        ),
                        finish_reason=choice.get("finish_reason"),
                    )
                ],
            )
async def _call_anthropic(
    self,
    backend: LLMBackendConfig,
    model: str,
    request: ChatCompletionRequest,
) -> InferenceResult:
    """Call the Anthropic Claude API (non-streaming) via the official SDK."""
    # Lazy import keeps the SDK an optional dependency of the gateway.
    try:
        import anthropic
    except ImportError:
        raise ImportError("anthropic package required for Claude API")
    client = anthropic.AsyncAnthropic(api_key=backend.api_key)
    # Anthropic takes the system prompt separately; all system messages are
    # concatenated and removed from the message list.
    system_content = ""
    messages = []
    for msg in request.messages:
        if msg.role == "system":
            system_content += (msg.content or "") + "\n"
        else:
            messages.append({"role": msg.role, "content": msg.content or ""})
    response = await client.messages.create(
        model=model,
        max_tokens=request.max_tokens or 4096,
        system=system_content.strip() if system_content else None,
        messages=messages,
        temperature=request.temperature,
        top_p=request.top_p,
    )
    # Only the first content block is used, and only if it is a text block.
    content = ""
    if response.content:
        content = response.content[0].text if response.content[0].type == "text" else ""
    return InferenceResult(
        content=content,
        model=model,
        backend="anthropic",
        usage=Usage(
            prompt_tokens=response.usage.input_tokens,
            completion_tokens=response.usage.output_tokens,
            total_tokens=response.usage.input_tokens + response.usage.output_tokens,
        ),
        # Map Anthropic's "end_turn" to OpenAI-style "stop".
        finish_reason="stop" if response.stop_reason == "end_turn" else response.stop_reason or "stop",
    )
async def _stream_anthropic(
    self,
    backend: LLMBackendConfig,
    model: str,
    request: ChatCompletionRequest,
    response_id: str,
) -> AsyncIterator[ChatCompletionChunk]:
    """Stream a completion from the Anthropic Claude API as OpenAI-style chunks.

    Yields one chunk per text delta, then a terminal chunk carrying
    ``finish_reason="stop"``.

    Raises:
        ImportError: If the anthropic SDK is not installed.
    """
    try:
        import anthropic
    except ImportError:
        raise ImportError("anthropic package required for Claude API")
    client = anthropic.AsyncAnthropic(api_key=backend.api_key)
    # Split the system prompt from the chat turns (Claude API convention).
    system_parts = []
    chat_turns = []
    for message in request.messages:
        if message.role == "system":
            system_parts.append((message.content or "") + "\n")
        else:
            chat_turns.append({"role": message.role, "content": message.content or ""})
    system_text = "".join(system_parts)
    async with client.messages.stream(
        model=model,
        max_tokens=request.max_tokens or 4096,
        system=system_text.strip() if system_text else None,
        messages=chat_turns,
        temperature=request.temperature,
        top_p=request.top_p,
    ) as stream:
        async for text in stream.text_stream:
            yield ChatCompletionChunk(
                id=response_id,
                model=model,
                choices=[
                    StreamChoice(
                        index=0,
                        delta=ChatChoiceDelta(content=text),
                        finish_reason=None,
                    )
                ],
            )
    # Terminal chunk signalling completion to OpenAI-style consumers.
    yield ChatCompletionChunk(
        id=response_id,
        model=model,
        choices=[
            StreamChoice(
                index=0,
                delta=ChatChoiceDelta(),
                finish_reason="stop",
            )
        ],
    )
async def complete(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
    """
    Run a non-streaming chat completion on whichever backend the
    requested model maps to, returning an OpenAI-style response.
    """
    actual_model, backend = self._map_model_to_backend(request.model)
    logger.info(f"Inference request: model={request.model}{actual_model} via {backend.name}")
    # Pick the backend-specific caller; every other backend speaks
    # the OpenAI-compatible dialect.
    if backend.name == "ollama":
        call = self._call_ollama
    elif backend.name == "anthropic":
        call = self._call_anthropic
    else:
        call = self._call_openai_compatible
    result = await call(backend, actual_model, request)
    return ChatCompletionResponse(
        model=request.model,  # echo the originally requested model name
        choices=[
            ChatChoice(
                index=0,
                message=ChatMessage(role="assistant", content=result.content),
                finish_reason=result.finish_reason,
            )
        ],
        usage=result.usage,
    )
async def stream(self, request: ChatCompletionRequest) -> AsyncIterator[ChatCompletionChunk]:
    """
    Run a chat completion in streaming mode, yielding OpenAI-style chunks.
    """
    import uuid
    response_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
    actual_model, backend = self._map_model_to_backend(request.model)
    logger.info(f"Streaming request: model={request.model}{actual_model} via {backend.name}")
    # Select the backend-specific streamer, then forward its chunks as-is.
    if backend.name == "ollama":
        streamer = self._stream_ollama
    elif backend.name == "anthropic":
        streamer = self._stream_anthropic
    else:
        streamer = self._stream_openai_compatible
    async for chunk in streamer(backend, actual_model, request, response_id):
        yield chunk
async def list_models(self) -> ModelListResponse:
    """List the models currently reachable through the configured backends."""
    models = []
    # BreakPilot house models are advertised whenever any backend is up;
    # the gateway maps them to a concrete backend at request time.
    backend = self._get_available_backend()
    if backend:
        models.append(
            ModelInfo(
                id="breakpilot-teacher-8b",
                owned_by="breakpilot",
                description="Llama 3.1 8B optimiert für Schulkontext",
                context_length=8192,
            )
        )
        models.append(
            ModelInfo(
                id="breakpilot-teacher-70b",
                owned_by="breakpilot",
                description="Llama 3.1 70B für komplexe Aufgaben",
                context_length=8192,
            )
        )
    # Claude is only advertised when the Anthropic backend is configured.
    if self.config.anthropic and self.config.anthropic.enabled:
        models.append(
            ModelInfo(
                id="claude-3-5-sonnet",
                owned_by="anthropic",
                description="Claude 3.5 Sonnet - Fallback für höchste Qualität",
                context_length=200000,
            )
        )
    return ModelListResponse(data=models)
# Singleton
_inference_service: Optional[InferenceService] = None


def get_inference_service() -> InferenceService:
    """Return the process-wide InferenceService, creating it lazily."""
    global _inference_service
    if _inference_service is not None:
        return _inference_service
    _inference_service = InferenceService()
    return _inference_service

View File

@@ -0,0 +1,290 @@
"""
Legal Content Crawler Service.
Crawlt Schulgesetze und rechtliche Inhalte von den Seed-URLs
und speichert sie in der Datenbank für den Communication-Service.
"""
import asyncio
import hashlib
import logging
import re
from datetime import datetime
from typing import Dict, List, Optional
from dataclasses import dataclass
import httpx
from bs4 import BeautifulSoup
logger = logging.getLogger(__name__)
@dataclass
class CrawledDocument:
    """Represents a single crawled legal document."""
    url: str  # URL that was fetched
    canonical_url: Optional[str]  # final URL after redirects
    title: str  # page <title> text
    content: str  # extracted plain text (capped at 100k chars by the crawler)
    content_hash: str  # sha256 hex digest of the content, used for change detection
    category: str  # seed category, e.g. "legal"
    doc_type: str  # document type, e.g. "schulgesetz"
    state: Optional[str]  # German federal state, if applicable
    law_name: Optional[str]  # short law name, e.g. "SchulG NRW"
    paragraphs: Optional[List[Dict]]  # extracted section headings ({"nr", "title"})
    trust_score: float  # source trust weighting taken from the seed config
class LegalCrawler:
    """Crawler for legal education content (German school law).

    Fetches seed URLs, extracts title, body text and section (§) structure,
    and upserts the result into ``edu_search_documents``.
    """

    def __init__(self, db_pool=None):
        """
        Args:
            db_pool: Optional asyncpg-style pool. ``crawl_legal_seeds`` also
                takes a pool explicitly, so this may stay None.
        """
        self.db_pool = db_pool
        self.user_agent = "BreakPilot-Crawler/1.0 (Educational Purpose)"
        self.timeout = 30.0
        self.rate_limit_delay = 1.0  # seconds between requests (politeness)

    async def crawl_url(self, url: str, seed_info: Dict) -> Optional["CrawledDocument"]:
        """Crawl a URL and extract its content.

        Args:
            url: The URL to fetch.
            seed_info: Seed metadata (category, state, trust_boost, name).

        Returns:
            CrawledDocument on success, None on any error (errors are logged,
            never raised, so one bad seed cannot abort a crawl run).
        """
        try:
            async with httpx.AsyncClient(
                follow_redirects=True,
                timeout=self.timeout,
                headers={"User-Agent": self.user_agent}
            ) as client:
                response = await client.get(url)
                if response.status_code != 200:
                    logger.warning(f"HTTP {response.status_code} für {url}")
                    return None
                content_type = response.headers.get("content-type", "")
                # PDF handling (e.g. Saarland publishes its SchulG as PDF)
                if "pdf" in content_type.lower():
                    return await self._process_pdf(response, url, seed_info)
                # HTML handling
                if "html" in content_type.lower():
                    return await self._process_html(response, url, seed_info)
                logger.warning(f"Unbekannter Content-Type: {content_type} für {url}")
                return None
        except Exception as e:
            logger.error(f"Fehler beim Crawlen von {url}: {e}")
            return None

    async def _process_html(
        self,
        response: "httpx.Response",
        url: str,
        seed_info: Dict
    ) -> Optional["CrawledDocument"]:
        """Extract title, main text and section structure from an HTML response."""
        html = response.text
        soup = BeautifulSoup(html, "html.parser")
        # Title
        title = ""
        title_tag = soup.find("title")
        if title_tag:
            title = title_tag.get_text(strip=True)
        # Main content extraction, best effort:
        content = ""
        # Strategy 1: a <main> or <article> element
        main = soup.find("main") or soup.find("article")
        if main:
            content = main.get_text(separator="\n", strip=True)
        else:
            # Strategy 2: whole <body> minus navigation/boilerplate
            for tag in soup.find_all(["nav", "header", "footer", "aside", "script", "style"]):
                tag.decompose()
            body = soup.find("body")
            if body:
                content = body.get_text(separator="\n", strip=True)
        if not content:
            return None
        # Section headings (for statutes)
        paragraphs = self._extract_paragraphs(soup, content)
        # Determine the short law name; prefer the seed's configured name.
        law_name = seed_info.get("name", "")
        if not law_name and title:
            # Fall back to extracting it from the page title.
            law_patterns = [
                r"(SchulG\s+\w+)",
                r"(Schulgesetz\s+\w+)",
                r"(BayEUG)",
                r"(\w+SchulG)",
            ]
            for pattern in law_patterns:
                match = re.search(pattern, title)
                if match:
                    law_name = match.group(1)
                    break
        # Content hash for change detection on re-crawls.
        content_hash = hashlib.sha256(content.encode()).hexdigest()[:64]
        return CrawledDocument(
            url=url,
            canonical_url=str(response.url),
            title=title,
            content=content[:100000],  # cap at 100k characters
            content_hash=content_hash,
            category=seed_info.get("category", "legal"),
            doc_type="schulgesetz",
            state=seed_info.get("state"),
            law_name=law_name,
            paragraphs=paragraphs,
            trust_score=seed_info.get("trust_boost", 0.9),
        )

    async def _process_pdf(
        self,
        response: "httpx.Response",
        url: str,
        seed_info: Dict
    ) -> Optional["CrawledDocument"]:
        """Handle PDF responses (placeholder - requires a PDF library)."""
        # TODO: PDF extraction with PyPDF2 or pdfplumber
        logger.info(f"PDF erkannt: {url} - PDF-Extraktion noch nicht implementiert")
        return None

    def _extract_paragraphs(
        self,
        soup: "BeautifulSoup",
        content: str
    ) -> Optional[List[Dict]]:
        """Extract section (§) headings from a statute text.

        Recognises patterns such as ``§ 42 Titel`` / ``§ 12a Aufsicht`` and
        returns up to 50 entries shaped like ``{"nr": "42", "title": "Titel"}``.

        Args:
            soup: Parsed document (currently unused; kept for future use).
            content: Plain-text content to scan.

        Returns:
            List of section dicts, or None when nothing matched.
        """
        paragraphs = []
        # BUGFIX: the previous pattern r"\s*\d+[a-z]?)\s*([^\n§]+)" had an
        # unbalanced ')' (re.error at runtime) and had lost the leading '§'.
        paragraph_pattern = r"§\s*(\d+[a-z]?)\s*([^\n§]+)"
        matches = re.findall(paragraph_pattern, content, re.MULTILINE)
        for nr, title in matches[:50]:  # cap at 50 sections
            paragraphs.append({
                "nr": nr.strip(),
                "title": title.strip()[:200],
            })
        return paragraphs if paragraphs else None

    async def crawl_legal_seeds(self, db_pool) -> Dict:
        """Crawl all enabled seeds in category 'legal' and upsert the results.

        Args:
            db_pool: asyncpg-style connection pool.

        Returns:
            Dict with counters: total / success / failed / skipped.
        """
        import json  # local import: only needed for jsonb serialisation

        stats = {
            "total": 0,
            "success": 0,
            "failed": 0,
            "skipped": 0,
        }
        # Load seeds from the DB.
        async with db_pool.acquire() as conn:
            seeds = await conn.fetch("""
                SELECT s.id, s.url, s.name, s.state, s.trust_boost,
                       c.name as category
                FROM edu_search_seeds s
                LEFT JOIN edu_search_categories c ON s.category_id = c.id
                WHERE c.name = 'legal' AND s.enabled = true
            """)
            stats["total"] = len(seeds)
            logger.info(f"Crawle {len(seeds)} Legal-Seeds...")
            for seed in seeds:
                # Politeness delay between requests.
                await asyncio.sleep(self.rate_limit_delay)
                seed_info = {
                    "name": seed["name"],
                    "state": seed["state"],
                    "trust_boost": seed["trust_boost"],
                    "category": seed["category"],
                }
                doc = await self.crawl_url(seed["url"], seed_info)
                if doc:
                    # Upsert into the documents table; content_updated_at is
                    # only bumped when the content hash actually changed.
                    try:
                        await conn.execute("""
                            INSERT INTO edu_search_documents
                            (url, canonical_url, title, content, content_hash,
                             category, doc_type, state, law_name, paragraphs,
                             trust_score, seed_id, last_crawled_at)
                            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10::jsonb, $11, $12, NOW())
                            ON CONFLICT (url) DO UPDATE SET
                                title = EXCLUDED.title,
                                content = EXCLUDED.content,
                                content_hash = EXCLUDED.content_hash,
                                paragraphs = EXCLUDED.paragraphs,
                                last_crawled_at = NOW(),
                                content_updated_at = CASE
                                    WHEN edu_search_documents.content_hash != EXCLUDED.content_hash
                                    THEN NOW()
                                    ELSE edu_search_documents.content_updated_at
                                END
                        """,
                            doc.url, doc.canonical_url, doc.title, doc.content,
                            doc.content_hash, doc.category, doc.doc_type, doc.state,
                            doc.law_name,
                            # BUGFIX: str() produced a Python repr (single quotes),
                            # which the ::jsonb cast rejects - serialise real JSON.
                            json.dumps(doc.paragraphs) if doc.paragraphs else None,
                            doc.trust_score, seed["id"]
                        )
                        stats["success"] += 1
                        logger.info(f"✓ Gecrawlt: {doc.title[:50]}...")
                    except Exception as e:
                        logger.error(f"DB-Fehler für {doc.url}: {e}")
                        stats["failed"] += 1
                else:
                    stats["failed"] += 1
                # Record the crawl outcome on the seed row.
                await conn.execute("""
                    UPDATE edu_search_seeds
                    SET last_crawled_at = NOW(),
                        last_crawl_status = $1
                    WHERE id = $2
                """, "success" if doc else "failed", seed["id"])
        logger.info(f"Crawl abgeschlossen: {stats}")
        return stats
# Singleton instance
_crawler_instance: Optional[LegalCrawler] = None


def get_legal_crawler() -> LegalCrawler:
    """Return the process-wide LegalCrawler, creating it lazily."""
    global _crawler_instance
    if _crawler_instance is not None:
        return _crawler_instance
    _crawler_instance = LegalCrawler()
    return _crawler_instance

View File

@@ -0,0 +1,249 @@
"""
PII Detector Service.
Erkennt und redaktiert personenbezogene Daten (PII) in Texten
bevor sie an externe Services wie Tavily gesendet werden.
"""
import re
from dataclasses import dataclass, field
from typing import Optional
from enum import Enum
class PIIType(Enum):
    """Kinds of personally identifiable information the detector knows."""
    EMAIL = "email"
    PHONE = "phone"
    IBAN = "iban"
    CREDIT_CARD = "credit_card"
    SSN = "ssn"  # German social security number (Sozialversicherungsnummer)
    NAME = "name"  # no regex pattern defined in PIIDetector.PATTERNS yet
    ADDRESS = "address"  # no regex pattern defined in PIIDetector.PATTERNS yet
    DATE_OF_BIRTH = "date_of_birth"
    IP_ADDRESS = "ip_address"
@dataclass
class PIIMatch:
    """A single PII hit found in a text."""
    type: PIIType  # which kind of PII matched
    value: str  # the matched substring
    start: int  # start offset in the original text
    end: int  # end offset (exclusive) in the original text
    replacement: str  # placeholder to substitute, e.g. "[EMAIL_REDACTED]"
@dataclass
class RedactionResult:
    """Outcome of running PII redaction over a text."""
    original_text: str  # the input as given
    redacted_text: str  # input with every match replaced by its placeholder
    matches: list[PIIMatch] = field(default_factory=list)  # the hits that were replaced
    pii_found: bool = False  # True when at least one match was found
class PIIDetector:
    """
    Detects and redacts personally identifiable information (PII).

    Uses regex patterns tuned for German and international formats.
    Patterns are compiled once, with re.IGNORECASE.
    """

    # Regex patterns per PII type. NAME and ADDRESS have no pattern yet
    # and are therefore never matched even when enabled.
    PATTERNS = {
        # BUGFIX: the TLD character class was [A-Z|a-z], which also matched
        # a literal '|' (classic char-class mistake) and thereby accepted
        # one-letter "TLDs" followed by pipe junk.
        PIIType.EMAIL: r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
        # German phone numbers (various separator styles)
        PIIType.PHONE: r'(?:\+49|0049|0)[\s\-/]?(?:\d{2,5})[\s\-/]?(?:\d{3,8})[\s\-/]?(?:\d{0,5})',
        # IBAN (German and international)
        PIIType.IBAN: r'\b[A-Z]{2}\d{2}[\s]?(?:\d{4}[\s]?){4,7}\d{0,2}\b',
        # Credit cards (Visa, Mastercard, Amex)
        PIIType.CREDIT_CARD: r'\b(?:4\d{3}|5[1-5]\d{2}|3[47]\d{2})[\s\-]?\d{4}[\s\-]?\d{4}[\s\-]?\d{4}\b',
        # German social security number
        PIIType.SSN: r'\b\d{2}[\s]?\d{6}[\s]?[A-Z][\s]?\d{3}\b',
        # IPv4 addresses
        PIIType.IP_ADDRESS: r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
        # Dates of birth (German dd.mm.yyyy formats)
        PIIType.DATE_OF_BIRTH: r'\b(?:0?[1-9]|[12]\d|3[01])\.(?:0?[1-9]|1[0-2])\.(?:19|20)\d{2}\b',
    }

    # Placeholder inserted for each redacted match.
    REPLACEMENTS = {
        PIIType.EMAIL: "[EMAIL_REDACTED]",
        PIIType.PHONE: "[PHONE_REDACTED]",
        PIIType.IBAN: "[IBAN_REDACTED]",
        PIIType.CREDIT_CARD: "[CARD_REDACTED]",
        PIIType.SSN: "[SSN_REDACTED]",
        PIIType.NAME: "[NAME_REDACTED]",
        PIIType.ADDRESS: "[ADDRESS_REDACTED]",
        PIIType.DATE_OF_BIRTH: "[DOB_REDACTED]",
        PIIType.IP_ADDRESS: "[IP_REDACTED]",
    }

    # Priority for overlapping matches (higher wins), e.g. an IBAN must
    # not be partially consumed as a phone number.
    PRIORITY = {
        PIIType.EMAIL: 100,
        PIIType.IBAN: 90,
        PIIType.CREDIT_CARD: 85,
        PIIType.SSN: 80,
        PIIType.IP_ADDRESS: 70,
        PIIType.DATE_OF_BIRTH: 60,
        PIIType.PHONE: 50,  # lower priority: pattern is prone to false positives
        PIIType.NAME: 40,
        PIIType.ADDRESS: 30,
    }

    def __init__(self, enabled_types: Optional[list[PIIType]] = None):
        """
        Initialise the PII detector.

        Args:
            enabled_types: PII types to detect. None enables all types;
                an empty list disables detection entirely.
        """
        if enabled_types is not None:
            self.enabled_types = enabled_types
        else:
            self.enabled_types = list(PIIType)
        # Compile once; types without a pattern (NAME, ADDRESS) are skipped.
        self._compiled_patterns = {
            pii_type: re.compile(pattern, re.IGNORECASE)
            for pii_type, pattern in self.PATTERNS.items()
            if pii_type in self.enabled_types
        }

    def detect(self, text: str) -> list[PIIMatch]:
        """
        Find all PII in *text*.

        Overlapping matches are resolved in favour of the higher-priority
        type (e.g. IBAN over phone).

        Args:
            text: Text to analyse.

        Returns:
            Matches sorted by start offset.
        """
        all_matches = []
        for pii_type, pattern in self._compiled_patterns.items():
            for match in pattern.finditer(text):
                all_matches.append(PIIMatch(
                    type=pii_type,
                    value=match.group(),
                    start=match.start(),
                    end=match.end(),
                    replacement=self.REPLACEMENTS[pii_type],
                ))
        # Drop overlaps (higher priority wins), then sort by position so
        # redact() can splice replacements deterministically.
        matches = self._filter_overlapping(all_matches)
        matches.sort(key=lambda m: m.start)
        return matches

    def _filter_overlapping(self, matches: list[PIIMatch]) -> list[PIIMatch]:
        """
        Remove overlapping matches, keeping the higher-priority one.

        Args:
            matches: All raw matches.

        Returns:
            Filtered list with no overlapping spans.
        """
        if not matches:
            return []
        # Highest priority first, so earlier accepts win ties.
        sorted_matches = sorted(
            matches,
            key=lambda m: self.PRIORITY.get(m.type, 0),
            reverse=True,
        )
        result = []
        used_ranges: list[tuple[int, int]] = []
        for match in sorted_matches:
            overlaps = False
            for start, end in used_ranges:
                # Half-open interval overlap: match.start < end AND match.end > start
                if match.start < end and match.end > start:
                    overlaps = True
                    break
            if not overlaps:
                result.append(match)
                used_ranges.append((match.start, match.end))
        return result

    def redact(self, text: str) -> RedactionResult:
        """
        Detect PII in *text* and replace each hit with its placeholder.

        Args:
            text: Text to redact.

        Returns:
            RedactionResult with both the original and the redacted text.
        """
        matches = self.detect(text)
        if not matches:
            return RedactionResult(
                original_text=text,
                redacted_text=text,
                matches=[],
                pii_found=False,
            )
        # Replace back-to-front so earlier offsets stay valid.
        redacted = text
        for match in reversed(matches):
            redacted = redacted[:match.start] + match.replacement + redacted[match.end:]
        return RedactionResult(
            original_text=text,
            redacted_text=redacted,
            matches=matches,
            pii_found=True,
        )

    def contains_pii(self, text: str) -> bool:
        """
        Fast check whether *text* contains any detectable PII.

        Args:
            text: Text to check.

        Returns:
            True when at least one enabled pattern matches.
        """
        return any(pattern.search(text) for pattern in self._compiled_patterns.values())
# Singleton Instance
_pii_detector: Optional[PIIDetector] = None


def get_pii_detector() -> PIIDetector:
    """Return the process-wide PIIDetector, creating it lazily."""
    global _pii_detector
    if _pii_detector is not None:
        return _pii_detector
    _pii_detector = PIIDetector()
    return _pii_detector

View File

@@ -0,0 +1,322 @@
"""
Playbook Service - Verwaltung von System Prompts.
Playbooks sind versionierte System-Prompt-Vorlagen für spezifische Schulkontexte.
"""
import logging
from typing import Optional
from dataclasses import dataclass, field
from datetime import datetime
logger = logging.getLogger(__name__)
@dataclass
class Playbook:
    """A versioned system-prompt template for a specific school context."""
    id: str  # stable identifier, e.g. "pb_elternbrief"
    name: str  # human-readable display name
    description: str  # short summary of the playbook's purpose
    system_prompt: str  # the full system prompt handed to the model
    prompt_version: str  # version string of the prompt text
    recommended_models: list[str] = field(default_factory=list)  # preferred model ids
    tool_policy: dict = field(default_factory=dict)  # e.g. allow_web_search, no_pii_in_output
    status: str = "published"  # workflow state: draft, review, approved, published
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)  # bumped by update_playbook()
# Initial playbooks (to be loaded from the DB later)
DEFAULT_PLAYBOOKS: dict[str, Playbook] = {
    # General-purpose assistant, the fallback playbook.
    "pb_default": Playbook(
        id="pb_default",
        name="Standard-Assistent",
        description="Allgemeiner Assistent für Lehrkräfte",
        system_prompt="""Du bist ein hilfreicher Assistent für Lehrkräfte an deutschen Schulen.
Richtlinien:
- Antworte präzise und verständlich
- Berücksichtige den deutschen Schulkontext
- Beachte datenschutzrechtliche Aspekte (DSGVO)
- Verwende geschlechtergerechte Sprache
- Gib bei rechtlichen Fragen den Hinweis, dass du keine Rechtsberatung ersetzen kannst""",
        prompt_version="1.0.0",
        recommended_models=["breakpilot-teacher-8b", "breakpilot-teacher-70b"],
        tool_policy={"allow_web_search": True, "no_pii_in_output": True},
    ),
    # Drafting letters to parents.
    "pb_elternbrief": Playbook(
        id="pb_elternbrief",
        name="Elternbrief",
        description="Professionelle Elternkommunikation verfassen",
        system_prompt="""Du bist ein erfahrener Schulassistent, der Lehrkräften hilft, professionelle Elternbriefe zu verfassen.
Richtlinien für Elternbriefe:
- Höflicher, respektvoller Ton
- Klare, verständliche Sprache (kein Fachjargon)
- Strukturierte Gliederung mit Datum, Betreff, Anrede
- Wichtige Informationen hervorheben
- Handlungsaufforderungen klar formulieren
- Kontaktmöglichkeiten angeben
- Keine personenbezogenen Daten einzelner Schüler*innen nennen
- DSGVO-konform formulieren
Format:
- Briefkopf mit Schule, Datum
- Betreff-Zeile
- Anrede "Sehr geehrte Eltern und Erziehungsberechtigte,"
- Haupttext in Absätzen
- Grußformel
- Unterschrift mit Name und Funktion""",
        prompt_version="1.1.0",
        recommended_models=["breakpilot-teacher-8b"],
        tool_policy={"allow_web_search": False, "no_pii_in_output": True},
    ),
    # Worksheet generation for various grades/subjects.
    "pb_arbeitsblatt": Playbook(
        id="pb_arbeitsblatt",
        name="Arbeitsblatt erstellen",
        description="Arbeitsblätter für verschiedene Klassenstufen und Fächer",
        system_prompt="""Du bist ein erfahrener Didaktiker, der Lehrkräften bei der Erstellung von Arbeitsblättern hilft.
Bei der Erstellung von Arbeitsblättern beachte:
- Klassenstufe und Lernstand berücksichtigen
- Klare, verständliche Aufgabenstellungen
- Differenzierungsmöglichkeiten anbieten (leicht/mittel/schwer)
- Platz für Antworten einplanen
- Visualisierungen wo sinnvoll vorschlagen
- Bezug zum Lehrplan herstellen
- Zeitaufwand realistisch einschätzen
Format für Arbeitsblätter:
- Titel und Thema
- Klassenstufe/Fach
- Lernziele (für Lehrkraft)
- Aufgaben mit Nummerierung
- Platzhalter für Antworten [___]
- Optionale Zusatzaufgaben
- Lösungshinweise (optional, für Lehrkraft)""",
        prompt_version="1.2.0",
        recommended_models=["breakpilot-teacher-8b", "breakpilot-teacher-70b"],
        tool_policy={"allow_web_search": True, "no_pii_in_output": True},
    ),
    # Individual support plans (sensitive data - templates only).
    "pb_foerderplan": Playbook(
        id="pb_foerderplan",
        name="Förderplan",
        description="Individuelle Förderpläne erstellen",
        system_prompt="""Du bist ein erfahrener Sonderpädagoge/Förderschullehrer, der bei der Erstellung von Förderplänen unterstützt.
WICHTIG: Förderpläne enthalten sensible Daten. Erstelle nur Vorlagen und Strukturen, keine echten Schülerdaten.
Struktur eines Förderplans:
1. Ausgangslage
- Stärken des Kindes
- Entwicklungsbereiche
- Bisherige Fördermaßnahmen
2. Förderziele (SMART formuliert)
- Spezifisch, Messbar, Attraktiv, Realistisch, Terminiert
- Kurzfristige Ziele (4-6 Wochen)
- Mittelfristige Ziele (Halbjahr)
3. Maßnahmen
- Konkrete Fördermaßnahmen
- Methoden und Materialien
- Verantwortlichkeiten
4. Evaluation
- Beobachtungskriterien
- Dokumentation
- Anpassungszeitpunkte
Rechtliche Hinweise:
- Förderpläne sind vertrauliche Dokumente
- Eltern haben Einsichtsrecht
- Regelmäßige Fortschreibung erforderlich""",
        prompt_version="1.0.0",
        recommended_models=["breakpilot-teacher-70b"],
        tool_policy={"allow_web_search": False, "no_pii_in_output": True},
    ),
    # School-law and data-protection questions (explicitly not legal advice).
    "pb_rechtlich": Playbook(
        id="pb_rechtlich",
        name="Rechtliche Fragen",
        description="Schulrechtliche und datenschutzrechtliche Fragen",
        system_prompt="""Du bist ein Experte für Schulrecht und Datenschutz im Bildungsbereich.
WICHTIGER HINWEIS: Du gibst allgemeine Informationen, keine Rechtsberatung. Bei konkreten Rechtsfragen sollte immer ein Fachanwalt oder die Schulbehörde konsultiert werden.
Themengebiete:
- DSGVO im Schulkontext
- Schulgesetze der Bundesländer
- Aufsichtspflicht
- Urheberrecht im Unterricht
- Elternrechte und -pflichten
- Dokumentationspflichten
- Datenschutz bei digitalen Medien
Bei Antworten:
- Auf Bundesland-spezifische Regelungen hinweisen
- Rechtsquellen nennen (z.B. SchulG, DSGVO-Artikel)
- Auf Aktualität der Informationen hinweisen
- Immer empfehlen, aktuelle Regelungen zu prüfen
- Bei Unsicherheit an zuständige Stellen verweisen""",
        prompt_version="1.0.0",
        recommended_models=["breakpilot-teacher-70b", "claude-3-5-sonnet"],
        tool_policy={"allow_web_search": True, "no_pii_in_output": True},
    ),
    # Coaching for parent communication/conversations.
    "pb_kommunikation": Playbook(
        id="pb_kommunikation",
        name="Elternkommunikation",
        description="Kommunikation mit Eltern in verschiedenen Situationen",
        system_prompt="""Du bist ein erfahrener Schulberater, der bei der Kommunikation mit Eltern unterstützt.
Kommunikationssituationen:
- Elterngespräche vorbereiten
- Schwierige Gespräche führen
- Konflikte deeskalieren
- Positive Rückmeldungen formulieren
- Unterstützung einfordern
Kommunikationsgrundsätze:
- Wertschätzender, respektvoller Ton
- Sachlich bleiben, auch bei Emotionen
- Ich-Botschaften verwenden
- Konkrete Beobachtungen statt Bewertungen
- Gemeinsame Lösungen suchen
- Ressourcen und Stärken betonen
- Vertraulichkeit wahren
Struktur für Elterngespräche:
1. Begrüßung und Gesprächsrahmen
2. Positiver Einstieg
3. Beobachtungen mitteilen
4. Perspektive der Eltern hören
5. Gemeinsame Ziele definieren
6. Konkrete Vereinbarungen treffen
7. Positiver Abschluss""",
        prompt_version="1.0.0",
        recommended_models=["breakpilot-teacher-8b", "breakpilot-teacher-70b"],
        tool_policy={"allow_web_search": False, "no_pii_in_output": True},
    ),
    # Structured analysis of incoming mail for school principals.
    "mail_analysis": Playbook(
        id="mail_analysis",
        name="E-Mail-Analyse",
        description="Analyse eingehender E-Mails für Schulleiter/innen",
        system_prompt="""Du bist ein intelligenter Assistent für Schulleitungen in Niedersachsen.
Deine Aufgabe ist die Analyse eingehender E-Mails:
1. ABSENDER-KLASSIFIKATION:
Erkenne den Absender-Typ:
- kultusministerium: Kultusministerium (MK)
- landesschulbehoerde: Landesschulbehörde (NLSchB)
- rlsb: Regionales Landesamt für Schule und Bildung
- schulamt: Schulamt
- nibis: Niedersächsischer Bildungsserver
- schultraeger: Schulträger/Kommune
- elternvertreter: Elternvertreter/Elternrat
- gewerkschaft: GEW, VBE, etc.
- fortbildungsinstitut: NLQ, etc.
- privatperson: Privatperson
- unternehmen: Firma
- unbekannt: Nicht einzuordnen
2. FRISTEN-ERKENNUNG:
Extrahiere alle genannten Fristen und Termine:
- Datum im Format YYYY-MM-DD
- Beschreibung der Frist
- Verbindlichkeit (ja/nein)
3. KATEGORISIERUNG:
Ordne die E-Mail einer Kategorie zu:
- dienstlich: Offizielle Dienstangelegenheiten
- personal: Personalangelegenheiten
- finanzen: Haushalts-/Finanzthemen
- eltern: Elternkommunikation
- schueler: Schülerangelegenheiten
- fortbildung: Fortbildungen
- veranstaltung: Termine/Events
- sicherheit: Sicherheit/Hygiene
- technik: IT/Digitales
- newsletter: Informationen
- sonstiges: Andere
4. PRIORITÄT:
Schlage eine Priorität vor:
- urgent: Sofort bearbeiten
- high: Zeitnah bearbeiten
- medium: Normale Bearbeitung
- low: Kann warten
Antworte präzise im geforderten Format. Keine langen Erklärungen.
Beachte deutsche Datums- und Behördenformate.""",
        prompt_version="1.0.0",
        recommended_models=["breakpilot-teacher-8b", "llama-3.1-8b-instruct"],
        tool_policy={"allow_web_search": False, "no_pii_in_output": True},
    ),
}
class PlaybookService:
    """CRUD service for playbooks (in-memory for now, DB-backed later)."""

    def __init__(self):
        # Work on a copy so deletions never touch the module-level defaults.
        self._playbooks = DEFAULT_PLAYBOOKS.copy()

    def list_playbooks(self, status: Optional[str] = "published") -> list[Playbook]:
        """Return all playbooks, optionally filtered by workflow status."""
        if status:
            return [p for p in self._playbooks.values() if p.status == status]
        return list(self._playbooks.values())

    def get_playbook(self, playbook_id: str) -> Optional[Playbook]:
        """Look up a playbook by its id, or None when unknown."""
        return self._playbooks.get(playbook_id)

    def get_system_prompt(self, playbook_id: str) -> Optional[str]:
        """Return only the system prompt of a playbook, or None when unknown."""
        playbook = self.get_playbook(playbook_id)
        return None if playbook is None else playbook.system_prompt

    def create_playbook(self, playbook: Playbook) -> Playbook:
        """Register a new playbook; the id must not already exist."""
        if playbook.id in self._playbooks:
            raise ValueError(f"Playbook with id {playbook.id} already exists")
        self._playbooks[playbook.id] = playbook
        logger.info(f"Created playbook: {playbook.id}")
        return playbook

    def update_playbook(self, playbook_id: str, **updates) -> Optional[Playbook]:
        """Apply keyword updates to a playbook; unknown attributes are ignored."""
        playbook = self._playbooks.get(playbook_id)
        if playbook is None:
            return None
        for attr, new_value in updates.items():
            if hasattr(playbook, attr):
                setattr(playbook, attr, new_value)
        playbook.updated_at = datetime.now()
        logger.info(f"Updated playbook: {playbook_id}")
        return playbook

    def delete_playbook(self, playbook_id: str) -> bool:
        """Remove a playbook; returns False when the id is unknown."""
        if playbook_id not in self._playbooks:
            return False
        del self._playbooks[playbook_id]
        logger.info(f"Deleted playbook: {playbook_id}")
        return True
# Singleton
_playbook_service: Optional[PlaybookService] = None


def get_playbook_service() -> PlaybookService:
    """Return the process-wide PlaybookService, creating it lazily."""
    global _playbook_service
    if _playbook_service is not None:
        return _playbook_service
    _playbook_service = PlaybookService()
    return _playbook_service

View File

@@ -0,0 +1,285 @@
"""
Tool Gateway Service.
Bietet sichere Schnittstelle zu externen Tools wie Tavily Web Search.
Alle Anfragen werden vor dem Versand auf PII geprüft und redaktiert.
"""
import os
import httpx
import logging
from dataclasses import dataclass, field
from typing import Optional, Any
from enum import Enum
from .pii_detector import PIIDetector, get_pii_detector, RedactionResult
logger = logging.getLogger(__name__)
class SearchDepth(str, Enum):
    """Search depth value passed to the Tavily API."""
    BASIC = "basic"
    ADVANCED = "advanced"
@dataclass
class SearchResult:
    """A single web search hit."""
    title: str  # result title
    url: str  # result URL
    content: str  # text snippet/extract returned by the search backend
    score: float = 0.0  # relevance score as reported by the backend
    published_date: Optional[str] = None  # publication date, if provided
@dataclass
class SearchResponse:
    """Response of a gateway web search."""
    query: str  # the original (un-redacted) query
    redacted_query: Optional[str] = None  # query actually sent, when PII was removed
    results: list[SearchResult] = field(default_factory=list)  # search hits
    answer: Optional[str] = None  # synthesized answer from Tavily, if requested
    pii_detected: bool = False  # True when the query contained PII
    pii_types: list[str] = field(default_factory=list)  # names of PII types found
    response_time_ms: int = 0  # end-to-end search duration in milliseconds
@dataclass
class ToolGatewayConfig:
    """Configuration for the tool gateway."""
    tavily_api_key: Optional[str] = None
    tavily_base_url: str = "https://api.tavily.com"
    timeout: int = 30
    max_results: int = 5
    search_depth: SearchDepth = SearchDepth.BASIC
    include_answer: bool = True
    include_images: bool = False
    pii_redaction_enabled: bool = True

    @classmethod
    def from_env(cls) -> "ToolGatewayConfig":
        """Build a config from environment variables."""
        def truthy(var: str, default: str) -> bool:
            # Boolean env flags: anything other than "true" (any case) is False.
            return os.getenv(var, default).lower() == "true"

        return cls(
            tavily_api_key=os.getenv("TAVILY_API_KEY"),
            tavily_base_url=os.getenv("TAVILY_BASE_URL", "https://api.tavily.com"),
            timeout=int(os.getenv("TAVILY_TIMEOUT", "30")),
            max_results=int(os.getenv("TAVILY_MAX_RESULTS", "5")),
            search_depth=SearchDepth(os.getenv("TAVILY_SEARCH_DEPTH", "basic")),
            include_answer=truthy("TAVILY_INCLUDE_ANSWER", "true"),
            include_images=truthy("TAVILY_INCLUDE_IMAGES", "false"),
            pii_redaction_enabled=truthy("PII_REDACTION_ENABLED", "true"),
        )
class ToolGatewayError(Exception):
    """Base error for tool gateway failures (e.g. missing configuration)."""
    pass
class TavilyError(ToolGatewayError):
    """Raised when a Tavily API call fails or the service is unreachable."""
    pass
class ToolGateway:
    """
    Gateway for external tools with PII protection.

    Every outgoing request is checked for personal data, which is
    redacted before it leaves the system (GDPR compliance).
    """

    def __init__(
        self,
        config: Optional[ToolGatewayConfig] = None,
        pii_detector: Optional[PIIDetector] = None,
    ):
        """
        Initialize the tool gateway.

        Args:
            config: Configuration. None = load from environment variables.
            pii_detector: PII detector. None = shared default detector.
        """
        self.config = config or ToolGatewayConfig.from_env()
        self.pii_detector = pii_detector or get_pii_detector()
        # HTTP client is created lazily on first use (see _get_client).
        self._client: Optional[httpx.AsyncClient] = None

    @property
    def tavily_available(self) -> bool:
        """Whether a Tavily API key is configured."""
        return bool(self.config.tavily_api_key)

    async def _get_client(self) -> httpx.AsyncClient:
        """Lazily create and cache the shared HTTP client."""
        if self._client is None:
            self._client = httpx.AsyncClient(
                timeout=self.config.timeout,
                headers={"Content-Type": "application/json"},
            )
        return self._client

    async def close(self):
        """Close the HTTP client (safe to call when none was created)."""
        if self._client:
            await self._client.aclose()
            self._client = None

    def _redact_query(self, query: str) -> RedactionResult:
        """
        Redact PII from a search query.

        Args:
            query: The original search query.

        Returns:
            RedactionResult with the redacted text; a no-op passthrough
            when PII redaction is disabled in the config.
        """
        if not self.config.pii_redaction_enabled:
            return RedactionResult(
                original_text=query,
                redacted_text=query,
                matches=[],
                pii_found=False,
            )
        return self.pii_detector.redact(query)

    async def search(
        self,
        query: str,
        search_depth: Optional[SearchDepth] = None,
        max_results: Optional[int] = None,
        include_domains: Optional[list[str]] = None,
        exclude_domains: Optional[list[str]] = None,
    ) -> SearchResponse:
        """
        Run a web search via Tavily.

        PII is automatically removed from the query before it is sent
        to Tavily.

        Args:
            query: The search query.
            search_depth: Search depth (basic/advanced); defaults to config.
            max_results: Maximum number of results; defaults to config.
            include_domains: Restrict the search to these domains.
            exclude_domains: Exclude these domains.

        Returns:
            SearchResponse with results.

        Raises:
            TavilyError: On API errors.
            ToolGatewayError: On configuration errors.
        """
        import time
        start_time = time.time()
        if not self.tavily_available:
            raise ToolGatewayError("Tavily API key not configured")
        # Redact PII before anything leaves the process.
        redaction = self._redact_query(query)
        if redaction.pii_found:
            logger.warning(
                f"PII detected in search query. Types: {[m.type.value for m in redaction.matches]}"
            )
        # Build the Tavily request; note the API key travels in the JSON body.
        client = await self._get_client()
        payload: dict[str, Any] = {
            "api_key": self.config.tavily_api_key,
            "query": redaction.redacted_text,
            "search_depth": (search_depth or self.config.search_depth).value,
            "max_results": max_results or self.config.max_results,
            "include_answer": self.config.include_answer,
            "include_images": self.config.include_images,
        }
        if include_domains:
            payload["include_domains"] = include_domains
        if exclude_domains:
            payload["exclude_domains"] = exclude_domains
        try:
            response = await client.post(
                f"{self.config.tavily_base_url}/search",
                json=payload,
            )
            response.raise_for_status()
            data = response.json()
        except httpx.HTTPStatusError as e:
            logger.error(f"Tavily API error: {e.response.status_code} - {e.response.text}")
            raise TavilyError(f"Tavily API error: {e.response.status_code}")
        except httpx.RequestError as e:
            logger.error(f"Tavily request error: {e}")
            raise TavilyError(f"Failed to connect to Tavily: {e}")
        # Parse the response into our dataclasses.
        results = [
            SearchResult(
                title=r.get("title", ""),
                url=r.get("url", ""),
                content=r.get("content", ""),
                score=r.get("score", 0.0),
                published_date=r.get("published_date"),
            )
            for r in data.get("results", [])
        ]
        elapsed_ms = int((time.time() - start_time) * 1000)
        return SearchResponse(
            query=query,
            redacted_query=redaction.redacted_text if redaction.pii_found else None,
            results=results,
            answer=data.get("answer"),
            pii_detected=redaction.pii_found,
            pii_types=[m.type.value for m in redaction.matches],
            response_time_ms=elapsed_ms,
        )

    async def health_check(self) -> dict[str, Any]:
        """
        Check availability of the configured tools.

        Returns:
            Dict with the status of each tool.
        """
        status = {
            "tavily": {
                "configured": self.tavily_available,
                "healthy": False,
            },
            "pii_redaction": {
                "enabled": self.config.pii_redaction_enabled,
            },
        }
        # Tavily health check (issues a minimal real search).
        if self.tavily_available:
            try:
                result = await self.search("test", max_results=1)
                status["tavily"]["healthy"] = True
                status["tavily"]["response_time_ms"] = result.response_time_ms
            except Exception as e:
                status["tavily"]["error"] = str(e)
        return status
# Singleton Instance
_tool_gateway: Optional[ToolGateway] = None


def get_tool_gateway() -> ToolGateway:
    """Return the process-wide ToolGateway, creating it lazily."""
    global _tool_gateway
    if _tool_gateway is not None:
        return _tool_gateway
    _tool_gateway = ToolGateway()
    return _tool_gateway