[split-required] Split final 43 files (500-668 LOC) to complete refactoring
klausur-service (11 files):
- cv_gutter_repair, ocr_pipeline_regression, upload_api
- ocr_pipeline_sessions, smart_spell, nru_worksheet_generator
- ocr_pipeline_overlays, mail/aggregator, zeugnis_api
- cv_syllable_detect, self_rag

backend-lehrer (17 files):
- classroom_engine/suggestions, generators/quiz_generator
- worksheets_api, llm_gateway/comparison, state_engine_api
- classroom/models (→ 4 submodules), services/file_processor
- alerts_agent/api/wizard+digests+routes, content_generators/pdf
- classroom/routes/sessions, llm_gateway/inference
- classroom_engine/analytics, auth/keycloak_auth
- alerts_agent/processing/rule_engine, ai_processor/print_versions

agent-core (5 files):
- brain/memory_store, brain/knowledge_graph, brain/context_manager
- orchestrator/supervisor, sessions/session_manager

admin-lehrer (5 components):
- GridOverlay, StepGridReview, DevOpsPipelineSidebar
- DataFlowDiagram, sbom/wizard/page

website (2 files):
- DependencyMap, lehrer/abitur-archiv

Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -12,11 +12,13 @@ from agent_core.orchestrator.message_bus import (
|
||||
AgentMessage,
|
||||
MessagePriority,
|
||||
)
|
||||
from agent_core.orchestrator.supervisor import (
|
||||
AgentSupervisor,
|
||||
AgentInfo,
|
||||
from agent_core.orchestrator.supervisor_models import (
|
||||
AgentStatus,
|
||||
RestartPolicy,
|
||||
AgentInfo,
|
||||
AgentFactory,
|
||||
)
|
||||
from agent_core.orchestrator.supervisor import AgentSupervisor
|
||||
from agent_core.orchestrator.task_router import (
|
||||
TaskRouter,
|
||||
RoutingResult,
|
||||
@@ -30,6 +32,8 @@ __all__ = [
|
||||
"AgentSupervisor",
|
||||
"AgentInfo",
|
||||
"AgentStatus",
|
||||
"RestartPolicy",
|
||||
"AgentFactory",
|
||||
"TaskRouter",
|
||||
"RoutingResult",
|
||||
"RoutingStrategy",
|
||||
|
||||
@@ -1,17 +1,11 @@
|
||||
"""
|
||||
Agent Supervisor for Breakpilot
|
||||
|
||||
Provides:
|
||||
- Agent lifecycle management
|
||||
- Health monitoring
|
||||
- Restart policies
|
||||
- Load balancing
|
||||
Agent lifecycle management, health monitoring, restart policies, load balancing.
|
||||
"""
|
||||
|
||||
from typing import Dict, Optional, Callable, Awaitable, List, Any
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, Optional, List, Any
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from enum import Enum
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
@@ -21,91 +15,24 @@ from agent_core.orchestrator.message_bus import (
|
||||
AgentMessage,
|
||||
MessagePriority,
|
||||
)
|
||||
from agent_core.orchestrator.supervisor_models import (
|
||||
AgentStatus,
|
||||
RestartPolicy,
|
||||
AgentInfo,
|
||||
AgentFactory,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AgentStatus(Enum):
|
||||
"""Agent lifecycle states"""
|
||||
INITIALIZING = "initializing"
|
||||
STARTING = "starting"
|
||||
RUNNING = "running"
|
||||
PAUSED = "paused"
|
||||
STOPPING = "stopping"
|
||||
STOPPED = "stopped"
|
||||
ERROR = "error"
|
||||
RESTARTING = "restarting"
|
||||
|
||||
|
||||
class RestartPolicy(Enum):
|
||||
"""Agent restart policies"""
|
||||
NEVER = "never"
|
||||
ON_FAILURE = "on_failure"
|
||||
ALWAYS = "always"
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentInfo:
|
||||
"""Information about a registered agent"""
|
||||
agent_id: str
|
||||
agent_type: str
|
||||
status: AgentStatus = AgentStatus.INITIALIZING
|
||||
current_task: Optional[str] = None
|
||||
started_at: Optional[datetime] = None
|
||||
last_activity: Optional[datetime] = None
|
||||
error_count: int = 0
|
||||
restart_count: int = 0
|
||||
max_restarts: int = 3
|
||||
restart_policy: RestartPolicy = RestartPolicy.ON_FAILURE
|
||||
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
capacity: int = 10 # Max concurrent tasks
|
||||
current_load: int = 0
|
||||
|
||||
def is_healthy(self) -> bool:
|
||||
"""Check if agent is healthy"""
|
||||
return self.status == AgentStatus.RUNNING and self.error_count < 3
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""Check if agent can accept new tasks"""
|
||||
return (
|
||||
self.status == AgentStatus.RUNNING and
|
||||
self.current_load < self.capacity
|
||||
)
|
||||
|
||||
def utilization(self) -> float:
|
||||
"""Returns agent utilization (0-1)"""
|
||||
return self.current_load / max(self.capacity, 1)
|
||||
|
||||
|
||||
AgentFactory = Callable[[str], Awaitable[Any]]
|
||||
|
||||
|
||||
class AgentSupervisor:
|
||||
"""
|
||||
Supervises and coordinates all agents.
|
||||
|
||||
Responsibilities:
|
||||
- Agent registration and lifecycle
|
||||
- Health monitoring via heartbeat
|
||||
- Restart policies
|
||||
- Load balancing
|
||||
- Alert escalation
|
||||
"""
|
||||
"""Supervises agents: lifecycle, health monitoring, restart policies, load balancing."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
message_bus: MessageBus,
|
||||
self, message_bus: MessageBus,
|
||||
heartbeat_monitor: Optional[HeartbeatMonitor] = None,
|
||||
check_interval_seconds: int = 10
|
||||
):
|
||||
"""
|
||||
Initialize the supervisor.
|
||||
|
||||
Args:
|
||||
message_bus: Message bus for inter-agent communication
|
||||
heartbeat_monitor: Heartbeat monitor for liveness checks
|
||||
check_interval_seconds: How often to run health checks
|
||||
"""
|
||||
self.bus = message_bus
|
||||
self.heartbeat = heartbeat_monitor or HeartbeatMonitor()
|
||||
self.check_interval = check_interval_seconds
|
||||
|
||||
65
agent-core/orchestrator/supervisor_models.py
Normal file
65
agent-core/orchestrator/supervisor_models.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""
|
||||
Supervisor Models for Breakpilot Agents
|
||||
|
||||
Data classes and enumerations for agent lifecycle management.
|
||||
"""
|
||||
|
||||
from typing import Dict, Optional, Any, Callable, Awaitable
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class AgentStatus(Enum):
    """Agent lifecycle states.

    Each member's value is the lowercase form of its name.
    """

    INITIALIZING = "initializing"
    STARTING = "starting"
    RUNNING = "running"
    PAUSED = "paused"
    STOPPING = "stopping"
    STOPPED = "stopped"
    ERROR = "error"
    RESTARTING = "restarting"
|
||||
|
||||
|
||||
class RestartPolicy(Enum):
    """Agent restart policies.

    Each member's value is the lowercase form of its name.
    """

    NEVER = "never"
    ON_FAILURE = "on_failure"
    ALWAYS = "always"
|
||||
|
||||
|
||||
@dataclass
class AgentInfo:
    """Information about a registered agent.

    Bundles an agent's identity, lifecycle status, error/restart counters
    and load figures in one record. Only ``agent_id`` and ``agent_type``
    are required; every other field has a default.
    """

    agent_id: str
    agent_type: str
    status: AgentStatus = AgentStatus.INITIALIZING
    current_task: Optional[str] = None
    started_at: Optional[datetime] = None
    last_activity: Optional[datetime] = None
    error_count: int = 0
    restart_count: int = 0
    max_restarts: int = 3
    restart_policy: RestartPolicy = RestartPolicy.ON_FAILURE
    # default_factory gives every instance its own dict instead of a shared one.
    metadata: Dict[str, Any] = field(default_factory=dict)
    capacity: int = 10  # Max concurrent tasks
    current_load: int = 0

    def is_healthy(self) -> bool:
        """Return True when the agent is RUNNING and has fewer than 3 errors."""
        # NOTE(review): the threshold is the literal 3, independent of
        # ``max_restarts`` -- confirm that this is intended.
        return self.status == AgentStatus.RUNNING and self.error_count < 3

    def is_available(self) -> bool:
        """Return True when the agent is RUNNING and below its capacity."""
        return (
            self.status == AgentStatus.RUNNING
            and self.current_load < self.capacity
        )

    def utilization(self) -> float:
        """Return ``current_load`` divided by ``capacity``.

        The divisor is floored at 1 so a capacity of 0 cannot raise
        ZeroDivisionError. The result is not clamped, so it exceeds 1.0
        whenever ``current_load`` is larger than ``capacity``.
        """
        return self.current_load / max(self.capacity, 1)
|
||||
|
||||
|
||||
# Type alias for an async factory: a callable that takes one ``str`` argument
# and returns an awaitable resolving to ``Any``.
# NOTE(review): the str is presumably the agent ID and the result the agent
# instance -- confirm against the supervisor's call site.
AgentFactory = Callable[[str], Awaitable[Any]]
|
||||
Reference in New Issue
Block a user