""" Vast.ai REST API Client. Verwendet die offizielle vast.ai API statt CLI fuer mehr Stabilitaet. API Dokumentation: https://docs.vast.ai/api """ import asyncio import logging from dataclasses import dataclass, field from datetime import datetime, timezone from enum import Enum from typing import Optional, Dict, Any, List import httpx logger = logging.getLogger(__name__) class InstanceStatus(Enum): """Vast.ai Instance Status.""" RUNNING = "running" STOPPED = "stopped" EXITED = "exited" LOADING = "loading" SCHEDULING = "scheduling" CREATING = "creating" UNKNOWN = "unknown" @dataclass class AccountInfo: """Informationen ueber den vast.ai Account.""" credit: float # Aktuelles Guthaben in USD balance: float # Balance (meist 0) total_spend: float # Gesamtausgaben username: str email: str has_billing: bool @classmethod def from_api_response(cls, data: Dict[str, Any]) -> "AccountInfo": """Erstellt AccountInfo aus API Response.""" return cls( credit=data.get("credit", 0.0), balance=data.get("balance", 0.0), total_spend=abs(data.get("total_spend", 0.0)), # API gibt negativ zurück username=data.get("username", ""), email=data.get("email", ""), has_billing=data.get("has_billing", False), ) def to_dict(self) -> Dict[str, Any]: """Serialisiert zu Dictionary.""" return { "credit": self.credit, "balance": self.balance, "total_spend": self.total_spend, "username": self.username, "email": self.email, "has_billing": self.has_billing, } @dataclass class InstanceInfo: """Informationen ueber eine vast.ai Instanz.""" id: int status: InstanceStatus machine_id: Optional[int] = None gpu_name: Optional[str] = None num_gpus: int = 1 gpu_ram: Optional[float] = None # GB cpu_ram: Optional[float] = None # GB disk_space: Optional[float] = None # GB dph_total: Optional[float] = None # $/hour public_ipaddr: Optional[str] = None ports: Dict[str, Any] = field(default_factory=dict) label: Optional[str] = None image_uuid: Optional[str] = None started_at: Optional[datetime] = None @classmethod def from_api_response(cls, data: Dict[str, Any]) -> "InstanceInfo": """Erstellt InstanceInfo aus API Response.""" status_map = { "running": InstanceStatus.RUNNING, "exited": InstanceStatus.EXITED, "loading": InstanceStatus.LOADING, "scheduling": InstanceStatus.SCHEDULING, "creating": InstanceStatus.CREATING, } actual_status = data.get("actual_status", "unknown") status = status_map.get(actual_status, InstanceStatus.UNKNOWN) # Parse ports mapping ports = {} if "ports" in data and data["ports"]: ports = data["ports"] # Parse started_at started_at = None if "start_date" in data and data["start_date"]: try: started_at = datetime.fromtimestamp(data["start_date"], tz=timezone.utc) except (ValueError, TypeError): pass return cls( id=data.get("id", 0), status=status, machine_id=data.get("machine_id"), gpu_name=data.get("gpu_name"), num_gpus=data.get("num_gpus", 1), gpu_ram=data.get("gpu_ram"), cpu_ram=data.get("cpu_ram"), disk_space=data.get("disk_space"), dph_total=data.get("dph_total"), public_ipaddr=data.get("public_ipaddr"), ports=ports, label=data.get("label"), image_uuid=data.get("image_uuid"), started_at=started_at, ) def get_endpoint_url(self, internal_port: int = 8001) -> Optional[str]: """Berechnet die externe URL fuer einen internen Port.""" if not self.public_ipaddr: return None # vast.ai mapped interne Ports auf externe Ports # Format: {"8001/tcp": [{"HostIp": "0.0.0.0", "HostPort": "12345"}]} port_key = f"{internal_port}/tcp" if port_key in self.ports: port_info = self.ports[port_key] if isinstance(port_info, list) and port_info: host_port = port_info[0].get("HostPort") if host_port: return f"http://{self.public_ipaddr}:{host_port}" # Fallback: Direkter Port return f"http://{self.public_ipaddr}:{internal_port}" def to_dict(self) -> Dict[str, Any]: """Serialisiert zu Dictionary.""" return { "id": self.id, "status": self.status.value, "machine_id": self.machine_id, "gpu_name": self.gpu_name, "num_gpus": self.num_gpus, "gpu_ram": self.gpu_ram, "cpu_ram": self.cpu_ram, "disk_space": self.disk_space, "dph_total": self.dph_total, "public_ipaddr": self.public_ipaddr, "ports": self.ports, "label": self.label, "started_at": self.started_at.isoformat() if self.started_at else None, } class VastAIClient: """ Async Client fuer vast.ai REST API. Verwendet die offizielle API unter https://console.vast.ai/api/v0/ """ BASE_URL = "https://console.vast.ai/api/v0" def __init__(self, api_key: str, timeout: float = 30.0): self.api_key = api_key self.timeout = timeout self._client: Optional[httpx.AsyncClient] = None async def _get_client(self) -> httpx.AsyncClient: """Lazy Client-Erstellung.""" if self._client is None or self._client.is_closed: self._client = httpx.AsyncClient( timeout=self.timeout, headers={ "Accept": "application/json", }, ) return self._client async def close(self) -> None: """Schliesst den HTTP Client.""" if self._client and not self._client.is_closed: await self._client.aclose() self._client = None def _build_url(self, endpoint: str) -> str: """Baut vollstaendige URL mit API Key.""" sep = "&" if "?" in endpoint else "?" return f"{self.BASE_URL}{endpoint}{sep}api_key={self.api_key}" async def list_instances(self) -> List[InstanceInfo]: """Listet alle Instanzen auf.""" client = await self._get_client() url = self._build_url("/instances/") try: response = await client.get(url) response.raise_for_status() data = response.json() instances = [] if "instances" in data: for inst_data in data["instances"]: instances.append(InstanceInfo.from_api_response(inst_data)) return instances except httpx.HTTPStatusError as e: logger.error(f"vast.ai API error listing instances: {e}") raise async def get_instance(self, instance_id: int) -> Optional[InstanceInfo]: """Holt Details einer spezifischen Instanz.""" client = await self._get_client() url = self._build_url(f"/instances/{instance_id}/") try: response = await client.get(url) response.raise_for_status() data = response.json() if "instances" in data: instances = data["instances"] # API gibt bei einzelner Instanz ein dict zurück, bei Liste eine Liste if isinstance(instances, list) and instances: return InstanceInfo.from_api_response(instances[0]) elif isinstance(instances, dict): # Füge ID hinzu falls nicht vorhanden if "id" not in instances: instances["id"] = instance_id return InstanceInfo.from_api_response(instances) elif isinstance(data, dict) and "id" in data: return InstanceInfo.from_api_response(data) return None except httpx.HTTPStatusError as e: if e.response.status_code == 404: return None logger.error(f"vast.ai API error getting instance {instance_id}: {e}") raise async def start_instance(self, instance_id: int) -> bool: """Startet eine gestoppte Instanz.""" client = await self._get_client() url = self._build_url(f"/instances/{instance_id}/") try: response = await client.put( url, json={"state": "running"}, ) response.raise_for_status() logger.info(f"vast.ai instance {instance_id} start requested") return True except httpx.HTTPStatusError as e: logger.error(f"vast.ai API error starting instance {instance_id}: {e}") return False async def stop_instance(self, instance_id: int) -> bool: """Stoppt eine laufende Instanz (haelt Disk).""" client = await self._get_client() url = self._build_url(f"/instances/{instance_id}/") try: response = await client.put( url, json={"state": "stopped"}, ) response.raise_for_status() logger.info(f"vast.ai instance {instance_id} stop requested") return True except httpx.HTTPStatusError as e: logger.error(f"vast.ai API error stopping instance {instance_id}: {e}") return False async def destroy_instance(self, instance_id: int) -> bool: """Loescht eine Instanz komplett (Disk weg!).""" client = await self._get_client() url = self._build_url(f"/instances/{instance_id}/") try: response = await client.delete(url) response.raise_for_status() logger.info(f"vast.ai instance {instance_id} destroyed") return True except httpx.HTTPStatusError as e: logger.error(f"vast.ai API error destroying instance {instance_id}: {e}") return False async def set_label(self, instance_id: int, label: str) -> bool: """Setzt ein Label fuer eine Instanz.""" client = await self._get_client() url = self._build_url(f"/instances/{instance_id}/") try: response = await client.put( url, json={"label": label}, ) response.raise_for_status() return True except httpx.HTTPStatusError as e: logger.error(f"vast.ai API error setting label on instance {instance_id}: {e}") return False async def wait_for_status( self, instance_id: int, target_status: InstanceStatus, timeout_seconds: int = 300, poll_interval: float = 5.0, ) -> Optional[InstanceInfo]: """ Wartet bis eine Instanz einen bestimmten Status erreicht. Returns: InstanceInfo wenn Status erreicht, None bei Timeout. """ deadline = asyncio.get_event_loop().time() + timeout_seconds while asyncio.get_event_loop().time() < deadline: instance = await self.get_instance(instance_id) if instance and instance.status == target_status: return instance if instance: logger.debug( f"vast.ai instance {instance_id} status: {instance.status.value}, " f"waiting for {target_status.value}" ) await asyncio.sleep(poll_interval) logger.warning( f"Timeout waiting for instance {instance_id} to reach {target_status.value}" ) return None async def wait_for_health( self, instance: InstanceInfo, health_path: str = "/health", internal_port: int = 8001, timeout_seconds: int = 600, poll_interval: float = 5.0, ) -> bool: """ Wartet bis der Health-Endpoint erreichbar ist. Returns: True wenn Health OK, False bei Timeout. """ endpoint = instance.get_endpoint_url(internal_port) if not endpoint: logger.error("No endpoint URL available for health check") return False health_url = f"{endpoint.rstrip('/')}{health_path}" logger.info(f"Waiting for health at {health_url}") deadline = asyncio.get_event_loop().time() + timeout_seconds health_client = httpx.AsyncClient(timeout=5.0) try: while asyncio.get_event_loop().time() < deadline: try: response = await health_client.get(health_url) if 200 <= response.status_code < 300: logger.info(f"Health check passed: {health_url}") return True except Exception as e: logger.debug(f"Health check failed: {e}") await asyncio.sleep(poll_interval) logger.warning(f"Health check timeout: {health_url}") return False finally: await health_client.aclose() async def get_account_info(self) -> Optional[AccountInfo]: """ Holt Account-Informationen inkl. Credit/Budget. Returns: AccountInfo oder None bei Fehler. """ client = await self._get_client() url = self._build_url("/users/current/") try: response = await client.get(url) response.raise_for_status() data = response.json() return AccountInfo.from_api_response(data) except httpx.HTTPStatusError as e: logger.error(f"vast.ai API error getting account info: {e}") return None except Exception as e: logger.error(f"Error getting vast.ai account info: {e}") return None