fix: Restore all files lost during destructive rebase

A previous `git pull --rebase origin main` dropped 177 local commits, losing 3400+ files across admin-v2, backend, studio-v2, website, klausur-service, and many other services. The partial restore attempt (660295e2) only recovered some files. This commit restores all missing files from pre-rebase ref 98933f5e while preserving post-rebase additions (night-scheduler, night-mode UI, NightModeWidget dashboard integration). Restored features include: - AI Module Sidebar (FAB), OCR Labeling, OCR Compare - GPU Dashboard, RAG Pipeline, Magic Help - Klausur-Korrektur (8 files), Abitur-Archiv (5+ files) - Companion, Zeugnisse-Crawler, Screen Flow - Full backend, studio-v2, website, klausur-service - All compliance SDKs, agent-core, voice-service - CI/CD configs, documentation, scripts Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 09:51:32 +01:00
parent f7487ee240
commit bfdaf63ba9
2009 changed files with 749983 additions and 1731 deletions
@@ -0,0 +1,10 @@
+"""
+Infrastructure management module.
+
+Provides control plane for external GPU resources (vast.ai).
+"""
+
+from .vast_client import VastAIClient
+from .vast_power import router as vast_router
+
+__all__ = ["VastAIClient", "vast_router"]
@@ -0,0 +1,419 @@
+"""
+Vast.ai REST API Client.
+
+Verwendet die offizielle vast.ai API statt CLI fuer mehr Stabilitaet.
+API Dokumentation: https://docs.vast.ai/api
+"""
+
+import asyncio
+import logging
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Optional, Dict, Any, List
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+
+class InstanceStatus(Enum):
+    """Instance states this client distinguishes for a vast.ai instance."""
+    RUNNING = "running"
+    STOPPED = "stopped"
+    EXITED = "exited"
+    LOADING = "loading"
+    SCHEDULING = "scheduling"
+    CREATING = "creating"
+    # Fallback used when the API reports a status string that is not recognised.
+    UNKNOWN = "unknown"
+
+
+@dataclass
+class AccountInfo:
+    """Informationen ueber den vast.ai Account."""
+    credit: float  # Aktuelles Guthaben in USD
+    balance: float  # Balance (meist 0)
+    total_spend: float  # Gesamtausgaben
+    username: str
+    email: str
+    has_billing: bool
+
+    @classmethod
+    def from_api_response(cls, data: Dict[str, Any]) -> "AccountInfo":
+        """Erstellt AccountInfo aus API Response."""
+        return cls(
+            credit=data.get("credit", 0.0),
+            balance=data.get("balance", 0.0),
+            total_spend=abs(data.get("total_spend", 0.0)),  # API gibt negativ zurück
+            username=data.get("username", ""),
+            email=data.get("email", ""),
+            has_billing=data.get("has_billing", False),
+        )
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialisiert zu Dictionary."""
+        return {
+            "credit": self.credit,
+            "balance": self.balance,
+            "total_spend": self.total_spend,
+            "username": self.username,
+            "email": self.email,
+            "has_billing": self.has_billing,
+        }
+
+
+@dataclass
+class InstanceInfo:
+    """Informationen ueber eine vast.ai Instanz."""
+    id: int
+    status: InstanceStatus
+    machine_id: Optional[int] = None
+    gpu_name: Optional[str] = None
+    num_gpus: int = 1
+    gpu_ram: Optional[float] = None  # GB
+    cpu_ram: Optional[float] = None  # GB
+    disk_space: Optional[float] = None  # GB
+    dph_total: Optional[float] = None  # $/hour
+    public_ipaddr: Optional[str] = None
+    ports: Dict[str, Any] = field(default_factory=dict)
+    label: Optional[str] = None
+    image_uuid: Optional[str] = None
+    started_at: Optional[datetime] = None
+
+    @classmethod
+    def from_api_response(cls, data: Dict[str, Any]) -> "InstanceInfo":
+        """Erstellt InstanceInfo aus API Response."""
+        status_map = {
+            "running": InstanceStatus.RUNNING,
+            "exited": InstanceStatus.EXITED,
+            "loading": InstanceStatus.LOADING,
+            "scheduling": InstanceStatus.SCHEDULING,
+            "creating": InstanceStatus.CREATING,
+        }
+
+        actual_status = data.get("actual_status", "unknown")
+        status = status_map.get(actual_status, InstanceStatus.UNKNOWN)
+
+        # Parse ports mapping
+        ports = {}
+        if "ports" in data and data["ports"]:
+            ports = data["ports"]
+
+        # Parse started_at
+        started_at = None
+        if "start_date" in data and data["start_date"]:
+            try:
+                started_at = datetime.fromtimestamp(data["start_date"], tz=timezone.utc)
+            except (ValueError, TypeError):
+                pass
+
+        return cls(
+            id=data.get("id", 0),
+            status=status,
+            machine_id=data.get("machine_id"),
+            gpu_name=data.get("gpu_name"),
+            num_gpus=data.get("num_gpus", 1),
+            gpu_ram=data.get("gpu_ram"),
+            cpu_ram=data.get("cpu_ram"),
+            disk_space=data.get("disk_space"),
+            dph_total=data.get("dph_total"),
+            public_ipaddr=data.get("public_ipaddr"),
+            ports=ports,
+            label=data.get("label"),
+            image_uuid=data.get("image_uuid"),
+            started_at=started_at,
+        )
+
+    def get_endpoint_url(self, internal_port: int = 8001) -> Optional[str]:
+        """Berechnet die externe URL fuer einen internen Port."""
+        if not self.public_ipaddr:
+            return None
+
+        # vast.ai mapped interne Ports auf externe Ports
+        # Format: {"8001/tcp": [{"HostIp": "0.0.0.0", "HostPort": "12345"}]}
+        port_key = f"{internal_port}/tcp"
+        if port_key in self.ports:
+            port_info = self.ports[port_key]
+            if isinstance(port_info, list) and port_info:
+                host_port = port_info[0].get("HostPort")
+                if host_port:
+                    return f"http://{self.public_ipaddr}:{host_port}"
+
+        # Fallback: Direkter Port
+        return f"http://{self.public_ipaddr}:{internal_port}"
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialisiert zu Dictionary."""
+        return {
+            "id": self.id,
+            "status": self.status.value,
+            "machine_id": self.machine_id,
+            "gpu_name": self.gpu_name,
+            "num_gpus": self.num_gpus,
+            "gpu_ram": self.gpu_ram,
+            "cpu_ram": self.cpu_ram,
+            "disk_space": self.disk_space,
+            "dph_total": self.dph_total,
+            "public_ipaddr": self.public_ipaddr,
+            "ports": self.ports,
+            "label": self.label,
+            "started_at": self.started_at.isoformat() if self.started_at else None,
+        }
+
+
+class VastAIClient:
+    """
+    Async Client fuer vast.ai REST API.
+
+    Verwendet die offizielle API unter https://console.vast.ai/api/v0/
+    """
+
+    BASE_URL = "https://console.vast.ai/api/v0"
+
+    def __init__(self, api_key: str, timeout: float = 30.0):
+        self.api_key = api_key
+        self.timeout = timeout
+        self._client: Optional[httpx.AsyncClient] = None
+
+    async def _get_client(self) -> httpx.AsyncClient:
+        """Lazy Client-Erstellung."""
+        if self._client is None or self._client.is_closed:
+            self._client = httpx.AsyncClient(
+                timeout=self.timeout,
+                headers={
+                    "Accept": "application/json",
+                },
+            )
+        return self._client
+
+    async def close(self) -> None:
+        """Schliesst den HTTP Client."""
+        if self._client and not self._client.is_closed:
+            await self._client.aclose()
+            self._client = None
+
+    def _build_url(self, endpoint: str) -> str:
+        """Baut vollstaendige URL mit API Key."""
+        sep = "&" if "?" in endpoint else "?"
+        return f"{self.BASE_URL}{endpoint}{sep}api_key={self.api_key}"
+
+    async def list_instances(self) -> List[InstanceInfo]:
+        """Listet alle Instanzen auf."""
+        client = await self._get_client()
+        url = self._build_url("/instances/")
+
+        try:
+            response = await client.get(url)
+            response.raise_for_status()
+            data = response.json()
+
+            instances = []
+            if "instances" in data:
+                for inst_data in data["instances"]:
+                    instances.append(InstanceInfo.from_api_response(inst_data))
+
+            return instances
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"vast.ai API error listing instances: {e}")
+            raise
+
+    async def get_instance(self, instance_id: int) -> Optional[InstanceInfo]:
+        """Holt Details einer spezifischen Instanz."""
+        client = await self._get_client()
+        url = self._build_url(f"/instances/{instance_id}/")
+
+        try:
+            response = await client.get(url)
+            response.raise_for_status()
+            data = response.json()
+
+            if "instances" in data:
+                instances = data["instances"]
+                # API gibt bei einzelner Instanz ein dict zurück, bei Liste eine Liste
+                if isinstance(instances, list) and instances:
+                    return InstanceInfo.from_api_response(instances[0])
+                elif isinstance(instances, dict):
+                    # Füge ID hinzu falls nicht vorhanden
+                    if "id" not in instances:
+                        instances["id"] = instance_id
+                    return InstanceInfo.from_api_response(instances)
+            elif isinstance(data, dict) and "id" in data:
+                return InstanceInfo.from_api_response(data)
+
+            return None
+
+        except httpx.HTTPStatusError as e:
+            if e.response.status_code == 404:
+                return None
+            logger.error(f"vast.ai API error getting instance {instance_id}: {e}")
+            raise
+
+    async def start_instance(self, instance_id: int) -> bool:
+        """Startet eine gestoppte Instanz."""
+        client = await self._get_client()
+        url = self._build_url(f"/instances/{instance_id}/")
+
+        try:
+            response = await client.put(
+                url,
+                json={"state": "running"},
+            )
+            response.raise_for_status()
+            logger.info(f"vast.ai instance {instance_id} start requested")
+            return True
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"vast.ai API error starting instance {instance_id}: {e}")
+            return False
+
+    async def stop_instance(self, instance_id: int) -> bool:
+        """Stoppt eine laufende Instanz (haelt Disk)."""
+        client = await self._get_client()
+        url = self._build_url(f"/instances/{instance_id}/")
+
+        try:
+            response = await client.put(
+                url,
+                json={"state": "stopped"},
+            )
+            response.raise_for_status()
+            logger.info(f"vast.ai instance {instance_id} stop requested")
+            return True
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"vast.ai API error stopping instance {instance_id}: {e}")
+            return False
+
+    async def destroy_instance(self, instance_id: int) -> bool:
+        """Loescht eine Instanz komplett (Disk weg!)."""
+        client = await self._get_client()
+        url = self._build_url(f"/instances/{instance_id}/")
+
+        try:
+            response = await client.delete(url)
+            response.raise_for_status()
+            logger.info(f"vast.ai instance {instance_id} destroyed")
+            return True
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"vast.ai API error destroying instance {instance_id}: {e}")
+            return False
+
+    async def set_label(self, instance_id: int, label: str) -> bool:
+        """Setzt ein Label fuer eine Instanz."""
+        client = await self._get_client()
+        url = self._build_url(f"/instances/{instance_id}/")
+
+        try:
+            response = await client.put(
+                url,
+                json={"label": label},
+            )
+            response.raise_for_status()
+            return True
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"vast.ai API error setting label on instance {instance_id}: {e}")
+            return False
+
+    async def wait_for_status(
+        self,
+        instance_id: int,
+        target_status: InstanceStatus,
+        timeout_seconds: int = 300,
+        poll_interval: float = 5.0,
+    ) -> Optional[InstanceInfo]:
+        """
+        Wartet bis eine Instanz einen bestimmten Status erreicht.
+
+        Returns:
+            InstanceInfo wenn Status erreicht, None bei Timeout.
+        """
+        deadline = asyncio.get_event_loop().time() + timeout_seconds
+
+        while asyncio.get_event_loop().time() < deadline:
+            instance = await self.get_instance(instance_id)
+
+            if instance and instance.status == target_status:
+                return instance
+
+            if instance:
+                logger.debug(
+                    f"vast.ai instance {instance_id} status: {instance.status.value}, "
+                    f"waiting for {target_status.value}"
+                )
+
+            await asyncio.sleep(poll_interval)
+
+        logger.warning(
+            f"Timeout waiting for instance {instance_id} to reach {target_status.value}"
+        )
+        return None
+
+    async def wait_for_health(
+        self,
+        instance: InstanceInfo,
+        health_path: str = "/health",
+        internal_port: int = 8001,
+        timeout_seconds: int = 600,
+        poll_interval: float = 5.0,
+    ) -> bool:
+        """
+        Wartet bis der Health-Endpoint erreichbar ist.
+
+        Returns:
+            True wenn Health OK, False bei Timeout.
+        """
+        endpoint = instance.get_endpoint_url(internal_port)
+        if not endpoint:
+            logger.error("No endpoint URL available for health check")
+            return False
+
+        health_url = f"{endpoint.rstrip('/')}{health_path}"
+        logger.info(f"Waiting for health at {health_url}")
+
+        deadline = asyncio.get_event_loop().time() + timeout_seconds
+        health_client = httpx.AsyncClient(timeout=5.0)
+
+        try:
+            while asyncio.get_event_loop().time() < deadline:
+                try:
+                    response = await health_client.get(health_url)
+                    if 200 <= response.status_code < 300:
+                        logger.info(f"Health check passed: {health_url}")
+                        return True
+                except Exception as e:
+                    logger.debug(f"Health check failed: {e}")
+
+                await asyncio.sleep(poll_interval)
+
+            logger.warning(f"Health check timeout: {health_url}")
+            return False
+
+        finally:
+            await health_client.aclose()
+
+    async def get_account_info(self) -> Optional[AccountInfo]:
+        """
+        Holt Account-Informationen inkl. Credit/Budget.
+
+        Returns:
+            AccountInfo oder None bei Fehler.
+        """
+        client = await self._get_client()
+        url = self._build_url("/users/current/")
+
+        try:
+            response = await client.get(url)
+            response.raise_for_status()
+            data = response.json()
+
+            return AccountInfo.from_api_response(data)
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"vast.ai API error getting account info: {e}")
+            return None
+        except Exception as e:
+            logger.error(f"Error getting vast.ai account info: {e}")
+            return None
@@ -0,0 +1,618 @@
+"""
+Vast.ai Power Control API.
+
+Stellt Endpoints bereit fuer:
+- Start/Stop von vast.ai Instanzen
+- Status-Abfrage
+- Auto-Shutdown bei Inaktivitaet
+- Kosten-Tracking
+
+Sicherheit: Alle Endpoints erfordern CONTROL_API_KEY.
+"""
+
+import asyncio
+import json
+import logging
+import os
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional, Dict, Any, List
+
+from fastapi import APIRouter, Depends, HTTPException, Header, BackgroundTasks
+from pydantic import BaseModel, Field
+
+from .vast_client import VastAIClient, InstanceInfo, InstanceStatus, AccountInfo
+
+logger = logging.getLogger(__name__)
+
+# All routes defined in this module are served under the /infra/vast prefix.
+router = APIRouter(prefix="/infra/vast", tags=["Infrastructure"])
+
+
+# -------------------------
+# Configuration (ENV)
+# -------------------------
+# Every value below is read from the environment once, at import time.
+# The int(...) conversions raise ValueError during import if the variable
+# is set to a non-numeric value.
+VAST_API_KEY = os.getenv("VAST_API_KEY")
+VAST_INSTANCE_ID = os.getenv("VAST_INSTANCE_ID")  # Numeric instance ID (kept as a string here)
+CONTROL_API_KEY = os.getenv("CONTROL_API_KEY")  # Admin key for these endpoints
+
+# Health check configuration
+VAST_HEALTH_PORT = int(os.getenv("VAST_HEALTH_PORT", "8001"))
+VAST_HEALTH_PATH = os.getenv("VAST_HEALTH_PATH", "/health")
+VAST_WAIT_TIMEOUT_S = int(os.getenv("VAST_WAIT_TIMEOUT_S", "600"))  # 10 min
+
+# Auto-shutdown configuration
+# Enabled only when the variable is exactly "true" (case-insensitive).
+AUTO_SHUTDOWN_ENABLED = os.getenv("VAST_AUTO_SHUTDOWN", "true").lower() == "true"
+AUTO_SHUTDOWN_MINUTES = int(os.getenv("VAST_AUTO_SHUTDOWN_MINUTES", "30"))
+
+# State persistence (in /tmp for container compatibility)
+STATE_PATH = Path(os.getenv("VAST_STATE_PATH", "/tmp/vast_state.json"))
+AUDIT_PATH = Path(os.getenv("VAST_AUDIT_PATH", "/tmp/vast_audit.log"))
+
+
+# -------------------------
+# State Management
+# -------------------------
+class VastState:
+    """
+    Persistenter State fuer vast.ai Kontrolle.
+
+    Speichert:
+    - Aktueller Endpunkt (weil IP sich aendern kann)
+    - Letzte Aktivitaet (fuer Auto-Shutdown)
+    - Kosten-Tracking
+    """
+
+    def __init__(self, path: Path = STATE_PATH):
+        self.path = path
+        self._state: Dict[str, Any] = self._load()
+
+    def _load(self) -> Dict[str, Any]:
+        """Laedt State von Disk."""
+        if not self.path.exists():
+            return {
+                "desired_state": None,
+                "endpoint_base_url": None,
+                "last_activity": None,
+                "last_start": None,
+                "last_stop": None,
+                "total_runtime_seconds": 0,
+                "total_cost_usd": 0.0,
+            }
+        try:
+            return json.loads(self.path.read_text(encoding="utf-8"))
+        except Exception:
+            return {}
+
+    def _save(self) -> None:
+        """Speichert State auf Disk."""
+        self.path.parent.mkdir(parents=True, exist_ok=True)
+        self.path.write_text(
+            json.dumps(self._state, ensure_ascii=False, indent=2),
+            encoding="utf-8",
+        )
+
+    def get(self, key: str, default: Any = None) -> Any:
+        return self._state.get(key, default)
+
+    def set(self, key: str, value: Any) -> None:
+        self._state[key] = value
+        self._save()
+
+    def update(self, data: Dict[str, Any]) -> None:
+        self._state.update(data)
+        self._save()
+
+    def record_activity(self) -> None:
+        """Zeichnet letzte Aktivitaet auf (fuer Auto-Shutdown)."""
+        self._state["last_activity"] = datetime.now(timezone.utc).isoformat()
+        self._save()
+
+    def get_last_activity(self) -> Optional[datetime]:
+        """Gibt letzte Aktivitaet als datetime."""
+        ts = self._state.get("last_activity")
+        if ts:
+            return datetime.fromisoformat(ts)
+        return None
+
+    def record_start(self) -> None:
+        """Zeichnet Start-Zeit auf."""
+        self._state["last_start"] = datetime.now(timezone.utc).isoformat()
+        self._state["desired_state"] = "RUNNING"
+        self._save()
+
+    def record_stop(self, dph_total: Optional[float] = None) -> None:
+        """Zeichnet Stop-Zeit auf und berechnet Kosten."""
+        now = datetime.now(timezone.utc)
+        self._state["last_stop"] = now.isoformat()
+        self._state["desired_state"] = "STOPPED"
+
+        # Berechne Runtime und Kosten
+        last_start = self._state.get("last_start")
+        if last_start:
+            start_dt = datetime.fromisoformat(last_start)
+            runtime_seconds = (now - start_dt).total_seconds()
+            self._state["total_runtime_seconds"] = (
+                self._state.get("total_runtime_seconds", 0) + runtime_seconds
+            )
+
+            if dph_total:
+                hours = runtime_seconds / 3600
+                cost = hours * dph_total
+                self._state["total_cost_usd"] = (
+                    self._state.get("total_cost_usd", 0.0) + cost
+                )
+                logger.info(
+                    f"Session cost: ${cost:.3f} ({runtime_seconds/60:.1f} min @ ${dph_total}/h)"
+                )
+
+        self._save()
+
+
+# Module-wide state instance; constructed at import time, which loads the
+# persisted state from STATE_PATH (or starts with defaults if the file is absent).
+_state = VastState()
+
+
+# -------------------------
+# Audit Logging
+# -------------------------
+def audit_log(event: str, actor: str = "system", meta: Optional[Dict[str, Any]] = None) -> None:
+    """Schreibt Audit-Log Eintrag."""
+    meta = meta or {}
+    line = json.dumps(
+        {
+            "ts": datetime.now(timezone.utc).isoformat(),
+            "event": event,
+            "actor": actor,
+            "meta": meta,
+        },
+        ensure_ascii=False,
+    )
+    AUDIT_PATH.parent.mkdir(parents=True, exist_ok=True)
+    with AUDIT_PATH.open("a", encoding="utf-8") as f:
+        f.write(line + "\n")
+    logger.info(f"AUDIT: {event} by {actor}")
+
+
+# -------------------------
+# Request/Response Models
+# -------------------------
+class PowerOnRequest(BaseModel):
+    # Request body of POST /power/on.
+    # wait_for_health: when true, the endpoint also waits for the health check.
+    wait_for_health: bool = Field(default=True, description="Warten bis LLM bereit")
+    # Health endpoint path and internal port; defaults come from the environment.
+    health_path: str = Field(default=VAST_HEALTH_PATH)
+    health_port: int = Field(default=VAST_HEALTH_PORT)
+
+
+class PowerOnResponse(BaseModel):
+    # Response body of POST /power/on.
+    # status is one of "starting", "running_unhealthy" or "running".
+    status: str
+    instance_id: Optional[int] = None
+    endpoint_base_url: Optional[str] = None
+    health_url: Optional[str] = None
+    message: Optional[str] = None
+
+
+class PowerOffRequest(BaseModel):
+    # Request body of POST /power/off.
+    pass  # No parameters needed
+
+
+class PowerOffResponse(BaseModel):
+    # Response body of POST /power/off; all fields except status are optional.
+    status: str
+    session_runtime_minutes: Optional[float] = None
+    session_cost_usd: Optional[float] = None
+    message: Optional[str] = None
+
+
+class VastStatusResponse(BaseModel):
+    # Response body of GET /status.
+    instance_id: Optional[int] = None
+    # "unconfigured", "not_found", or the string value of the instance status.
+    status: str
+    gpu_name: Optional[str] = None
+    dph_total: Optional[float] = None
+    endpoint_base_url: Optional[str] = None
+    last_activity: Optional[str] = None
+    auto_shutdown_in_minutes: Optional[int] = None
+    total_runtime_hours: Optional[float] = None
+    total_cost_usd: Optional[float] = None
+    # Budget / credit information
+    account_credit: Optional[float] = None  # Remaining credit in USD
+    account_total_spend: Optional[float] = None  # Total spend on vast.ai
+    # Session cost (since the last start)
+    session_runtime_minutes: Optional[float] = None
+    session_cost_usd: Optional[float] = None
+    message: Optional[str] = None
+
+
+class CostStatsResponse(BaseModel):
+    # Aggregated cost statistics; all four fields are required.
+    # NOTE(review): no endpoint using this model is visible in this part of the file.
+    total_runtime_hours: float
+    total_cost_usd: float
+    sessions_count: int
+    avg_session_minutes: float
+
+
+# -------------------------
+# Security Dependency
+# -------------------------
+def require_control_key(x_api_key: Optional[str] = Header(default=None)) -> None:
+    """
+    Admin-Schutz fuer Control-Endpoints.
+
+    Header: X-API-Key: <CONTROL_API_KEY>
+    """
+    if not CONTROL_API_KEY:
+        raise HTTPException(
+            status_code=500,
+            detail="CONTROL_API_KEY not configured on server",
+        )
+    if x_api_key != CONTROL_API_KEY:
+        raise HTTPException(status_code=401, detail="Unauthorized")
+
+
+# -------------------------
+# Auto-Shutdown Background Task
+# -------------------------
+_shutdown_task: Optional[asyncio.Task] = None
+
+
+async def auto_shutdown_monitor() -> None:
+    """
+    Hintergrund-Task der bei Inaktivitaet die Instanz stoppt.
+
+    Laeuft permanent wenn Instanz an ist und prueft alle 60s ob
+    Aktivitaet stattfand. Stoppt Instanz wenn keine Aktivitaet
+    seit AUTO_SHUTDOWN_MINUTES.
+    """
+    if not VAST_API_KEY or not VAST_INSTANCE_ID:
+        return
+
+    client = VastAIClient(VAST_API_KEY)
+
+    try:
+        while True:
+            await asyncio.sleep(60)  # Check every minute
+
+            if not AUTO_SHUTDOWN_ENABLED:
+                continue
+
+            last_activity = _state.get_last_activity()
+            if not last_activity:
+                continue
+
+            # Berechne Inaktivitaet
+            now = datetime.now(timezone.utc)
+            inactive_minutes = (now - last_activity).total_seconds() / 60
+
+            if inactive_minutes >= AUTO_SHUTDOWN_MINUTES:
+                logger.info(
+                    f"Auto-shutdown triggered: {inactive_minutes:.1f} min inactive"
+                )
+                audit_log(
+                    "auto_shutdown",
+                    actor="system",
+                    meta={"inactive_minutes": inactive_minutes},
+                )
+
+                # Hole aktuelle Instanz-Info fuer Kosten
+                instance = await client.get_instance(int(VAST_INSTANCE_ID))
+                dph = instance.dph_total if instance else None
+
+                # Stop
+                await client.stop_instance(int(VAST_INSTANCE_ID))
+                _state.record_stop(dph_total=dph)
+
+                audit_log("auto_shutdown_complete", actor="system")
+
+    except asyncio.CancelledError:
+        pass
+    except Exception as e:
+        logger.error(f"Auto-shutdown monitor error: {e}")
+    finally:
+        await client.close()
+
+
+def start_auto_shutdown_monitor() -> None:
+    """Startet den Auto-Shutdown Monitor."""
+    global _shutdown_task
+    if _shutdown_task is None or _shutdown_task.done():
+        _shutdown_task = asyncio.create_task(auto_shutdown_monitor())
+        logger.info("Auto-shutdown monitor started")
+
+
+def stop_auto_shutdown_monitor() -> None:
+    """Stoppt den Auto-Shutdown Monitor."""
+    global _shutdown_task
+    if _shutdown_task and not _shutdown_task.done():
+        _shutdown_task.cancel()
+        logger.info("Auto-shutdown monitor stopped")
+
+
+# -------------------------
+# API Endpoints
+# -------------------------
+
+@router.get("/status", response_model=VastStatusResponse, dependencies=[Depends(require_control_key)])
+async def get_status() -> VastStatusResponse:
+    """
+    Gibt Status der vast.ai Instanz zurueck.
+
+    Inkludiert:
+    - Aktueller Status (running/stopped/etc)
+    - GPU Info und Kosten pro Stunde
+    - Endpoint URL
+    - Auto-Shutdown Timer
+    - Gesamtkosten
+    - Account Credit (verbleibendes Budget)
+    - Session-Kosten (seit letztem Start)
+    """
+    if not VAST_API_KEY or not VAST_INSTANCE_ID:
+        return VastStatusResponse(
+            status="unconfigured",
+            message="VAST_API_KEY or VAST_INSTANCE_ID not set",
+        )
+
+    client = VastAIClient(VAST_API_KEY)
+    try:
+        instance = await client.get_instance(int(VAST_INSTANCE_ID))
+
+        if not instance:
+            return VastStatusResponse(
+                instance_id=int(VAST_INSTANCE_ID),
+                status="not_found",
+                message=f"Instance {VAST_INSTANCE_ID} not found",
+            )
+
+        # Hole Account-Info fuer Budget/Credit
+        account_info = await client.get_account_info()
+        account_credit = account_info.credit if account_info else None
+        account_total_spend = account_info.total_spend if account_info else None
+
+        # Update endpoint if running
+        endpoint = None
+        if instance.status == InstanceStatus.RUNNING:
+            endpoint = instance.get_endpoint_url(VAST_HEALTH_PORT)
+            if endpoint:
+                _state.set("endpoint_base_url", endpoint)
+
+        # Calculate auto-shutdown timer
+        auto_shutdown_minutes = None
+        if AUTO_SHUTDOWN_ENABLED and instance.status == InstanceStatus.RUNNING:
+            last_activity = _state.get_last_activity()
+            if last_activity:
+                inactive = (datetime.now(timezone.utc) - last_activity).total_seconds() / 60
+                auto_shutdown_minutes = max(0, int(AUTO_SHUTDOWN_MINUTES - inactive))
+
+        # Berechne aktuelle Session-Kosten (wenn Instanz laeuft)
+        session_runtime_minutes = None
+        session_cost_usd = None
+        last_start = _state.get("last_start")
+
+        # Falls Instanz laeuft aber kein last_start gesetzt (z.B. nach Container-Neustart),
+        # nutze start_date aus der vast.ai API falls vorhanden, sonst jetzt
+        if instance.status == InstanceStatus.RUNNING and not last_start:
+            if instance.started_at:
+                _state.set("last_start", instance.started_at.isoformat())
+                last_start = instance.started_at.isoformat()
+            else:
+                _state.record_start()
+                last_start = _state.get("last_start")
+
+        if last_start and instance.status == InstanceStatus.RUNNING:
+            start_dt = datetime.fromisoformat(last_start)
+            session_runtime_minutes = (datetime.now(timezone.utc) - start_dt).total_seconds() / 60
+            if instance.dph_total:
+                session_cost_usd = (session_runtime_minutes / 60) * instance.dph_total
+
+        return VastStatusResponse(
+            instance_id=instance.id,
+            status=instance.status.value,
+            gpu_name=instance.gpu_name,
+            dph_total=instance.dph_total,
+            endpoint_base_url=endpoint or _state.get("endpoint_base_url"),
+            last_activity=_state.get("last_activity"),
+            auto_shutdown_in_minutes=auto_shutdown_minutes,
+            total_runtime_hours=_state.get("total_runtime_seconds", 0) / 3600,
+            total_cost_usd=_state.get("total_cost_usd", 0.0),
+            account_credit=account_credit,
+            account_total_spend=account_total_spend,
+            session_runtime_minutes=session_runtime_minutes,
+            session_cost_usd=session_cost_usd,
+        )
+
+    finally:
+        await client.close()
+
+
+@router.post("/power/on", response_model=PowerOnResponse, dependencies=[Depends(require_control_key)])
+async def power_on(
+    payload: PowerOnRequest,
+    background_tasks: BackgroundTasks,
+) -> PowerOnResponse:
+    """
+    Startet die vast.ai Instanz.
+
+    1. Startet Instanz via API
+    2. Wartet auf Status RUNNING
+    3. Optional: Wartet auf Health-Endpoint
+    4. Startet Auto-Shutdown Monitor
+    """
+    if not VAST_API_KEY or not VAST_INSTANCE_ID:
+        raise HTTPException(
+            status_code=500,
+            detail="VAST_API_KEY or VAST_INSTANCE_ID not configured",
+        )
+
+    instance_id = int(VAST_INSTANCE_ID)
+    audit_log("power_on_requested", meta={"instance_id": instance_id})
+
+    client = VastAIClient(VAST_API_KEY)
+    try:
+        # Start instance
+        success = await client.start_instance(instance_id)
+        if not success:
+            raise HTTPException(status_code=502, detail="Failed to start instance")
+
+        _state.record_start()
+        _state.record_activity()
+
+        # Wait for running status
+        instance = await client.wait_for_status(
+            instance_id,
+            InstanceStatus.RUNNING,
+            timeout_seconds=300,
+        )
+
+        if not instance:
+            return PowerOnResponse(
+                status="starting",
+                instance_id=instance_id,
+                message="Instance start requested but not yet running. Check status.",
+            )
+
+        # Get endpoint
+        endpoint = instance.get_endpoint_url(payload.health_port)
+        if endpoint:
+            _state.set("endpoint_base_url", endpoint)
+
+        # Wait for health if requested
+        if payload.wait_for_health:
+            health_ok = await client.wait_for_health(
+                instance,
+                health_path=payload.health_path,
+                internal_port=payload.health_port,
+                timeout_seconds=VAST_WAIT_TIMEOUT_S,
+            )
+
+            if not health_ok:
+                audit_log("power_on_health_timeout", meta={"instance_id": instance_id})
+                return PowerOnResponse(
+                    status="running_unhealthy",
+                    instance_id=instance_id,
+                    endpoint_base_url=endpoint,
+                    message=f"Instance running but health check failed at {endpoint}{payload.health_path}",
+                )
+
+        # Start auto-shutdown monitor
+        start_auto_shutdown_monitor()
+
+        audit_log("power_on_complete", meta={
+            "instance_id": instance_id,
+            "endpoint": endpoint,
+        })
+
+        return PowerOnResponse(
+            status="running",
+            instance_id=instance_id,
+            endpoint_base_url=endpoint,
+            health_url=f"{endpoint}{payload.health_path}" if endpoint else None,
+            message="Instance running and healthy",
+        )
+
+    finally:
+        await client.close()
+
+
+@router.post("/power/off", response_model=PowerOffResponse, dependencies=[Depends(require_control_key)])
+async def power_off(payload: PowerOffRequest) -> PowerOffResponse:
+    """
+    Stoppt die vast.ai Instanz (behaelt Disk).
+
+    Berechnet Session-Kosten und -Laufzeit.
+    """
+    if not VAST_API_KEY or not VAST_INSTANCE_ID:
+        raise HTTPException(
+            status_code=500,
+            detail="VAST_API_KEY or VAST_INSTANCE_ID not configured",
+        )
+
+    instance_id = int(VAST_INSTANCE_ID)
+    audit_log("power_off_requested", meta={"instance_id": instance_id})
+
+    # Stop auto-shutdown monitor
+    stop_auto_shutdown_monitor()
+
+    client = VastAIClient(VAST_API_KEY)
+    try:
+        # Get current info for cost calculation
+        instance = await client.get_instance(instance_id)
+        dph = instance.dph_total if instance else None
+
+        # Calculate session stats before updating state
+        session_runtime = 0.0
+        session_cost = 0.0
+        last_start = _state.get("last_start")
+        if last_start:
+            start_dt = datetime.fromisoformat(last_start)
+            session_runtime = (datetime.now(timezone.utc) - start_dt).total_seconds() / 60
+            if dph:
+                session_cost = (session_runtime / 60) * dph
+
+        # Stop instance
+        success = await client.stop_instance(instance_id)
+        if not success:
+            raise HTTPException(status_code=502, detail="Failed to stop instance")
+
+        _state.record_stop(dph_total=dph)
+
+        audit_log("power_off_complete", meta={
+            "instance_id": instance_id,
+            "session_minutes": session_runtime,
+            "session_cost": session_cost,
+        })
+
+        return PowerOffResponse(
+            status="stopped",
+            session_runtime_minutes=session_runtime,
+            session_cost_usd=session_cost,
+            message=f"Instance stopped. Session: {session_runtime:.1f} min, ${session_cost:.3f}",
+        )
+
+    finally:
+        await client.close()
+
+
+@router.post("/activity", dependencies=[Depends(require_control_key)])
+async def record_activity() -> Dict[str, str]:
+    """
+    Zeichnet Aktivitaet auf (verzoegert Auto-Shutdown).
+
+    Sollte von LLM Gateway aufgerufen werden bei jedem Request.
+    """
+    _state.record_activity()
+    return {"status": "recorded", "last_activity": _state.get("last_activity")}
+
+
+@router.get("/costs", response_model=CostStatsResponse, dependencies=[Depends(require_control_key)])
+async def get_costs() -> CostStatsResponse:
+    """
+    Gibt Kosten-Statistiken zurueck.
+    """
+    total_seconds = _state.get("total_runtime_seconds", 0)
+    total_cost = _state.get("total_cost_usd", 0.0)
+
+    # TODO: Sessions count from audit log
+    sessions = 1 if total_seconds > 0 else 0
+    avg_minutes = (total_seconds / 60 / sessions) if sessions > 0 else 0
+
+    return CostStatsResponse(
+        total_runtime_hours=total_seconds / 3600,
+        total_cost_usd=total_cost,
+        sessions_count=sessions,
+        avg_session_minutes=avg_minutes,
+    )
+
+
+@router.get("/audit", dependencies=[Depends(require_control_key)])
+async def get_audit_log(limit: int = 50) -> List[Dict[str, Any]]:
+    """
+    Gibt letzte Audit-Log Eintraege zurueck.
+    """
+    if not AUDIT_PATH.exists():
+        return []
+
+    lines = AUDIT_PATH.read_text(encoding="utf-8").strip().split("\n")
+    entries = []
+    for line in lines[-limit:]:
+        try:
+            entries.append(json.loads(line))
+        except json.JSONDecodeError:
+            continue
+
+    return list(reversed(entries))  # Neueste zuerst