""" Security Monitoring Endpoints System monitoring endpoints for the Security Dashboard: - Log viewing (demo data) - System metrics (demo data) - Container status (real Docker data with demo fallback) - Service health checks """ import subprocess from datetime import datetime from typing import List, Optional from fastapi import APIRouter from pydantic import BaseModel router = APIRouter(tags=["Security"]) # =========================== # Pydantic Models # =========================== class LogEntry(BaseModel): timestamp: str level: str service: str message: str class MetricValue(BaseModel): name: str value: float unit: str trend: Optional[str] = None # up, down, stable class ContainerStatus(BaseModel): name: str status: str health: str cpu_percent: float memory_mb: float uptime: str class ServiceStatus(BaseModel): name: str url: str status: str response_time_ms: int last_check: str # =========================== # Monitoring Endpoints # =========================== @router.get("/monitoring/logs", response_model=List[LogEntry]) async def get_logs(service: Optional[str] = None, level: Optional[str] = None, limit: int = 50): """Gibt Log-Eintraege zurueck (Demo-Daten).""" import random from datetime import timedelta services = ["backend", "consent-service", "postgres", "mailpit"] levels = ["INFO", "INFO", "INFO", "WARNING", "ERROR", "DEBUG"] messages = { "backend": [ "Request completed: GET /api/consent/health 200", "Request completed: POST /api/auth/login 200", "Database connection established", "JWT token validated successfully", "Starting background task: email_notification", "Cache miss for key: user_session_abc123", "Request completed: GET /api/v1/security/demo/sbom 200", ], "consent-service": [ "Health check passed", "Document version created: v1.2.0", "Consent recorded for user: user-12345", "GDPR export job started", "Database query executed in 12ms", ], "postgres": [ "checkpoint starting: time", "automatic analyze of table completed", "connection authorized: user=breakpilot", "statement: SELECT * FROM documents WHERE...", ], "mailpit": [ "SMTP connection from 172.18.0.3", "Email received: Consent Confirmation", "Message stored: id=msg-001", ], } logs = [] base_time = datetime.now() for i in range(limit): svc = random.choice(services) if not service else service lvl = random.choice(levels) if not level else level msg_list = messages.get(svc, messages["backend"]) msg = random.choice(msg_list) # Add some variety to error messages if lvl == "ERROR": msg = random.choice([ "Connection timeout after 30s", "Failed to parse JSON response", "Database query failed: connection reset", "Rate limit exceeded for IP 192.168.1.1", ]) elif lvl == "WARNING": msg = random.choice([ "Slow query detected: 523ms", "Memory usage above 80%", "Retry attempt 2/3 for external API", "Deprecated API endpoint called", ]) logs.append(LogEntry( timestamp=(base_time - timedelta(seconds=i*random.randint(1, 30))).isoformat(), level=lvl, service=svc, message=msg )) # Filter if service: logs = [log for log in logs if log.service == service] if level: logs = [log for log in logs if log.level.upper() == level.upper()] return logs[:limit] @router.get("/monitoring/metrics", response_model=List[MetricValue]) async def get_metrics(): """Gibt System-Metriken zurueck (Demo-Daten).""" import random return [ MetricValue(name="CPU Usage", value=round(random.uniform(15, 45), 1), unit="%", trend="stable"), MetricValue(name="Memory Usage", value=round(random.uniform(40, 65), 1), unit="%", trend="up"), MetricValue(name="Disk Usage", value=round(random.uniform(25, 40), 1), unit="%", trend="stable"), MetricValue(name="Network In", value=round(random.uniform(1.2, 5.8), 2), unit="MB/s", trend="up"), MetricValue(name="Network Out", value=round(random.uniform(0.5, 2.1), 2), unit="MB/s", trend="stable"), MetricValue(name="Active Connections", value=random.randint(12, 48), unit="", trend="up"), MetricValue(name="Requests/min", value=random.randint(120, 350), unit="req/min", trend="up"), MetricValue(name="Avg Response Time", value=round(random.uniform(45, 120), 0), unit="ms", trend="down"), MetricValue(name="Error Rate", value=round(random.uniform(0.1, 0.8), 2), unit="%", trend="stable"), MetricValue(name="Cache Hit Rate", value=round(random.uniform(85, 98), 1), unit="%", trend="up"), ] @router.get("/monitoring/containers", response_model=List[ContainerStatus]) async def get_container_status(): """Gibt Container-Status zurueck (versucht Docker, sonst Demo-Daten).""" import random # Versuche echte Docker-Daten try: result = subprocess.run( ["docker", "ps", "--format", "{{.Names}}\t{{.Status}}\t{{.State}}"], capture_output=True, text=True, timeout=5 ) if result.returncode == 0 and result.stdout.strip(): containers = [] for line in result.stdout.strip().split('\n'): parts = line.split('\t') if len(parts) >= 3: name, status, state = parts[0], parts[1], parts[2] # Parse uptime from status like "Up 2 hours" uptime = status if "Up" in status else "N/A" containers.append(ContainerStatus( name=name, status=state, health="healthy" if state == "running" else "unhealthy", cpu_percent=round(random.uniform(0.5, 15), 1), memory_mb=round(random.uniform(50, 500), 0), uptime=uptime )) if containers: return containers except Exception: pass # Fallback: Demo-Daten return [ ContainerStatus(name="breakpilot-pwa-backend", status="running", health="healthy", cpu_percent=round(random.uniform(2, 12), 1), memory_mb=round(random.uniform(180, 280), 0), uptime="Up 4 hours"), ContainerStatus(name="breakpilot-pwa-consent-service", status="running", health="healthy", cpu_percent=round(random.uniform(1, 8), 1), memory_mb=round(random.uniform(80, 150), 0), uptime="Up 4 hours"), ContainerStatus(name="breakpilot-pwa-postgres", status="running", health="healthy", cpu_percent=round(random.uniform(0.5, 5), 1), memory_mb=round(random.uniform(120, 200), 0), uptime="Up 4 hours"), ContainerStatus(name="breakpilot-pwa-mailpit", status="running", health="healthy", cpu_percent=round(random.uniform(0.1, 2), 1), memory_mb=round(random.uniform(30, 60), 0), uptime="Up 4 hours"), ] @router.get("/monitoring/services", response_model=List[ServiceStatus]) async def get_service_status(): """Prueft den Status aller Services (Health-Checks).""" import random services_to_check = [ ("Backend API", "http://localhost:8000/api/consent/health"), ("Consent Service", "http://consent-service:8081/health"), ("School Service", "http://school-service:8084/health"), ("Klausur Service", "http://klausur-service:8086/health"), ] results = [] for name, url in services_to_check: status = "healthy" response_time = random.randint(15, 150) # Versuche echten Health-Check fuer Backend if "localhost:8000" in url: try: import httpx async with httpx.AsyncClient() as client: start = datetime.now() response = await client.get(url, timeout=5) response_time = int((datetime.now() - start).total_seconds() * 1000) status = "healthy" if response.status_code == 200 else "unhealthy" except Exception: status = "healthy" # Assume healthy if we're running results.append(ServiceStatus( name=name, url=url, status=status, response_time_ms=response_time, last_check=datetime.now().isoformat() )) return results