refactor: remove unused pages and backends (model-management, OCR legacy, GPU/vast.ai, video-chat, matrix)

Deleted pages:
- /ai/model-management (mock data only, no real backend)
- /ai/ocr-compare (old /vocab/ backend, replaced by ocr-kombi)
- /ai/ocr-pipeline (minimal session browser, redundant)
- /ai/ocr-overlay (legacy monolith, redundant)
- /ai/gpu (vast.ai GPU management, no longer used)
- /infrastructure/gpu (same)
- /communication/video-chat (moved to core)
- /communication/matrix (moved to core)

Deleted backends:
- backend-lehrer/infra/vast_client.py + vast_power.py
- backend-lehrer/meetings_api.py + jitsi_api.py
- website/app/api/admin/gpu/
- edu-search-service/scripts/vast_ai_extractor.py

Total: ~7,800 LOC removed. All code preserved in git history.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-23 13:14:12 +02:00
parent 5abdfa202e
commit f39cbe9283
30 changed files with 1089 additions and 9567 deletions

View File

@@ -1,395 +0,0 @@
'use client'
/**
* GPU Infrastructure Admin Page
*
* vast.ai GPU Management for LLM Processing
* Part of KI-Werkzeuge
*/
import { useEffect, useState, useCallback } from 'react'
import { PagePurpose } from '@/components/common/PagePurpose'
import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar'
/**
 * Status payload this page stores after fetching `/api/admin/gpu`.
 * A `null` field means the value is not available; the page renders most of
 * them as "-" in that case.
 */
interface VastStatus {
// Instance identifier, shown under "Instanz ID"
instance_id: number | null
// Lifecycle state; the page styles 'running', 'stopped', 'exited', 'loading',
// 'scheduling' and 'creating' specially and sets 'error' itself when the fetch fails
status: string
// GPU model name, displayed as-is
gpu_name: string | null
// Hourly price, rendered with a "$" prefix under "Kosten/h" and "Stundensatz"
dph_total: number | null
// Endpoint URL; only displayed while status is 'running'
endpoint_base_url: string | null
// Timestamp string; parsed with `new Date(...)` for display
last_activity: string | null
// Minutes until auto-stop, rendered with a " min" suffix
auto_shutdown_in_minutes: number | null
// Accumulated runtime, rendered with a " Stunden" suffix
total_runtime_hours: number | null
// Accumulated cost, rendered with a "$" prefix
total_cost_usd: number | null
// Remaining credit, rendered under "Budget"; drives the red/yellow/green colouring
account_credit: number | null
// Account-wide spend, rendered under "vast.ai Ausgaben"
account_total_spend: number | null
// Runtime of the current session, rounded to whole minutes for display
session_runtime_minutes: number | null
// Cost of the current session, rendered with a "$" prefix
session_cost_usd: number | null
// Free-text message; set to 'Verbindung fehlgeschlagen' in the local error fallback
message: string | null
// Error text; read from the response body when the request is not OK
error?: string
}
export default function GPUInfrastructurePage() {
const [status, setStatus] = useState<VastStatus | null>(null)
const [loading, setLoading] = useState(true)
const [actionLoading, setActionLoading] = useState<string | null>(null)
const [error, setError] = useState<string | null>(null)
const [message, setMessage] = useState<string | null>(null)
const API_PROXY = '/api/admin/gpu'
/**
 * Fetches the current GPU status from the admin proxy and stores it in state.
 *
 * On any failure the error text is put into `error` and `status` is replaced
 * by an all-null placeholder with `status: 'error'`, so the cards render "-".
 * `loading` is true for the duration of the request.
 */
const fetchStatus = useCallback(async () => {
  setLoading(true)
  setError(null)
  try {
    const response = await fetch(API_PROXY)
    if (!response.ok) {
      // An error response is not guaranteed to carry a JSON body (e.g. an HTML
      // error page from a proxy). Parse it defensively so the user sees the
      // HTTP status instead of a JSON syntax error.
      let detail: string | undefined
      try {
        detail = (await response.json())?.error
      } catch {
        detail = undefined
      }
      throw new Error(detail || `HTTP ${response.status}`)
    }
    setStatus(await response.json())
  } catch (err) {
    setError(err instanceof Error ? err.message : 'Verbindungsfehler')
    // Placeholder so the UI shows an explicit error state instead of stale data.
    setStatus({
      instance_id: null,
      status: 'error',
      gpu_name: null,
      dph_total: null,
      endpoint_base_url: null,
      last_activity: null,
      auto_shutdown_in_minutes: null,
      total_runtime_hours: null,
      total_cost_usd: null,
      account_credit: null,
      account_total_spend: null,
      session_runtime_minutes: null,
      session_cost_usd: null,
      message: 'Verbindung fehlgeschlagen'
    })
  } finally {
    setLoading(false)
  }
}, [])
// Load the status once on mount, then re-fetch it every 30 seconds until the
// component unmounts (or fetchStatus changes identity).
useEffect(() => {
  fetchStatus()
  const pollTimer = setInterval(fetchStatus, 30000)
  return () => {
    clearInterval(pollTimer)
  }
}, [fetchStatus])
/**
 * Sends a power action to the GPU admin proxy and schedules status refreshes.
 *
 * While the request is in flight `actionLoading` holds the action name. On
 * success a confirmation message is shown and the status is re-fetched after
 * 3 s and 10 s. On failure the error text is shown and the status is
 * re-fetched immediately.
 *
 * @param action 'on' to start the instance, 'off' to stop it.
 */
const sendPowerAction = async (action: 'on' | 'off') => {
  const starting = action === 'on'
  setActionLoading(action)
  setError(null)
  setMessage(null)
  try {
    const response = await fetch(API_PROXY, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ action }),
    })
    if (!response.ok) {
      // The error body may not be JSON; fall back to the generic text then.
      let reason: string | undefined
      try {
        const body = await response.json()
        reason = body?.error || body?.detail
      } catch {
        reason = undefined
      }
      throw new Error(reason || 'Aktion fehlgeschlagen')
    }
    // The success body is never used, so it is not parsed: a 2xx response
    // without a JSON body must not be reported as a failure.
    setMessage(starting ? 'Start angefordert' : 'Stop angefordert')
    setTimeout(fetchStatus, 3000)
    setTimeout(fetchStatus, 10000)
  } catch (err) {
    const fallback = starting ? 'Fehler beim Starten' : 'Fehler beim Stoppen'
    setError(err instanceof Error ? err.message : fallback)
    fetchStatus()
  } finally {
    setActionLoading(null)
  }
}
/** Requests a start of the GPU instance. */
const powerOn = () => sendPowerAction('on')
/** Requests a stop of the GPU instance. */
const powerOff = () => sendPowerAction('off')
/**
 * Returns the CSS class string for the status pill.
 *
 * 'running' is green, 'stopped'/'exited' are red, the transitional states are
 * yellow, and every other value gets the neutral slate colouring.
 */
const getStatusBadge = (s: string) => {
  const pill = 'px-3 py-1 rounded-full text-sm font-semibold uppercase'
  const haltedStates = ['stopped', 'exited']
  const transitionalStates = ['loading', 'scheduling', 'creating', 'starting...', 'stopping...']

  let palette = 'bg-slate-100 text-slate-600'
  if (s === 'running') {
    palette = 'bg-green-100 text-green-800'
  } else if (haltedStates.includes(s)) {
    palette = 'bg-red-100 text-red-800'
  } else if (transitionalStates.includes(s)) {
    palette = 'bg-yellow-100 text-yellow-800'
  }
  return `${pill} ${palette}`
}
/**
 * Picks the text colour class for the account credit: slate when unknown,
 * red below 5, yellow below 15, green otherwise.
 */
const getCreditColor = (credit: number | null) => {
  if (credit === null) {
    return 'text-slate-500'
  }
  return credit < 5
    ? 'text-red-600'
    : credit < 15
      ? 'text-yellow-600'
      : 'text-green-600'
}
return (
<div>
{/* Page Purpose */}
<PagePurpose
title="GPU Infrastruktur"
purpose="Verwalten Sie die vast.ai GPU-Instanzen fuer LLM-Verarbeitung und OCR. Starten/Stoppen Sie GPUs bei Bedarf und ueberwachen Sie Kosten in Echtzeit."
audience={['DevOps', 'Entwickler', 'System-Admins']}
architecture={{
services: ['vast.ai API', 'Ollama', 'VLLM'],
databases: ['PostgreSQL (Logs)'],
}}
relatedPages={[
{ name: 'Test Quality (BQAS)', href: '/ai/test-quality', description: 'Golden Suite & Tests' },
{ name: 'Magic Help', href: '/ai/magic-help', description: 'TrOCR Testing' },
]}
collapsible={true}
defaultCollapsed={true}
/>
{/* KI-Werkzeuge Sidebar */}
<AIToolsSidebarResponsive currentTool="gpu" />
{/* Status Cards */}
<div className="bg-white rounded-xl border border-slate-200 p-6 mb-6">
<div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-6">
<div>
<div className="text-sm text-slate-500 mb-2">Status</div>
{loading ? (
<span className="px-3 py-1 rounded-full text-sm font-semibold bg-slate-100 text-slate-600">
Laden...
</span>
) : (
<span className={getStatusBadge(
actionLoading === 'on' ? 'starting...' :
actionLoading === 'off' ? 'stopping...' :
status?.status || 'unknown'
)}>
{actionLoading === 'on' ? 'starting...' :
actionLoading === 'off' ? 'stopping...' :
status?.status || 'unbekannt'}
</span>
)}
</div>
<div>
<div className="text-sm text-slate-500 mb-2">GPU</div>
<div className="font-semibold text-slate-900">
{status?.gpu_name || '-'}
</div>
</div>
<div>
<div className="text-sm text-slate-500 mb-2">Kosten/h</div>
<div className="font-semibold text-slate-900">
{status?.dph_total ? `$${status.dph_total.toFixed(3)}` : '-'}
</div>
</div>
<div>
<div className="text-sm text-slate-500 mb-2">Auto-Stop</div>
<div className="font-semibold text-slate-900">
{status && status.auto_shutdown_in_minutes !== null
? `${status.auto_shutdown_in_minutes} min`
: '-'}
</div>
</div>
<div>
<div className="text-sm text-slate-500 mb-2">Budget</div>
<div className={`font-bold text-lg ${getCreditColor(status?.account_credit ?? null)}`}>
{status && status.account_credit !== null
? `$${status.account_credit.toFixed(2)}`
: '-'}
</div>
</div>
<div>
<div className="text-sm text-slate-500 mb-2">Session</div>
<div className="font-semibold text-slate-900">
{status && status.session_runtime_minutes !== null && status.session_cost_usd !== null
? `${Math.round(status.session_runtime_minutes)} min / $${status.session_cost_usd.toFixed(3)}`
: '-'}
</div>
</div>
</div>
{/* Buttons */}
<div className="flex items-center gap-4 mt-6 pt-6 border-t border-slate-200">
<button
onClick={powerOn}
disabled={actionLoading !== null || status?.status === 'running'}
className="px-6 py-2 bg-orange-600 text-white rounded-lg font-medium hover:bg-orange-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
>
Starten
</button>
<button
onClick={powerOff}
disabled={actionLoading !== null || status?.status !== 'running'}
className="px-6 py-2 bg-red-600 text-white rounded-lg font-medium hover:bg-red-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
>
Stoppen
</button>
<button
onClick={fetchStatus}
disabled={loading}
className="px-4 py-2 border border-slate-300 text-slate-700 rounded-lg font-medium hover:bg-slate-50 disabled:opacity-50 transition-colors"
>
{loading ? 'Aktualisiere...' : 'Aktualisieren'}
</button>
{message && (
<span className="ml-4 text-sm text-green-600 font-medium">{message}</span>
)}
{error && (
<span className="ml-4 text-sm text-red-600 font-medium">{error}</span>
)}
</div>
</div>
{/* Extended Stats */}
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6 mb-6">
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h3 className="font-semibold text-slate-900 mb-4">Kosten-Uebersicht</h3>
<div className="space-y-4">
<div className="flex justify-between items-center">
<span className="text-slate-600">Session Laufzeit</span>
<span className="font-semibold">
{status && status.session_runtime_minutes !== null
? `${Math.round(status.session_runtime_minutes)} Minuten`
: '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">Session Kosten</span>
<span className="font-semibold">
{status && status.session_cost_usd !== null
? `$${status.session_cost_usd.toFixed(4)}`
: '-'}
</span>
</div>
<div className="flex justify-between items-center pt-4 border-t border-slate-100">
<span className="text-slate-600">Gesamtlaufzeit</span>
<span className="font-semibold">
{status && status.total_runtime_hours !== null
? `${status.total_runtime_hours.toFixed(1)} Stunden`
: '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">Gesamtkosten</span>
<span className="font-semibold">
{status && status.total_cost_usd !== null
? `$${status.total_cost_usd.toFixed(2)}`
: '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">vast.ai Ausgaben</span>
<span className="font-semibold">
{status && status.account_total_spend !== null
? `$${status.account_total_spend.toFixed(2)}`
: '-'}
</span>
</div>
</div>
</div>
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h3 className="font-semibold text-slate-900 mb-4">Instanz-Details</h3>
<div className="space-y-4">
<div className="flex justify-between items-center">
<span className="text-slate-600">Instanz ID</span>
<span className="font-mono text-sm">
{status?.instance_id || '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">GPU</span>
<span className="font-semibold">
{status?.gpu_name || '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">Stundensatz</span>
<span className="font-semibold">
{status?.dph_total ? `$${status.dph_total.toFixed(4)}/h` : '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">Letzte Aktivitaet</span>
<span className="text-sm">
{status?.last_activity
? new Date(status.last_activity).toLocaleString('de-DE')
: '-'}
</span>
</div>
{status?.endpoint_base_url && status.status === 'running' && (
<div className="pt-4 border-t border-slate-100">
<div className="text-slate-600 text-sm mb-1">Endpoint</div>
<code className="text-xs bg-slate-100 px-2 py-1 rounded block overflow-x-auto">
{status.endpoint_base_url}
</code>
</div>
)}
</div>
</div>
</div>
{/* Info */}
<div className="bg-violet-50 border border-violet-200 rounded-xl p-4">
<div className="flex gap-3">
<svg className="w-5 h-5 text-violet-600 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
<div>
<h4 className="font-semibold text-violet-900">Auto-Shutdown</h4>
<p className="text-sm text-violet-800 mt-1">
Die GPU-Instanz wird automatisch gestoppt, wenn sie laengere Zeit inaktiv ist.
Der Status wird alle 30 Sekunden automatisch aktualisiert.
</p>
</div>
</div>
</div>
</div>
)
}

View File

@@ -1,549 +0,0 @@
'use client'
/**
* Model Management Page
*
* Manage ML model backends (PyTorch vs ONNX), view status,
* run benchmarks, and configure inference settings.
*/
import { useState, useEffect, useCallback } from 'react'
import { PagePurpose } from '@/components/common/PagePurpose'
const KLAUSUR_API = '/klausur-api'
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
// Inference backend preference; the UI describes 'auto' as ONNX when available with a PyTorch fallback
type BackendMode = 'auto' | 'pytorch' | 'onnx'
// Availability state of one model variant, rendered by StatusBadge
type ModelStatus = 'available' | 'not_found' | 'loading' | 'error'
// Keys of the three page tabs
type Tab = 'overview' | 'benchmarks' | 'configuration'
/** One model with the state of its PyTorch and ONNX variants. */
interface ModelInfo {
// Display name shown as the card heading
name: string
// Identifier used as the React list key and shown in monospace under the name
key: string
// size_mb / ram_mb are formatted via formatBytes
pytorch: { status: ModelStatus; size_mb: number; ram_mb: number }
// `quantized` adds an "INT8" tag next to the ONNX sizes
onnx: { status: ModelStatus; size_mb: number; ram_mb: number; quantized: boolean }
}
/** One row of the benchmark comparison table. */
interface BenchmarkRow {
model: string
// Rows whose backend is exactly 'ONNX' get the violet tag, all others the orange one
backend: string
quantization: string
size_mb: number
ram_mb: number
// Rendered with an " ms" suffix; coloured green below 50, amber below 100, red otherwise
inference_ms: number
// Rendered with one decimal and an "s" suffix
load_time_s: number
}
/** Runtime status shown in the four status cards. */
interface StatusInfo {
active_backend: BackendMode
loaded_models: string[]
// cache_hits / (cache_hits + cache_misses) is displayed as the hit rate
cache_hits: number
cache_misses: number
// Passed to formatUptime, which treats it as seconds
uptime_s: number
}
// ---------------------------------------------------------------------------
// Mock data (used when backend is not available)
// ---------------------------------------------------------------------------
// Initial value of the `models` state; kept when the models request fails or returns no entries.
const MOCK_MODELS: ModelInfo[] = [
{
name: 'TrOCR Printed',
key: 'trocr_printed',
pytorch: { status: 'available', size_mb: 892, ram_mb: 1800 },
onnx: { status: 'available', size_mb: 234, ram_mb: 620, quantized: true },
},
{
name: 'TrOCR Handwritten',
key: 'trocr_handwritten',
pytorch: { status: 'available', size_mb: 892, ram_mb: 1800 },
onnx: { status: 'not_found', size_mb: 0, ram_mb: 0, quantized: false },
},
{
name: 'PP-DocLayout',
key: 'pp_doclayout',
pytorch: { status: 'not_found', size_mb: 0, ram_mb: 0 },
onnx: { status: 'available', size_mb: 48, ram_mb: 180, quantized: false },
},
]
// Initial value of the `benchmarks` state; kept when the benchmarks request fails or returns no rows.
const MOCK_BENCHMARKS: BenchmarkRow[] = [
{ model: 'TrOCR Printed', backend: 'PyTorch', quantization: 'FP32', size_mb: 892, ram_mb: 1800, inference_ms: 142, load_time_s: 3.2 },
{ model: 'TrOCR Printed', backend: 'ONNX', quantization: 'INT8', size_mb: 234, ram_mb: 620, inference_ms: 38, load_time_s: 0.8 },
{ model: 'TrOCR Handwritten', backend: 'PyTorch', quantization: 'FP32', size_mb: 892, ram_mb: 1800, inference_ms: 156, load_time_s: 3.4 },
{ model: 'PP-DocLayout', backend: 'ONNX', quantization: 'FP32', size_mb: 48, ram_mb: 180, inference_ms: 22, load_time_s: 0.3 },
]
// Initial value of the `status` state; replaced once the status request succeeds.
const MOCK_STATUS: StatusInfo = {
active_backend: 'auto',
loaded_models: ['trocr_printed (ONNX)', 'pp_doclayout (ONNX)'],
cache_hits: 1247,
cache_misses: 83,
uptime_s: 86400,
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Small coloured pill showing the availability of one model variant.
 * Any status other than 'available', 'loading' or 'not_found' is shown as an error.
 */
function StatusBadge({ status }: { status: ModelStatus }) {
  let cls: string
  let label: string
  switch (status) {
    case 'available':
      cls = 'bg-emerald-100 text-emerald-800 border-emerald-200'
      label = 'Verfuegbar'
      break
    case 'loading':
      cls = 'bg-blue-100 text-blue-800 border-blue-200'
      label = 'Laden...'
      break
    case 'not_found':
      cls = 'bg-slate-100 text-slate-500 border-slate-200'
      label = 'Nicht vorhanden'
      break
    default:
      // 'error' and any unexpected value share the red styling.
      cls = 'bg-red-100 text-red-800 border-red-200'
      label = 'Fehler'
  }
  return (
    <span className={`inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium border ${cls}`}>
      {label}
    </span>
  )
}
/**
 * Formats a size given in megabytes for display.
 * Zero is shown as '--', values from 1000 upward as gigabytes with one
 * decimal, everything else as plain megabytes.
 */
function formatBytes(mb: number) {
  if (mb === 0) {
    return '--'
  }
  const showAsGigabytes = mb >= 1000
  return showAsGigabytes ? `${(mb / 1000).toFixed(1)} GB` : `${mb} MB`
}
/**
 * Formats a duration in seconds as "<h>h <m>m", or just "<m>m" when it is
 * shorter than one hour. Seconds beyond the last full minute are dropped.
 */
function formatUptime(seconds: number) {
  const hours = Math.floor(seconds / 3600)
  const minutes = Math.floor((seconds % 3600) / 60)
  return hours > 0 ? `${hours}h ${minutes}m` : `${minutes}m`
}
// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------
export default function ModelManagementPage() {
const [tab, setTab] = useState<Tab>('overview')
const [models, setModels] = useState<ModelInfo[]>(MOCK_MODELS)
const [benchmarks, setBenchmarks] = useState<BenchmarkRow[]>(MOCK_BENCHMARKS)
const [status, setStatus] = useState<StatusInfo>(MOCK_STATUS)
const [backend, setBackend] = useState<BackendMode>('auto')
const [saving, setSaving] = useState(false)
const [benchmarkRunning, setBenchmarkRunning] = useState(false)
const [usingMock, setUsingMock] = useState(false)
// Fetch the backend status. On success the status and the selected backend are
// taken from the response; on a non-OK response or any error the page is
// flagged as running on mock data.
const loadStatus = useCallback(async () => {
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/models/status`)
    if (!response.ok) {
      setUsingMock(true)
      return
    }
    const payload = await response.json()
    setStatus(payload)
    setBackend(payload.active_backend || 'auto')
    setUsingMock(false)
  } catch {
    setUsingMock(true)
  }
}, [])
// Fetch the model list. The current list is only replaced when the response is
// OK and contains at least one model; otherwise the existing data is kept.
const loadModels = useCallback(async () => {
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/models`)
    if (!response.ok) {
      return
    }
    const payload = await response.json()
    if (payload.models?.length) {
      setModels(payload.models)
    }
  } catch {
    // Keep mock data
  }
}, [])
// Fetch stored benchmark rows. The current rows are only replaced when the
// response is OK and contains at least one row; otherwise they are kept.
const loadBenchmarks = useCallback(async () => {
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/models/benchmarks`)
    if (!response.ok) {
      return
    }
    const payload = await response.json()
    if (payload.benchmarks?.length) {
      setBenchmarks(payload.benchmarks)
    }
  } catch {
    // Keep mock data
  }
}, [])
useEffect(() => {
loadStatus()
loadModels()
loadBenchmarks()
}, [loadStatus, loadModels, loadBenchmarks])
// Persist the backend preference. The selection is applied to local state
// immediately, then sent to the service, and the status is reloaded afterwards.
// Request failures are ignored on purpose so the page stays usable without a backend.
const saveBackend = async (mode: BackendMode) => {
  setBackend(mode)
  setSaving(true)
  const endpoint = `${KLAUSUR_API}/api/v1/models/backend`
  const payload = JSON.stringify({ backend: mode })
  try {
    await fetch(endpoint, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: payload,
    })
    await loadStatus()
  } catch {
    // Silently handle — mock mode
  } finally {
    setSaving(false)
  }
}
// Trigger a benchmark run. Rows returned directly by the run are shown first,
// then the stored benchmarks are reloaded. `benchmarkRunning` is true for the
// whole duration; on any error the rows already on screen are kept.
const runBenchmark = async () => {
  setBenchmarkRunning(true)
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/models/benchmark`, { method: 'POST' })
    if (response.ok) {
      const payload = await response.json()
      const freshRows = payload.benchmarks
      if (freshRows?.length) {
        setBenchmarks(freshRows)
      }
    }
    await loadBenchmarks()
  } catch {
    // Keep existing data
  } finally {
    setBenchmarkRunning(false)
  }
}
const tabs: { key: Tab; label: string }[] = [
{ key: 'overview', label: 'Uebersicht' },
{ key: 'benchmarks', label: 'Benchmarks' },
{ key: 'configuration', label: 'Konfiguration' },
]
return (
<div className="space-y-6">
<div className="max-w-7xl mx-auto p-6 space-y-6">
<PagePurpose
title="Model Management"
purpose="Verwaltung der ML-Modelle fuer OCR und Layout-Erkennung. Vergleich von PyTorch- und ONNX-Backends, Benchmark-Tests und Backend-Konfiguration."
audience={['Entwickler', 'DevOps']}
defaultCollapsed
architecture={{
services: ['klausur-service (FastAPI, Port 8086)'],
databases: ['Dateisystem (Modell-Dateien)'],
}}
relatedPages={[
{ name: 'OCR Pipeline', href: '/ai/ocr-pipeline', description: 'OCR-Pipeline ausfuehren' },
{ name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'OCR-Methoden vergleichen' },
{ name: 'GPU Infrastruktur', href: '/ai/gpu', description: 'GPU-Ressourcen verwalten' },
]}
/>
{/* Header */}
<div className="flex items-center justify-between">
<div>
<h1 className="text-2xl font-bold text-slate-900">Model Management</h1>
<p className="text-sm text-slate-500 mt-1">
{models.length} Modelle konfiguriert
{usingMock && (
<span className="ml-2 text-xs bg-amber-100 text-amber-700 px-1.5 py-0.5 rounded">
Mock-Daten (Backend nicht erreichbar)
</span>
)}
</p>
</div>
</div>
{/* Status Cards */}
<div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4">
<div className="bg-white rounded-lg border border-slate-200 px-4 py-3">
<p className="text-xs text-slate-500 uppercase font-medium">Aktives Backend</p>
<p className="text-lg font-semibold text-slate-900 mt-1">{status.active_backend.toUpperCase()}</p>
</div>
<div className="bg-white rounded-lg border border-slate-200 px-4 py-3">
<p className="text-xs text-slate-500 uppercase font-medium">Geladene Modelle</p>
<p className="text-lg font-semibold text-slate-900 mt-1">{status.loaded_models.length}</p>
</div>
<div className="bg-white rounded-lg border border-slate-200 px-4 py-3">
<p className="text-xs text-slate-500 uppercase font-medium">Cache Hit-Rate</p>
<p className="text-lg font-semibold text-slate-900 mt-1">
{status.cache_hits + status.cache_misses > 0
? `${((status.cache_hits / (status.cache_hits + status.cache_misses)) * 100).toFixed(1)}%`
: '--'}
</p>
</div>
<div className="bg-white rounded-lg border border-slate-200 px-4 py-3">
<p className="text-xs text-slate-500 uppercase font-medium">Uptime</p>
<p className="text-lg font-semibold text-slate-900 mt-1">{formatUptime(status.uptime_s)}</p>
</div>
</div>
{/* Tabs */}
<div className="border-b border-slate-200">
<nav className="flex gap-4">
{tabs.map(t => (
<button
key={t.key}
onClick={() => setTab(t.key)}
className={`pb-3 px-1 text-sm font-medium border-b-2 transition-colors ${
tab === t.key
? 'border-teal-500 text-teal-600'
: 'border-transparent text-slate-500 hover:text-slate-700'
}`}
>
{t.label}
</button>
))}
</nav>
</div>
{/* Overview Tab */}
{tab === 'overview' && (
<div className="space-y-4">
<h3 className="text-sm font-medium text-slate-700">Verfuegbare Modelle</h3>
<div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-3">
{models.map(m => (
<div key={m.key} className="bg-white rounded-lg border border-slate-200 overflow-hidden">
<div className="px-4 py-3 border-b border-slate-100">
<h4 className="font-semibold text-slate-900">{m.name}</h4>
<p className="text-xs text-slate-400 mt-0.5 font-mono">{m.key}</p>
</div>
<div className="px-4 py-3 space-y-3">
{/* PyTorch */}
<div className="flex items-center justify-between">
<div className="flex items-center gap-2">
<span className="text-xs font-medium text-slate-600 w-16">PyTorch</span>
<StatusBadge status={m.pytorch.status} />
</div>
{m.pytorch.status === 'available' && (
<span className="text-xs text-slate-400">
{formatBytes(m.pytorch.size_mb)} / {formatBytes(m.pytorch.ram_mb)} RAM
</span>
)}
</div>
{/* ONNX */}
<div className="flex items-center justify-between">
<div className="flex items-center gap-2">
<span className="text-xs font-medium text-slate-600 w-16">ONNX</span>
<StatusBadge status={m.onnx.status} />
</div>
{m.onnx.status === 'available' && (
<span className="text-xs text-slate-400">
{formatBytes(m.onnx.size_mb)} / {formatBytes(m.onnx.ram_mb)} RAM
{m.onnx.quantized && (
<span className="ml-1 text-xs bg-violet-100 text-violet-700 px-1 rounded">INT8</span>
)}
</span>
)}
</div>
</div>
</div>
))}
</div>
{/* Loaded Models List */}
{status.loaded_models.length > 0 && (
<div>
<h3 className="text-sm font-medium text-slate-700 mb-2">Aktuell geladen</h3>
<div className="flex flex-wrap gap-2">
{status.loaded_models.map((m, i) => (
<span key={i} className="inline-flex items-center px-3 py-1 rounded-full text-sm bg-teal-50 text-teal-700 border border-teal-200">
{m}
</span>
))}
</div>
</div>
)}
</div>
)}
{/* Benchmarks Tab */}
{tab === 'benchmarks' && (
<div className="space-y-4">
<div className="flex items-center justify-between">
<h3 className="text-sm font-medium text-slate-700">PyTorch vs ONNX Vergleich</h3>
<button
onClick={runBenchmark}
disabled={benchmarkRunning}
className="inline-flex items-center gap-2 px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed text-sm font-medium transition-colors"
>
{benchmarkRunning ? (
<>
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
Benchmark laeuft...
</>
) : (
'Benchmark starten'
)}
</button>
</div>
<div className="bg-white rounded-lg border border-slate-200 overflow-hidden">
<div className="overflow-x-auto">
<table className="w-full text-sm">
<thead>
<tr className="border-b border-slate-200 bg-slate-50 text-left text-slate-500">
<th className="px-4 py-3 font-medium">Modell</th>
<th className="px-4 py-3 font-medium">Backend</th>
<th className="px-4 py-3 font-medium">Quantisierung</th>
<th className="px-4 py-3 font-medium text-right">Groesse</th>
<th className="px-4 py-3 font-medium text-right">RAM</th>
<th className="px-4 py-3 font-medium text-right">Inferenz</th>
<th className="px-4 py-3 font-medium text-right">Ladezeit</th>
</tr>
</thead>
<tbody>
{benchmarks.map((b, i) => (
<tr key={i} className="border-b border-slate-100 hover:bg-slate-50">
<td className="px-4 py-3 font-medium text-slate-900">{b.model}</td>
<td className="px-4 py-3">
<span className={`inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${
b.backend === 'ONNX'
? 'bg-violet-100 text-violet-700'
: 'bg-orange-100 text-orange-700'
}`}>
{b.backend}
</span>
</td>
<td className="px-4 py-3 text-slate-600">{b.quantization}</td>
<td className="px-4 py-3 text-right text-slate-600">{formatBytes(b.size_mb)}</td>
<td className="px-4 py-3 text-right text-slate-600">{formatBytes(b.ram_mb)}</td>
<td className="px-4 py-3 text-right">
<span className={`font-mono ${b.inference_ms < 50 ? 'text-emerald-600' : b.inference_ms < 100 ? 'text-amber-600' : 'text-red-600'}`}>
{b.inference_ms} ms
</span>
</td>
<td className="px-4 py-3 text-right text-slate-500">{b.load_time_s.toFixed(1)}s</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
{benchmarks.length === 0 && (
<div className="text-center py-12 text-slate-400">
<p className="text-lg">Keine Benchmark-Daten</p>
<p className="text-sm mt-1">Klicken Sie &quot;Benchmark starten&quot; um einen Vergleich durchzufuehren.</p>
</div>
)}
</div>
)}
{/* Configuration Tab */}
{tab === 'configuration' && (
<div className="space-y-6">
{/* Backend Selector */}
<div className="bg-white rounded-lg border border-slate-200 p-5">
<h3 className="text-sm font-semibold text-slate-900 mb-1">Inference Backend</h3>
<p className="text-sm text-slate-500 mb-4">
Waehlen Sie welches Backend fuer die Modell-Inferenz verwendet werden soll.
</p>
<div className="space-y-3">
{([
{
mode: 'auto' as const,
label: 'Auto',
desc: 'ONNX wenn verfuegbar, Fallback auf PyTorch.',
},
{
mode: 'pytorch' as const,
label: 'PyTorch',
desc: 'Immer PyTorch verwenden. Hoeherer RAM-Verbrauch, volle Flexibilitaet.',
},
{
mode: 'onnx' as const,
label: 'ONNX',
desc: 'Immer ONNX verwenden. Schneller und weniger RAM, Fehler wenn nicht vorhanden.',
},
] as const).map(opt => (
<label
key={opt.mode}
className={`flex items-start gap-3 p-3 rounded-lg border cursor-pointer transition-colors ${
backend === opt.mode
? 'border-teal-300 bg-teal-50'
: 'border-slate-200 hover:bg-slate-50'
}`}
>
<input
type="radio"
name="backend"
value={opt.mode}
checked={backend === opt.mode}
onChange={() => saveBackend(opt.mode)}
disabled={saving}
className="mt-1 text-teal-600 focus:ring-teal-500"
/>
<div>
<span className="font-medium text-slate-900">{opt.label}</span>
<p className="text-sm text-slate-500 mt-0.5">{opt.desc}</p>
</div>
</label>
))}
</div>
{saving && (
<p className="text-xs text-teal-600 mt-3">Speichere...</p>
)}
</div>
{/* Model Details Table */}
<div className="bg-white rounded-lg border border-slate-200 p-5">
<h3 className="text-sm font-semibold text-slate-900 mb-4">Modell-Details</h3>
<div className="overflow-x-auto">
<table className="w-full text-sm">
<thead>
<tr className="border-b border-slate-200 text-left text-slate-500">
<th className="pb-2 font-medium">Modell</th>
<th className="pb-2 font-medium">PyTorch</th>
<th className="pb-2 font-medium text-right">Groesse (PT)</th>
<th className="pb-2 font-medium">ONNX</th>
<th className="pb-2 font-medium text-right">Groesse (ONNX)</th>
<th className="pb-2 font-medium text-right">Einsparung</th>
</tr>
</thead>
<tbody>
{models.map(m => {
const ptAvail = m.pytorch.status === 'available'
const oxAvail = m.onnx.status === 'available'
const savings = ptAvail && oxAvail && m.pytorch.size_mb > 0
? Math.round((1 - m.onnx.size_mb / m.pytorch.size_mb) * 100)
: null
return (
<tr key={m.key} className="border-b border-slate-100">
<td className="py-2.5 font-medium text-slate-900">{m.name}</td>
<td className="py-2.5"><StatusBadge status={m.pytorch.status} /></td>
<td className="py-2.5 text-right text-slate-500">{ptAvail ? formatBytes(m.pytorch.size_mb) : '--'}</td>
<td className="py-2.5"><StatusBadge status={m.onnx.status} /></td>
<td className="py-2.5 text-right text-slate-500">{oxAvail ? formatBytes(m.onnx.size_mb) : '--'}</td>
<td className="py-2.5 text-right">
{savings !== null ? (
<span className="text-emerald-600 font-medium">-{savings}%</span>
) : (
<span className="text-slate-300">--</span>
)}
</td>
</tr>
)
})}
</tbody>
</table>
</div>
</div>
</div>
)}
</div>
</div>
)
}

File diff suppressed because it is too large Load Diff

View File

@@ -127,7 +127,6 @@ function OcrKombiContent() {
databases: ['PostgreSQL Sessions'],
}}
relatedPages={[
{ name: 'OCR Overlay (Legacy)', href: '/ai/ocr-overlay', description: 'Alter 3-Modi-Monolith' },
{ name: 'OCR Regression', href: '/ai/ocr-regression', description: 'Regressionstests' },
]}
defaultCollapsed

View File

@@ -1,751 +0,0 @@
'use client'
import { useCallback, useEffect, useState, useRef } from 'react'
import { useSearchParams } from 'next/navigation'
import { PagePurpose } from '@/components/common/PagePurpose'
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation'
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction'
import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep'
import { GridEditor } from '@/components/grid-editor/GridEditor'
import { StepGridReview } from '@/components/ocr-pipeline/StepGridReview'
import { BoxSessionTabs } from '@/components/ocr-pipeline/BoxSessionTabs'
import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types'
import type { SubSession } from '../ocr-pipeline/types'
const KLAUSUR_API = '/klausur-api'
export default function OcrOverlayPage() {
const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi'>('pipeline')
const [currentStep, setCurrentStep] = useState(0)
const [sessionId, setSessionId] = useState<string | null>(null)
const [sessionName, setSessionName] = useState<string>('')
const [sessions, setSessions] = useState<SessionListItem[]>([])
const [loadingSessions, setLoadingSessions] = useState(true)
const [editingName, setEditingName] = useState<string | null>(null)
const [editNameValue, setEditNameValue] = useState('')
const [editingCategory, setEditingCategory] = useState<string | null>(null)
const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
const [editingActiveCategory, setEditingActiveCategory] = useState(false)
const [subSessions, setSubSessions] = useState<SubSession[]>([])
const [parentSessionId, setParentSessionId] = useState<string | null>(null)
const [isGroundTruth, setIsGroundTruth] = useState(false)
const [gtSaving, setGtSaving] = useState(false)
const [gtMessage, setGtMessage] = useState('')
const [steps, setSteps] = useState<PipelineStep[]>(
OVERLAY_PIPELINE_STEPS.map((s, i) => ({
...s,
status: i === 0 ? 'active' : 'pending',
})),
)
const searchParams = useSearchParams()
const deepLinkHandled = useRef(false)
const gridSaveRef = useRef<(() => Promise<void>) | null>(null)
useEffect(() => {
loadSessions()
}, [])
/**
 * Fetches the session list and keeps only top-level sessions, i.e. entries
 * without a `parent_session_id`. A non-OK response leaves the current list
 * untouched; a thrown error is logged. The loading flag is always cleared.
 */
const loadSessions = async () => {
  setLoadingSessions(true)
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
    if (!response.ok) return
    const payload = await response.json()
    const isTopLevel = (item: SessionListItem) => !item.parent_session_id
    setSessions((payload.sessions || []).filter(isTopLevel))
  } catch (e) {
    console.error('Failed to load sessions:', e)
  } finally {
    setLoadingSessions(false)
  }
}
/**
 * Loads one session from the API and restores the page state for it:
 * name, category, ground-truth flag, sub-session tabs, mode, step list and
 * the active step.
 *
 * @param sid             Session id to open.
 * @param keepSubSessions When true, the current sub-session tabs are kept if
 *                        the loaded session neither has sub-sessions nor a
 *                        parent.
 *
 * A non-OK response aborts silently; thrown errors are logged.
 */
const openSession = useCallback(async (sid: string, keepSubSessions?: boolean) => {
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
if (!res.ok) return
const data = await res.json()
setSessionId(sid)
setSessionName(data.name || data.filename || '')
setActiveCategory(data.document_category || undefined)
setIsGroundTruth(!!data.ground_truth?.build_grid_reference)
setGtMessage('')
// Sub-session handling:
//  - session has children  -> it becomes the parent and its children the tabs
//  - session has a parent  -> only the parent id is updated, tabs are kept
//  - otherwise             -> tabs are cleared unless keepSubSessions is set
if (data.sub_sessions && data.sub_sessions.length > 0) {
setSubSessions(data.sub_sessions)
setParentSessionId(sid)
} else if (data.parent_session_id) {
setParentSessionId(data.parent_session_id)
} else if (!keepSubSessions) {
setSubSessions([])
setParentSessionId(null)
}
const isSubSession = !!data.parent_session_id
// Mode detection for root sessions with word_result
const ocrEngine = data.word_result?.ocr_engine
const isPaddleDirect = ocrEngine === 'paddle_direct'
const isKombi = ocrEngine === 'kombi' || ocrEngine === 'rapid_kombi'
// `mode` is the value captured when this callback was created (see deps).
let activeMode = mode // keep current mode for sub-sessions
if (!isSubSession && (isPaddleDirect || isKombi)) {
activeMode = isKombi ? 'kombi' : 'paddle-direct'
setMode(activeMode)
} else if (!isSubSession && !ocrEngine) {
// Unprocessed root session: keep the user's selected mode
// (no-op: activeMode already equals mode at this point)
activeMode = mode
}
const baseSteps = activeMode === 'kombi' ? KOMBI_STEPS
: activeMode === 'paddle-direct' ? PADDLE_DIRECT_STEPS
: OVERLAY_PIPELINE_STEPS
// Determine UI step
let uiStep: number
const skipIds: string[] = []
if (!isSubSession && (isPaddleDirect || isKombi)) {
const hasGrid = isKombi && data.grid_editor_result
const hasStructure = isKombi && data.structure_result
// Kombi: grid or structure result -> step 6, word result -> 5, else 4.
uiStep = hasGrid ? 6 : hasStructure ? 6 : data.word_result ? 5 : 4
// Paddle Direct: both ternary arms are 4, so this always resolves to step 4.
if (isPaddleDirect) uiStep = data.word_result ? 4 : 4
} else {
// Pipeline-mode root sessions and all sub-sessions: derive from current_step.
// NOTE(review): every dbStep above 5 maps to UI step 4 here, whereas the
// imported (unused) dbStepToOverlayUi maps 8 -> 5 and 9+ -> 6. Confirm the
// cap at 4 is intended for pipeline-mode root sessions.
const dbStep = data.current_step || 1
if (dbStep <= 2) uiStep = 0
else if (dbStep === 3) uiStep = 1
else if (dbStep === 4) uiStep = 2
else if (dbStep === 5) uiStep = 3
else uiStep = 4
// Sub-session skip logic
if (isSubSession) {
if (dbStep >= 5) {
skipIds.push('orientation', 'deskew', 'dewarp', 'crop')
if (uiStep < 4) uiStep = 4
} else if (dbStep >= 2) {
skipIds.push('orientation')
if (uiStep < 1) uiStep = 1 // advance past skipped orientation to deskew
}
}
}
// Steps listed in skipIds are marked 'skipped'; the rest are derived from
// their position relative to uiStep.
setSteps(
baseSteps.map((s, i) => ({
...s,
status: skipIds.includes(s.id)
? 'skipped'
: i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending',
})),
)
setCurrentStep(uiStep)
} catch (e) {
console.error('Failed to open session:', e)
}
}, [mode])
// Handle deep-link: ?session=xxx&mode=kombi (from GT Queue page)
// Runs at most once per mount: deepLinkHandled is set as soon as a `session`
// query parameter has been seen. Only 'kombi' and 'paddle-direct' are accepted
// as `mode`; any other value leaves the current mode unchanged.
// NOTE(review): openSession is invoked in the same tick as setMode, so it
// still reads the `mode` it was created with. For a session without
// word_result.ocr_engine it would rebuild the steps from that older mode —
// confirm this is acceptable.
useEffect(() => {
if (deepLinkHandled.current) return
const urlSession = searchParams.get('session')
const urlMode = searchParams.get('mode')
if (urlSession) {
deepLinkHandled.current = true
if (urlMode === 'kombi' || urlMode === 'paddle-direct') {
setMode(urlMode)
const baseSteps = urlMode === 'kombi' ? KOMBI_STEPS : PADDLE_DIRECT_STEPS
setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}
openSession(urlSession)
}
}, [searchParams, openSession])
/**
 * Deletes a session on the server and then removes it from the local list.
 * If the deleted session is the one currently open, the page is reset to the
 * first step of the current mode.
 *
 * Local state is only changed once the server has confirmed the deletion.
 * A 404 is treated as success because the session no longer exists.
 *
 * @param sid Id of the session to delete.
 */
const deleteSession = useCallback(async (sid: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
    // fetch() resolves for HTTP error statuses, so the status must be checked
    // explicitly; otherwise a failed delete would still drop the row locally.
    if (!res.ok && res.status !== 404) {
      console.error('Failed to delete session:', res.status)
      return
    }
    setSessions((prev) => prev.filter((s) => s.id !== sid))
    if (sessionId === sid) {
      setSessionId(null)
      setCurrentStep(0)
      setSubSessions([])
      setParentSessionId(null)
      const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS
      setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
    }
  } catch (e) {
    console.error('Failed to delete session:', e)
  }
}, [sessionId, mode])
/**
 * Persists a new session name and mirrors it into the local list (and into
 * the active-session label when the renamed session is the open one).
 *
 * The local state is only updated when the server accepted the change.
 * The inline name editor is closed in every case.
 *
 * @param sid     Id of the session to rename.
 * @param newName Name to store.
 */
const renameSession = useCallback(async (sid: string, newName: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: newName }),
    })
    // fetch() does not reject on HTTP errors; do not show a name the server refused.
    if (res.ok) {
      setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s)))
      if (sessionId === sid) setSessionName(newName)
    } else {
      console.error('Failed to rename session:', res.status)
    }
  } catch (e) {
    console.error('Failed to rename session:', e)
  }
  setEditingName(null)
}, [sessionId])
/**
 * Persists a session's document category and mirrors it into the local list
 * (and into the active-session badge when that session is the open one).
 *
 * The local state is only updated when the server accepted the change.
 * The per-row category dropdown is closed in every case.
 *
 * @param sid      Id of the session to update.
 * @param category Category value to store.
 */
const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ document_category: category }),
    })
    // fetch() does not reject on HTTP errors; do not show a category the server refused.
    if (res.ok) {
      setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s)))
      if (sessionId === sid) setActiveCategory(category)
    } else {
      console.error('Failed to update category:', res.status)
    }
  } catch (e) {
    console.error('Failed to update category:', e)
  }
  setEditingCategory(null)
}, [sessionId])
/**
 * Stepper click handler: jumps to a step only if it is at or before the
 * current one, or is already marked completed.
 */
const handleStepClick = (index: number) => {
  const reachable = index <= currentStep || steps[index].status === 'completed'
  if (!reachable) return
  setCurrentStep(index)
}
/**
 * Makes `step` the active step and recomputes every status from its position:
 * earlier steps become completed, later ones pending.
 */
const goToStep = (step: number) => {
  setCurrentStep(step)
  setSteps((prev) =>
    prev.map((entry, idx) => {
      if (idx < step) return { ...entry, status: 'completed' }
      if (idx === step) return { ...entry, status: 'active' }
      return { ...entry, status: 'pending' }
    }),
  )
}
/**
 * Advances the wizard by one step.
 *
 * On the last step it either switches back to the parent session (when a
 * sub-session was active) or resets the page and reloads the session list.
 */
const handleNext = () => {
  const onLastStep = currentStep >= steps.length - 1

  if (!onLastStep) {
    // Regular case: complete the current step and activate the following one.
    const upcoming = currentStep + 1
    setSteps((prev) =>
      prev.map((s, i) => {
        if (i === currentStep) return { ...s, status: 'completed' }
        if (i === upcoming) return { ...s, status: 'active' }
        return s
      }),
    )
    setCurrentStep(upcoming)
    return
  }

  // Sub-session completed — switch back to parent
  const inSubSession = parentSessionId && sessionId !== parentSessionId
  if (inSubSession) {
    setSubSessions((prev) =>
      prev.map((s) => s.id === sessionId ? { ...s, status: 'completed', current_step: 10 } : s)
    )
    handleSessionChange(parentSessionId)
    return
  }

  // Last step completed — return to session list
  let freshSteps = OVERLAY_PIPELINE_STEPS
  if (mode === 'kombi') freshSteps = KOMBI_STEPS
  else if (mode === 'paddle-direct') freshSteps = PADDLE_DIRECT_STEPS
  setSteps(freshSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
  setCurrentStep(0)
  setSessionId(null)
  setSubSessions([])
  setParentSessionId(null)
  loadSessions()
}
/**
 * Called by the orientation step with the id of the session it worked on.
 *
 * Stores the id, refreshes the list, then asks the API whether the session
 * has sub-sessions. If so, they become the tabs and the first one is opened;
 * otherwise (or if the lookup fails) the wizard simply advances.
 */
const handleOrientationComplete = async (sid: string) => {
  setSessionId(sid)
  loadSessions()
  // Check for page-split sub-sessions directly from API
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`)
    if (response.ok) {
      const detail = await response.json()
      if (detail.sub_sessions?.length > 0) {
        // Keep only the fields the tab bar needs.
        const children: SubSession[] = detail.sub_sessions.map(
          ({ id, name, box_index, current_step }: SubSession) => ({ id, name, box_index, current_step }),
        )
        setSubSessions(children)
        setParentSessionId(sid)
        openSession(children[0].id, true)
        return
      }
    }
  } catch (e) {
    console.error('Failed to check for sub-sessions:', e)
  }
  handleNext()
}
/**
 * Stores newly created sub-sessions and, when a session is open, records it
 * as their parent.
 */
const handleBoxSessionsCreated = useCallback(
  (subs: SubSession[]) => {
    setSubSessions(subs)
    if (!sessionId) return
    setParentSessionId(sessionId)
  },
  [sessionId],
)
/**
 * Tab-switch handler: opens the given session while keeping the current
 * sub-session tabs (keepSubSessions = true).
 */
const handleSessionChange = useCallback((newSessionId: string) => {
openSession(newSessionId, true)
}, [openSession])
/**
 * Clears the active session and resets the stepper to the first step of the
 * currently selected mode.
 */
const handleNewSession = () => {
  let freshSteps = OVERLAY_PIPELINE_STEPS
  if (mode === 'kombi') freshSteps = KOMBI_STEPS
  else if (mode === 'paddle-direct') freshSteps = PADDLE_DIRECT_STEPS

  setSessionId(null)
  setSessionName('')
  setCurrentStep(0)
  setSubSessions([])
  setParentSessionId(null)
  setSteps(freshSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}
// Step labels keyed by 1-based step number; used for the text of the
// reprocess confirmation dialog (looked up as stepNames[uiStep + 1]).
const stepNames: Record<number, string> = {
1: 'Orientierung',
2: 'Begradigung',
3: 'Entzerrung',
4: 'Zuschneiden',
5: 'Zeilen',
6: 'Woerter',
7: 'Overlay',
}
/**
 * Asks the backend to reprocess the active session starting at the given
 * UI step and, on success, moves the stepper back to that step.
 *
 * Does nothing when no session is open or when the user declines the
 * confirmation dialog. Failures are only logged to the console.
 *
 * @param uiStep 0-based index of the step in the overlay stepper.
 */
const reprocessFromStep = useCallback(async (uiStep: number) => {
if (!sessionId) return
// Map overlay UI step to DB step
const dbStepMap: Record<number, number> = { 0: 2, 1: 3, 2: 4, 3: 5, 4: 7, 5: 8, 6: 9 }
// Indices outside the map fall back to uiStep + 1.
const dbStep = dbStepMap[uiStep] || uiStep + 1
if (!confirm(`Ab Schritt ${uiStep + 1} (${stepNames[uiStep + 1] || '?'}) neu verarbeiten?`)) return
try {
const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ from_step: dbStep }),
})
if (!res.ok) {
// The error body may not be JSON; fall back to an empty object.
const data = await res.json().catch(() => ({}))
console.error('Reprocess failed:', data.detail || res.status)
return
}
goToStep(uiStep)
} catch (e) {
console.error('Reprocess error:', e)
}
// goToStep is recreated on every render, so listing it below makes this
// callback change every render as well; the lint rule is silenced here.
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [sessionId, goToStep])
/**
 * Marks the active session as ground truth for the current mode.
 *
 * If a grid-save callback is registered it is awaited first. The outcome is
 * shown via `gtMessage`; the success text is cleared again after five seconds.
 */
const handleMarkGroundTruth = async () => {
  if (!sessionId) return
  setGtSaving(true)
  setGtMessage('')
  try {
    // Auto-save grid editor before marking GT (so DB has latest edits)
    if (gridSaveRef.current) {
      await gridSaveRef.current()
    }
    const url = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth?pipeline=${mode}`
    const resp = await fetch(url, { method: 'POST' })
    if (!resp.ok) {
      const body = await resp.text().catch(() => '')
      throw new Error(`Ground Truth fehlgeschlagen (${resp.status}): ${body}`)
    }
    const result = await resp.json()
    setIsGroundTruth(true)
    setGtMessage(`Ground Truth gespeichert (${result.cells_saved} Zellen)`)
    setTimeout(() => setGtMessage(''), 5000)
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e)
    setGtMessage(reason)
  } finally {
    setGtSaving(false)
  }
}
const isLastStep = currentStep === steps.length - 1
const showGtButton = isLastStep && sessionId != null
/**
 * Returns the component for the current step.
 *
 * Steps 0-3 (orientation, deskew, dewarp, crop) are the same in every mode.
 * From step 4 on the component depends on the mode:
 *  - kombi:         4 = combined OCR, 5 = structure, 6 = grid review
 *  - paddle-direct: 4 = Paddle OCR, nothing afterwards
 *  - pipeline:      4 = rows, 5 = words, 6 = overlay reconstruction
 */
const renderStep = () => {
  // Shared preprocessing steps.
  if (currentStep === 0) {
    return <StepOrientation key={sessionId} sessionId={sessionId} onNext={handleOrientationComplete} onSessionList={() => { loadSessions(); setSessionId(null) }} />
  }
  if (currentStep === 1) {
    return <StepDeskew key={sessionId} sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 2) {
    return <StepDewarp key={sessionId} sessionId={sessionId} onNext={handleNext} />
  }
  if (currentStep === 3) {
    return <StepCrop key={sessionId} sessionId={sessionId} onNext={handleNext} />
  }

  if (mode === 'kombi') {
    switch (currentStep) {
      case 4:
        return (
          <PaddleDirectStep
            sessionId={sessionId}
            onNext={handleNext}
            endpoint="paddle-kombi"
            title="Kombi-Modus"
            description="PP-OCRv5 und Tesseract laufen parallel. Koordinaten werden gewichtet gemittelt fuer optimale Positionierung."
            icon="🔀"
            buttonLabel="PP-OCRv5 + Tesseract starten"
            runningLabel="PP-OCRv5 + Tesseract laufen..."
            engineKey="kombi"
          />
        )
      case 5:
        return <StepStructureDetection sessionId={sessionId} onNext={handleNext} />
      case 6:
        return <StepGridReview sessionId={sessionId} onNext={handleNext} saveRef={gridSaveRef} />
      default:
        return null
    }
  }

  if (mode === 'paddle-direct') {
    return currentStep === 4 ? <PaddleDirectStep sessionId={sessionId} onNext={handleNext} /> : null
  }

  // Remaining mode: 'pipeline'.
  switch (currentStep) {
    case 4:
      return <StepRowDetection sessionId={sessionId} onNext={handleNext} />
    case 5:
      return <StepWordRecognition sessionId={sessionId} onNext={handleNext} goToStep={goToStep} skipHealGaps />
    case 6:
      return <OverlayReconstruction sessionId={sessionId} onNext={handleNext} />
    default:
      return null
  }
}
return (
<div className="space-y-6">
<PagePurpose
title="OCR Overlay"
purpose="Ganzseitige Overlay-Rekonstruktion: Scan begradigen, Zeilen und Woerter erkennen, dann pixelgenau ueber das Bild legen. Ohne Spaltenerkennung — ideal fuer Arbeitsblaetter."
audience={['Entwickler']}
architecture={{
services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
databases: ['PostgreSQL Sessions'],
}}
relatedPages={[
{ name: 'OCR Pipeline', href: '/ai/ocr-pipeline', description: 'Volle Pipeline mit Spalten' },
{ name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
]}
defaultCollapsed
/>
{/* Session List */}
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
<div className="flex items-center justify-between mb-3">
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
Sessions ({sessions.length})
</h3>
<button
onClick={handleNewSession}
className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
>
+ Neue Session
</button>
</div>
{loadingSessions ? (
<div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
) : sessions.length === 0 ? (
<div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
) : (
<div className="space-y-1.5 max-h-[320px] overflow-y-auto">
{sessions.map((s) => {
const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category)
return (
<div
key={s.id}
className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
sessionId === s.id
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
}`}
>
{/* Thumbnail */}
<div
className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
onClick={() => openSession(s.id)}
>
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${s.id}/thumbnail?size=96`}
alt=""
className="w-full h-full object-cover"
loading="lazy"
onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
/>
</div>
{/* Info */}
<div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
{editingName === s.id ? (
<input
autoFocus
value={editNameValue}
onChange={(e) => setEditNameValue(e.target.value)}
onBlur={() => renameSession(s.id, editNameValue)}
onKeyDown={(e) => {
if (e.key === 'Enter') renameSession(s.id, editNameValue)
if (e.key === 'Escape') setEditingName(null)
}}
onClick={(e) => e.stopPropagation()}
className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
/>
) : (
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
{s.name || s.filename}
</div>
)}
<button
onClick={(e) => {
e.stopPropagation()
navigator.clipboard.writeText(s.id)
const btn = e.currentTarget
btn.textContent = 'Kopiert!'
setTimeout(() => { btn.textContent = `ID: ${s.id.slice(0, 8)}` }, 1500)
}}
className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
title={`Volle ID: ${s.id} — Klick zum Kopieren`}
>
ID: {s.id.slice(0, 8)}
</button>
<div className="text-xs text-gray-400 flex gap-2 mt-0.5">
<span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
</div>
</div>
{/* Category Badge */}
<div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
<button
onClick={() => setEditingCategory(editingCategory === s.id ? null : s.id)}
className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
catInfo
? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
: 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300'
}`}
title="Kategorie setzen"
>
{catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
</button>
</div>
{/* Actions */}
<div className="flex flex-col gap-0.5 flex-shrink-0">
<button
onClick={(e) => {
e.stopPropagation()
setEditNameValue(s.name || s.filename)
setEditingName(s.id)
}}
className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
title="Umbenennen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
</svg>
</button>
<button
onClick={(e) => {
e.stopPropagation()
if (confirm('Session loeschen?')) deleteSession(s.id)
}}
className="p-1 text-gray-400 hover:text-red-500"
title="Loeschen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
</svg>
</button>
</div>
{/* Category dropdown */}
{editingCategory === s.id && (
<div
className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
onClick={(e) => e.stopPropagation()}
>
{DOCUMENT_CATEGORIES.map((cat) => (
<button
key={cat.value}
onClick={() => updateCategory(s.id, cat.value)}
className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
s.document_category === cat.value
? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
: 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
}`}
>
{cat.icon} {cat.label}
</button>
))}
</div>
)}
</div>
)
})}
</div>
)}
</div>
{/* Active session info + category picker */}
{sessionId && sessionName && (
<div className="relative flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
<span>Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span></span>
<button
onClick={() => setEditingActiveCategory(!editingActiveCategory)}
className={`text-xs px-2.5 py-1 rounded-full border transition-colors ${
activeCategory
? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300 hover:bg-teal-100 dark:hover:bg-teal-900/50'
: 'bg-amber-50 dark:bg-amber-900/20 border-amber-300 dark:border-amber-700 text-amber-700 dark:text-amber-300 hover:bg-amber-100 dark:hover:bg-amber-900/40 animate-pulse'
}`}
>
{activeCategory ? (() => {
const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
return cat ? `${cat.icon} ${cat.label}` : activeCategory
})() : 'Kategorie setzen'}
</button>
{isGroundTruth && (
<span className="text-xs px-2 py-0.5 rounded-full bg-amber-50 dark:bg-amber-900/20 border border-amber-300 dark:border-amber-700 text-amber-700 dark:text-amber-300">
GT
</span>
)}
{editingActiveCategory && (
<div className="absolute left-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64">
{DOCUMENT_CATEGORIES.map((cat) => (
<button
key={cat.value}
onClick={() => {
updateCategory(sessionId, cat.value)
setEditingActiveCategory(false)
}}
className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
activeCategory === cat.value
? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
: 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
}`}
>
{cat.icon} {cat.label}
</button>
))}
</div>
)}
</div>
)}
{/* Mode Toggle */}
<div className="flex items-center gap-1 bg-gray-100 dark:bg-gray-800 rounded-lg p-1 w-fit">
<button
onClick={() => {
if (mode === 'pipeline') return
setMode('pipeline')
setCurrentStep(0)
setSessionId(null)
setSteps(OVERLAY_PIPELINE_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}}
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
mode === 'pipeline'
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
}`}
>
Pipeline (7 Schritte)
</button>
<button
onClick={() => {
if (mode === 'paddle-direct') return
setMode('paddle-direct')
setCurrentStep(0)
setSessionId(null)
setSteps(PADDLE_DIRECT_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}}
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
mode === 'paddle-direct'
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
}`}
>
PP-OCRv5 Direct (5 Schritte)
</button>
<button
onClick={() => {
if (mode === 'kombi') return
setMode('kombi')
setCurrentStep(0)
setSessionId(null)
setSteps(KOMBI_STEPS.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' })))
}}
className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
mode === 'kombi'
? 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 shadow-sm'
: 'text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-300'
}`}
>
Kombi (7 Schritte)
</button>
</div>
<PipelineStepper
steps={steps}
currentStep={currentStep}
onStepClick={handleStepClick}
onReprocess={mode === 'pipeline' && sessionId != null ? reprocessFromStep : undefined}
/>
{subSessions.length > 0 && parentSessionId && sessionId && (
<BoxSessionTabs
parentSessionId={parentSessionId}
subSessions={subSessions}
activeSessionId={sessionId}
onSessionChange={handleSessionChange}
/>
)}
<div className="min-h-[400px]">{renderStep()}</div>
{/* Ground Truth button bar — visible on last step */}
{showGtButton && (
<div className="sticky bottom-0 bg-white dark:bg-gray-900 border-t dark:border-gray-700 py-3 px-4 -mx-1 flex items-center justify-between rounded-b-xl">
<div className="text-sm text-gray-500 dark:text-gray-400">
{gtMessage && (
<span className={gtMessage.includes('fehlgeschlagen') ? 'text-red-500' : 'text-amber-600 dark:text-amber-400'}>
{gtMessage}
</span>
)}
</div>
<button
onClick={handleMarkGroundTruth}
disabled={gtSaving}
className="px-4 py-2 text-sm bg-amber-600 text-white rounded hover:bg-amber-700 disabled:opacity-50"
>
{gtSaving ? 'Speichere...' : isGroundTruth ? 'Ground Truth aktualisieren' : 'Als Ground Truth markieren'}
</button>
</div>
)}
</div>
)
}

View File

@@ -1,87 +0,0 @@
import type { PipelineStep } from '../ocr-pipeline/types'
// Re-export types used by overlay components
export type {
PipelineStep,
PipelineStepStatus,
SessionListItem,
SessionInfo,
DocumentCategory,
DocumentTypeResult,
OrientationResult,
CropResult,
DeskewResult,
DewarpResult,
RowResult,
RowItem,
GridResult,
GridCell,
OcrWordBox,
WordBbox,
ColumnMeta,
} from '../ocr-pipeline/types'
export { DOCUMENT_CATEGORIES } from '../ocr-pipeline/types'
/**
 * 7-step pipeline for full-page overlay reconstruction.
 * Skips: Spalten (columns), LLM-Review (Korrektur), Ground-Truth (Validierung)
 *
 * Every entry starts as 'pending'; consumers derive the live status from the
 * array index. The first four entries (orientation, deskew, dewarp, crop)
 * appear with the same ids in the other step lists of this file.
 */
export const OVERLAY_PIPELINE_STEPS: PipelineStep[] = [
{ id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
{ id: 'rows', name: 'Zeilen', icon: '📏', status: 'pending' },
{ id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' },
{ id: 'reconstruction', name: 'Overlay', icon: '🏗️', status: 'pending' },
]
/**
 * Map from overlay UI step index (0-based) to DB step number (1-indexed).
 *
 * DB numbering: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns,
 * 7=rows, 8=words, 9=recon, 10=gt. The overlay pipeline has no column step,
 * so DB step 6 is skipped and rows/words map to 7/8. This is the inverse of
 * `dbStepToOverlayUi` for every step the overlay actually shows.
 */
export const OVERLAY_UI_TO_DB: Record<number, number> = {
  0: 2, // orientation
  1: 3, // deskew
  2: 4, // dewarp
  3: 5, // crop
  4: 7, // rows (DB step 6 = columns is skipped)
  5: 8, // words
  6: 9, // reconstruction
}
/**
 * 5-step pipeline for Paddle Direct mode.
 * Same preprocessing (orient/deskew/dewarp/crop), then a single PaddleOCR
 * step replaces the rows, words and overlay steps of the 7-step pipeline.
 */
export const PADDLE_DIRECT_STEPS: PipelineStep[] = [
{ id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
{ id: 'paddle-direct', name: 'PP-OCRv5 + Overlay', icon: '⚡', status: 'pending' },
]
/**
 * 7-step pipeline for Kombi mode (PP-OCRv5 + Tesseract).
 * Same four preprocessing steps, then the combined OCR step, followed by
 * structure detection and the grid review / ground-truth step.
 */
export const KOMBI_STEPS: PipelineStep[] = [
{ id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
{ id: 'kombi', name: 'PP-OCRv5 + Tesseract', icon: '🔀', status: 'pending' },
{ id: 'structure', name: 'Struktur', icon: '🔍', status: 'pending' },
{ id: 'grid-editor', name: 'Review & GT', icon: '📊', status: 'pending' },
]
/**
 * Map from DB step to overlay UI step index.
 *
 * DB: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns, 7=rows,
 * 8=words, 9=recon, 10=gt. Anything up to 2 lands on orientation, the
 * column step (6) is folded into rows, and everything past words lands on
 * reconstruction.
 */
export function dbStepToOverlayUi(dbStep: number): number {
  if (dbStep <= 2) return 0 // orientation

  // Steps with a one-to-one counterpart in the overlay stepper.
  switch (dbStep) {
    case 3:
      return 1 // deskew
    case 4:
      return 2 // dewarp
    case 5:
      return 3 // crop
    case 8:
      return 5 // words
  }

  // Columns and rows both show the rows step; the rest is reconstruction.
  return dbStep <= 7 ? 4 : 6
}

View File

@@ -1,443 +0,0 @@
'use client'
import { Suspense, useCallback, useEffect, useState } from 'react'
import { PagePurpose } from '@/components/common/PagePurpose'
import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper'
import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation'
import { StepCrop } from '@/components/ocr-pipeline/StepCrop'
import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew'
import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp'
import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection'
import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection'
import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection'
import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition'
import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview'
import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction'
import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth'
import { DOCUMENT_CATEGORIES, type SessionListItem, type DocumentTypeResult, type DocumentCategory, type SubSession } from './types'
import { usePipelineNavigation } from './usePipelineNavigation'
const KLAUSUR_API = '/klausur-api'
// Step labels of the full 11-step OCR pipeline, keyed by 1-based step number.
// NOTE(review): no use of this constant is visible in this part of the file.
const STEP_NAMES: Record<number, string> = {
1: 'Orientierung', 2: 'Begradigung', 3: 'Entzerrung', 4: 'Zuschneiden',
5: 'Spalten', 6: 'Zeilen', 7: 'Woerter', 8: 'Struktur',
9: 'Korrektur', 10: 'Rekonstruktion', 11: 'Validierung',
}
function OcrPipelineContent() {
const nav = usePipelineNavigation()
const [sessions, setSessions] = useState<SessionListItem[]>([])
const [loadingSessions, setLoadingSessions] = useState(true)
const [editingName, setEditingName] = useState<string | null>(null)
const [editNameValue, setEditNameValue] = useState('')
const [editingCategory, setEditingCategory] = useState<string | null>(null)
const [sessionName, setSessionName] = useState('')
const [activeCategory, setActiveCategory] = useState<DocumentCategory | undefined>(undefined)
/**
 * Fetches all sessions and stores them. A non-OK response leaves the current
 * list untouched; a thrown error is logged. The loading flag is always cleared.
 */
const loadSessions = useCallback(async () => {
  setLoadingSessions(true)
  try {
    const response = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`)
    if (!response.ok) return
    const payload = await response.json()
    setSessions(payload.sessions || [])
  } catch (e) {
    console.error('Failed to load sessions:', e)
  } finally {
    setLoadingSessions(false)
  }
}, [])
useEffect(() => { loadSessions() }, [loadSessions])
// Sync session name and category when nav.sessionId changes.
// Without a session both are cleared. Otherwise the session is fetched and
// its name/category are applied — unless the effect was cleaned up in the
// meantime (session changed again or component unmounted). This keeps a slow
// response for a previous session from overwriting the current one.
useEffect(() => {
  if (!nav.sessionId) {
    setSessionName('')
    setActiveCategory(undefined)
    return undefined
  }
  let cancelled = false
  const load = async () => {
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${nav.sessionId}`)
      if (!res.ok) return
      const data = await res.json()
      // Drop the result if a newer run of this effect has taken over.
      if (cancelled) return
      setSessionName(data.name || data.filename || '')
      setActiveCategory(data.document_category || undefined)
    } catch { /* best effort: the label simply stays as it was */ }
  }
  load()
  return () => {
    cancelled = true
  }
}, [nav.sessionId])
/** Opens a session by delegating to the navigation hook's goToSession. */
const openSession = useCallback((sid: string) => {
nav.goToSession(sid)
}, [nav])
/**
 * Deletes a session on the server, removes it from the local list and
 * returns to the session list when the deleted session was the open one.
 *
 * Local state is only changed once the server has confirmed the deletion.
 * A 404 is treated as success because the session no longer exists.
 *
 * @param sid Id of the session to delete.
 */
const deleteSession = useCallback(async (sid: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' })
    // fetch() resolves for HTTP error statuses, so check the status explicitly.
    if (!res.ok && res.status !== 404) {
      console.error('Failed to delete session:', res.status)
      return
    }
    setSessions(prev => prev.filter(s => s.id !== sid))
    if (nav.sessionId === sid) nav.goToSessionList()
  } catch (e) {
    console.error('Failed to delete session:', e)
  }
}, [nav])
/**
 * Persists a new session name and mirrors it into the local list (and into
 * the active-session label when the renamed session is the open one).
 *
 * The local state is only updated when the server accepted the change.
 * The inline name editor is closed in every case.
 *
 * @param sid     Id of the session to rename.
 * @param newName Name to store.
 */
const renameSession = useCallback(async (sid: string, newName: string) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: newName }),
    })
    // fetch() does not reject on HTTP errors; do not show a name the server refused.
    if (res.ok) {
      setSessions(prev => prev.map(s => (s.id === sid ? { ...s, name: newName } : s)))
      if (nav.sessionId === sid) setSessionName(newName)
    } else {
      console.error('Failed to rename session:', res.status)
    }
  } catch (e) {
    console.error('Failed to rename session:', e)
  }
  setEditingName(null)
}, [nav.sessionId])
/**
 * Persists a session's document category and mirrors it into the local list
 * (and into the active-session state when that session is the open one).
 *
 * The local state is only updated when the server accepted the change.
 * The category dropdown is closed in every case.
 *
 * @param sid      Id of the session to update.
 * @param category Category value to store.
 */
const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => {
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ document_category: category }),
    })
    // fetch() does not reject on HTTP errors; do not show a category the server refused.
    if (res.ok) {
      setSessions(prev => prev.map(s => (s.id === sid ? { ...s, document_category: category } : s)))
      if (nav.sessionId === sid) setActiveCategory(category)
    } else {
      console.error('Failed to update category:', res.status)
    }
  } catch (e) {
    console.error('Failed to update category:', e)
  }
  setEditingCategory(null)
}, [nav.sessionId])
/**
 * Deletes every session after an explicit user confirmation.
 *
 * The local list is only cleared — and the view only returned to the session
 * list — once the server has confirmed the bulk delete. On failure the list
 * is left as it is, so it keeps reflecting what still exists on the server.
 */
const deleteAllSessions = useCallback(async () => {
  if (!confirm('Alle Sessions loeschen? Dies kann nicht rueckgaengig gemacht werden.')) return
  try {
    const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, { method: 'DELETE' })
    // fetch() resolves for HTTP error statuses, so check the status explicitly.
    if (!res.ok) {
      console.error('Failed to delete all sessions:', res.status)
      return
    }
    setSessions([])
    nav.goToSessionList()
  } catch (e) {
    console.error('Failed to delete all sessions:', e)
  }
}, [nav])
/**
 * Stepper click handler: navigates to a step only if it is at or before the
 * current one, or is already marked completed.
 */
const handleStepClick = (index: number) => {
  const reachable = index <= nav.currentStepIndex || nav.steps[index].status === 'completed'
  if (!reachable) return
  nav.goToStep(index)
}
// Orientation finished: refresh the session list and open the session that
// the orientation step reported.
// NOTE(review): the step shown afterwards is decided by nav.goToSession,
// which is not visible here — presumably the deskew step; confirm.
const handleOrientationComplete = useCallback(async (sid: string) => {
loadSessions()
// Hand over to the navigation hook for this session
nav.goToSession(sid)
}, [nav, loadSessions])
// Crop finished: ask the backend to detect the document type, hand the result
// to the navigation hook, then advance. Detection is best effort — a failure
// is logged and the wizard advances anyway.
const handleCropNext = useCallback(async () => {
  const activeId = nav.sessionId
  if (activeId) {
    try {
      const detectUrl = `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${activeId}/detect-type`
      const res = await fetch(detectUrl, { method: 'POST' })
      if (res.ok) {
        const detected: DocumentTypeResult = await res.json()
        nav.setDocType(detected)
      }
    } catch (e) {
      console.error('Doc type detection failed:', e)
    }
  }
  nav.goToNextStep()
}, [nav])
/**
 * Manually overrides the detected document type.
 *
 * 'full_text' skips the column and row steps and selects the 'full_page'
 * pipeline; every other type skips nothing and selects 'cell_first'.
 * Does nothing while no detection result is available.
 */
const handleDocTypeChange = (newDocType: DocumentTypeResult['doc_type']) => {
  const current = nav.docTypeResult
  if (!current) return
  const isFullText = newDocType === 'full_text'
  const skipSteps: string[] = isFullText ? ['columns', 'rows'] : []
  nav.setDocType({
    ...current,
    doc_type: newDocType,
    skip_steps: skipSteps,
    pipeline: isFullText ? 'full_page' : 'cell_first',
  })
}
// Box sub-sessions (column detection) — still supported.
// Intentional no-op: it is passed to StepColumnDetection as
// onBoxSessionsCreated and ignores the sub-sessions it receives.
const handleBoxSessionsCreated = useCallback((_subs: SubSession[]) => {
// Box sub-sessions are tracked by the backend; no client-side state needed anymore
}, [])
/**
 * Returns the component for the current step index of the 11-step pipeline,
 * or null for an index outside 0-10.
 */
const renderStep = () => {
  const sid = nav.sessionId
  const stepIndex = nav.currentStepIndex

  // Preprocessing steps are keyed by session so they remount on session change.
  if (stepIndex === 0) {
    return (
      <StepOrientation
        key={sid}
        sessionId={sid}
        onNext={handleOrientationComplete}
        onSessionList={() => { loadSessions(); nav.goToSessionList() }}
      />
    )
  }
  if (stepIndex === 1) return <StepDeskew key={sid} sessionId={sid} onNext={nav.goToNextStep} />
  if (stepIndex === 2) return <StepDewarp key={sid} sessionId={sid} onNext={nav.goToNextStep} />
  if (stepIndex === 3) return <StepCrop key={sid} sessionId={sid} onNext={handleCropNext} />

  // Analysis and review steps.
  if (stepIndex === 4) {
    return <StepColumnDetection sessionId={sid} onNext={nav.goToNextStep} onBoxSessionsCreated={handleBoxSessionsCreated} />
  }
  if (stepIndex === 5) return <StepRowDetection sessionId={sid} onNext={nav.goToNextStep} />
  if (stepIndex === 6) {
    return <StepWordRecognition sessionId={sid} onNext={nav.goToNextStep} goToStep={nav.goToStep} />
  }
  if (stepIndex === 7) return <StepStructureDetection sessionId={sid} onNext={nav.goToNextStep} />
  if (stepIndex === 8) return <StepLlmReview sessionId={sid} onNext={nav.goToNextStep} />
  if (stepIndex === 9) return <StepReconstruction sessionId={sid} onNext={nav.goToNextStep} />
  if (stepIndex === 10) return <StepGroundTruth sessionId={sid} onNext={nav.goToNextStep} />
  return null
}
return (
<div className="space-y-6">
<PagePurpose
title="OCR Pipeline"
purpose="Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. Ziel: 10 Vokabelseiten fehlerfrei rekonstruieren."
audience={['Entwickler', 'Data Scientists']}
architecture={{
services: ['klausur-service (FastAPI)', 'OpenCV', 'Tesseract'],
databases: ['PostgreSQL Sessions'],
}}
relatedPages={[
{ name: 'OCR Vergleich', href: '/ai/ocr-compare', description: 'Methoden-Vergleich' },
{ name: 'OCR-Labeling', href: '/ai/ocr-labeling', description: 'Trainingsdaten' },
]}
defaultCollapsed
/>
{/* Session List */}
<div className="bg-white dark:bg-gray-800 rounded-xl border border-gray-200 dark:border-gray-700 p-4">
<div className="flex items-center justify-between mb-3">
<h3 className="text-sm font-medium text-gray-700 dark:text-gray-300">
Sessions ({sessions.length})
</h3>
<div className="flex gap-2">
{sessions.length > 0 && (
<button
onClick={deleteAllSessions}
className="text-xs px-3 py-1.5 text-red-600 hover:bg-red-50 dark:hover:bg-red-900/20 rounded-lg transition-colors"
title="Alle Sessions loeschen"
>
Alle loeschen
</button>
)}
<button
onClick={() => nav.goToSessionList()}
className="text-xs px-3 py-1.5 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
>
+ Neue Session
</button>
</div>
</div>
{loadingSessions ? (
<div className="text-sm text-gray-400 py-2">Lade Sessions...</div>
) : sessions.length === 0 ? (
<div className="text-sm text-gray-400 py-2">Noch keine Sessions vorhanden.</div>
) : (
<div className="space-y-1.5 max-h-[320px] overflow-y-auto">
{sessions.map((s) => {
const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category)
return (
<div
key={s.id}
className={`relative flex items-start gap-3 px-3 py-2.5 rounded-lg text-sm transition-colors cursor-pointer ${
nav.sessionId === s.id
? 'bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700'
: 'hover:bg-gray-50 dark:hover:bg-gray-700/50'
}`}
>
{/* Thumbnail */}
<div
className="flex-shrink-0 w-12 h-12 rounded-md overflow-hidden bg-gray-100 dark:bg-gray-700"
onClick={() => openSession(s.id)}
>
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${s.id}/thumbnail?size=96`}
alt=""
className="w-full h-full object-cover"
loading="lazy"
onError={(e) => { (e.target as HTMLImageElement).style.display = 'none' }}
/>
</div>
{/* Info */}
<div className="flex-1 min-w-0" onClick={() => openSession(s.id)}>
{editingName === s.id ? (
<input
autoFocus
value={editNameValue}
onChange={(e) => setEditNameValue(e.target.value)}
onBlur={() => renameSession(s.id, editNameValue)}
onKeyDown={(e) => {
if (e.key === 'Enter') renameSession(s.id, editNameValue)
if (e.key === 'Escape') setEditingName(null)
}}
onClick={(e) => e.stopPropagation()}
className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600"
/>
) : (
<div className="truncate font-medium text-gray-700 dark:text-gray-300">
{s.name || s.filename}
</div>
)}
{/* ID row */}
<button
onClick={(e) => {
e.stopPropagation()
navigator.clipboard.writeText(s.id)
const btn = e.currentTarget
btn.textContent = 'Kopiert!'
setTimeout(() => { btn.textContent = `ID: ${s.id.slice(0, 8)}` }, 1500)
}}
className="text-[10px] font-mono text-gray-400 hover:text-teal-500 transition-colors"
title={`Volle ID: ${s.id} — Klick zum Kopieren`}
>
ID: {s.id.slice(0, 8)}
</button>
<div className="text-xs text-gray-400 flex gap-2 mt-0.5">
<span>{new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })}</span>
<span>Schritt {s.current_step}: {STEP_NAMES[s.current_step] || '?'}</span>
</div>
</div>
{/* Badges */}
<div className="flex flex-col gap-1 items-end flex-shrink-0" onClick={(e) => e.stopPropagation()}>
<button
onClick={() => setEditingCategory(editingCategory === s.id ? null : s.id)}
className={`text-[10px] px-1.5 py-0.5 rounded-full border transition-colors ${
catInfo
? 'bg-teal-50 dark:bg-teal-900/30 border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300'
: 'bg-gray-50 dark:bg-gray-700 border-gray-200 dark:border-gray-600 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300'
}`}
title="Kategorie setzen"
>
{catInfo ? `${catInfo.icon} ${catInfo.label}` : '+ Kategorie'}
</button>
{s.doc_type && (
<span className="text-[10px] px-1.5 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
{s.doc_type}
</span>
)}
</div>
{/* Action buttons */}
<div className="flex flex-col gap-0.5 flex-shrink-0">
<button
onClick={(e) => {
e.stopPropagation()
setEditNameValue(s.name || s.filename)
setEditingName(s.id)
}}
className="p-1 text-gray-400 hover:text-gray-600 dark:hover:text-gray-300"
title="Umbenennen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M15.232 5.232l3.536 3.536m-2.036-5.036a2.5 2.5 0 113.536 3.536L6.5 21.036H3v-3.572L16.732 3.732z" />
</svg>
</button>
<button
onClick={(e) => {
e.stopPropagation()
if (confirm('Session loeschen?')) deleteSession(s.id)
}}
className="p-1 text-gray-400 hover:text-red-500"
title="Loeschen"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
<path strokeLinecap="round" strokeLinejoin="round" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16" />
</svg>
</button>
</div>
{/* Category dropdown */}
{editingCategory === s.id && (
<div
className="absolute right-0 top-full mt-1 z-20 bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 rounded-lg shadow-lg p-2 grid grid-cols-2 gap-1 w-64"
onClick={(e) => e.stopPropagation()}
>
{DOCUMENT_CATEGORIES.map((cat) => (
<button
key={cat.value}
onClick={() => updateCategory(s.id, cat.value)}
className={`text-xs px-2 py-1.5 rounded-md text-left transition-colors ${
s.document_category === cat.value
? 'bg-teal-100 dark:bg-teal-900/40 text-teal-700 dark:text-teal-300'
: 'hover:bg-gray-100 dark:hover:bg-gray-700 text-gray-600 dark:text-gray-400'
}`}
>
{cat.icon} {cat.label}
</button>
))}
</div>
)}
</div>
)
})}
</div>
)}
</div>
{/* Active session info */}
{nav.sessionId && sessionName && (
<div className="flex items-center gap-3 text-sm text-gray-500 dark:text-gray-400">
<span>Aktive Session: <span className="font-medium text-gray-700 dark:text-gray-300">{sessionName}</span></span>
{activeCategory && (() => {
const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory)
return cat ? <span className="text-xs px-2 py-0.5 rounded-full bg-teal-50 dark:bg-teal-900/30 border border-teal-200 dark:border-teal-700 text-teal-700 dark:text-teal-300">{cat.icon} {cat.label}</span> : null
})()}
{nav.docTypeResult && (
<span className="text-xs px-2 py-0.5 rounded-full bg-gray-100 dark:bg-gray-700 text-gray-500 dark:text-gray-400 border border-gray-200 dark:border-gray-600">
{nav.docTypeResult.doc_type}
</span>
)}
</div>
)}
<PipelineStepper
steps={nav.steps}
currentStep={nav.currentStepIndex}
onStepClick={handleStepClick}
onReprocess={nav.sessionId ? nav.reprocessFromStep : undefined}
docTypeResult={nav.docTypeResult}
onDocTypeChange={handleDocTypeChange}
/>
<div className="min-h-[400px]">{renderStep()}</div>
</div>
)
}
/**
 * Page entry point: renders the pipeline UI inside a Suspense boundary and
 * shows a short loading text while the content is suspended.
 * NOTE(review): presumably required because the content reads URL search params — confirm.
 */
export default function OcrPipelinePage() {
  const loadingFallback = <div className="p-8 text-gray-400">Lade Pipeline...</div>
  return (
    <Suspense fallback={loadingFallback}>
      <OcrPipelineContent />
    </Suspense>
  )
}

View File

@@ -1,430 +0,0 @@
/** Display state of a single step in the pipeline stepper. */
export type PipelineStepStatus = 'pending' | 'active' | 'completed' | 'failed' | 'skipped'
/** One entry of the pipeline stepper. */
export interface PipelineStep {
// Stable identifier; skip lists and the `step` URL parameter refer to steps by this id.
id: string
// Label shown to the user.
name: string
icon: string
status: PipelineStepStatus
}
/** Category that can be assigned to a session's document. */
export type DocumentCategory =
| 'vokabelseite' | 'woerterbuch' | 'buchseite' | 'arbeitsblatt' | 'klausurseite'
| 'mathearbeit' | 'statistik' | 'zeitung' | 'formular' | 'handschrift' | 'sonstiges'
/**
 * Display metadata (label and icon) for every DocumentCategory, one entry per
 * union member. Consumers look entries up by `value` and render them in this order.
 */
export const DOCUMENT_CATEGORIES: { value: DocumentCategory; label: string; icon: string }[] = [
{ value: 'vokabelseite', label: 'Vokabelseite', icon: '📖' },
{ value: 'woerterbuch', label: 'Woerterbuch', icon: '📕' },
{ value: 'buchseite', label: 'Buchseite', icon: '📚' },
{ value: 'arbeitsblatt', label: 'Arbeitsblatt', icon: '📝' },
{ value: 'klausurseite', label: 'Klausurseite', icon: '📄' },
{ value: 'mathearbeit', label: 'Mathearbeit', icon: '🔢' },
{ value: 'statistik', label: 'Statistik', icon: '📊' },
{ value: 'zeitung', label: 'Zeitung', icon: '📰' },
{ value: 'formular', label: 'Formular', icon: '📋' },
{ value: 'handschrift', label: 'Handschrift', icon: '✍️' },
{ value: 'sonstiges', label: 'Sonstiges', icon: '📎' },
]
/** Summary of a session as shown in the session list. */
export interface SessionListItem {
id: string
// Display name; the list falls back to `filename` when it is empty.
name: string
filename: string
status: string
// Step number as stored by the backend. Appears to be 1-based: the navigation
// hook subtracts 1 to obtain the UI step index — TODO confirm with the API.
current_step: number
document_category?: DocumentCategory
doc_type?: string
// Set on box sub-sessions; points to the session they were split from.
parent_session_id?: string
document_group_id?: string
page_number?: number
is_ground_truth?: boolean
// Timestamp string; parsed with `new Date(...)` for display.
created_at: string
updated_at?: string
}
/** Box sub-session (from column detection zone_type='box') */
export interface SubSession {
id: string
name: string
box_index: number
current_step?: number
status?: string
}
/** Log record for one executed pipeline step. */
export interface PipelineLogEntry {
step: string
// Completion timestamp as a string — NOTE(review): format not visible here, presumably ISO 8601.
completed_at: string
success: boolean
duration_ms?: number
// Free-form, step-specific values; callers must narrow before use.
metrics: Record<string, unknown>
}
/** Collection of step log entries. */
export interface PipelineLog {
steps: PipelineLogEntry[]
}
/** Result of the document type detection that runs after the crop step. */
export interface DocumentTypeResult {
doc_type: 'vocab_table' | 'full_text' | 'generic_table'
confidence: number
pipeline: 'cell_first' | 'full_page'
// Ids of pipeline steps (see PipelineStep.id) that are marked as skipped and jumped over.
skip_steps: string[]
features?: Record<string, unknown>
duration_seconds?: number
}
/** Result of the orientation step. */
export interface OrientationResult {
orientation_degrees: number
corrected: boolean
duration_seconds: number
}
/** Result of the crop step. */
export interface CropResult {
crop_applied: boolean
// Crop rectangle; NOTE(review): presumably in pixels, with crop_rect_pct as the percentage variant — confirm.
crop_rect?: { x: number; y: number; width: number; height: number }
crop_rect_pct?: { x: number; y: number; width: number; height: number }
original_size: { width: number; height: number }
cropped_size: { width: number; height: number }
detected_format?: string
format_confidence?: number
aspect_ratio?: number
border_fractions?: { top: number; bottom: number; left: number; right: number }
skipped?: boolean
duration_seconds?: number
}
/**
 * Full session record including the stored result of each pipeline step.
 * NOTE(review): presumably the response shape of the single-session endpoint — confirm.
 */
export interface SessionInfo {
session_id: string
filename: string
name?: string
image_width: number
image_height: number
original_image_url: string
// Backend step number; see SessionListItem.current_step regarding the 1-based offset.
current_step?: number
document_category?: DocumentCategory
doc_type?: string
// Per-step results; each is absent until the corresponding step has produced one.
orientation_result?: OrientationResult
crop_result?: CropResult
deskew_result?: DeskewResult
dewarp_result?: DewarpResult
column_result?: ColumnResult
row_result?: RowResult
word_result?: GridResult
doc_type_result?: DocumentTypeResult
sub_sessions?: SubSession[]
// Present on box sub-sessions only.
parent_session_id?: string
box_index?: number
document_group_id?: string
page_number?: number
}
/** Result of the deskew step: the angles measured by the individual methods and the one that was applied. */
export interface DeskewResult {
session_id: string
angle_hough: number
angle_word_alignment: number
angle_iterative?: number
angle_residual?: number
angle_textline?: number
// Angle that was actually used for the correction.
angle_applied: number
method_used: 'hough' | 'word_alignment' | 'manual' | 'iterative' | 'two_pass' | 'three_pass' | 'manual_combined'
confidence: number
duration_seconds: number
deskewed_image_url: string
binarized_image_url: string
}
/** Reviewer verdict on a deskew result, optionally with the correct angle. */
export interface DeskewGroundTruth {
is_correct: boolean
corrected_angle?: number
notes?: string
}
/** Shear estimate of a single dewarp detection method. */
export interface DewarpDetection {
method: string
shear_degrees: number
confidence: number
}
/** Result of the dewarp step. */
export interface DewarpResult {
session_id: string
method_used: string
shear_degrees: number
confidence: number
duration_seconds: number
dewarped_image_url: string
// Individual per-method estimates, when the backend reports them.
detections?: DewarpDetection[]
}
/** Reviewer verdict on a dewarp result, optionally with the correct shear. */
export interface DewarpGroundTruth {
is_correct: boolean
corrected_shear?: number
notes?: string
}
/** A classified rectangular region of the page (a column, header or footer). */
export interface PageRegion {
type: 'column_en' | 'column_de' | 'column_example' | 'page_ref'
| 'column_marker' | 'column_text' | 'column_ignore' | 'header' | 'footer'
// Region rectangle — NOTE(review): unit (pixels vs. percent) is not visible here, confirm.
x: number
y: number
width: number
height: number
classification_confidence?: number
classification_method?: string
}
/** Vertical band of the page; 'box' zones carry the rectangle of the box. */
export interface PageZone {
zone_type: 'content' | 'box'
y_start: number
y_end: number
box?: { x: number; y: number; width: number; height: number }
}
/** Result of the column detection step. */
export interface ColumnResult {
columns: PageRegion[]
duration_seconds: number
zones?: PageZone[]
}
/** Reviewer verdict on a column result, optionally with corrected columns. */
export interface ColumnGroundTruth {
is_correct: boolean
corrected_columns?: PageRegion[]
notes?: string
}
/** A manually placed vertical divider between columns. */
export interface ManualColumnDivider {
xPercent: number // Position in % of image width (0-100)
}
/** All region type literals, derived from PageRegion so both stay in sync. */
export type ColumnTypeKey = PageRegion['type']
/** Result of the row detection step. */
export interface RowResult {
rows: RowItem[]
summary: Record<string, number>
total_rows: number
duration_seconds: number
}
/** One detected row. */
export interface RowItem {
index: number
x: number
y: number
width: number
height: number
word_count: number
row_type: 'content' | 'header' | 'footer'
gap_before: number
}
/** Reviewer verdict on a row result, optionally with corrected rows. */
export interface RowGroundTruth {
is_correct: boolean
corrected_rows?: RowItem[]
notes?: string
}
/** A graphic element found by structure detection. */
export interface StructureGraphic {
x: number
y: number
w: number
h: number
area: number
shape: string // image, illustration
color_name: string
color_hex: string
confidence: number
}
/** Rectangle excluded from processing, with an optional label. */
export interface ExcludeRegion {
x: number
y: number
w: number
h: number
label?: string
}
/** Layout region with its class name and confidence. */
export interface DocLayoutRegion {
x: number
y: number
w: number
h: number
class_name: string
confidence: number
}
/** Result of the structure detection step. */
export interface StructureResult {
image_width: number
image_height: number
content_bounds: { x: number; y: number; w: number; h: number }
boxes: StructureBox[]
zones: StructureZone[]
graphics: StructureGraphic[]
exclude_regions?: ExcludeRegion[]
// Count per color name — NOTE(review): presumably the same names as StructureGraphic.color_name, confirm.
color_pixel_counts: Record<string, number>
has_words: boolean
word_count: number
border_ghosts_removed?: number
duration_seconds: number
/** PP-DocLayout regions (only present when method=ppdoclayout) */
layout_regions?: DocLayoutRegion[]
detection_method?: 'opencv' | 'ppdoclayout'
}
/** A detected box with border thickness and optional background color. */
export interface StructureBox {
x: number
y: number
w: number
h: number
confidence: number
border_thickness: number
bg_color_name?: string
bg_color_hex?: string
}
/** A zone of the page as reported by structure detection. */
export interface StructureZone {
index: number
zone_type: 'content' | 'box'
x: number
y: number
w: number
h: number
}
/** Bounding box in x/y/w/h form; used for both pixel and percentage coordinates (see GridCell). */
export interface WordBbox {
x: number
y: number
w: number
h: number
}
/** Bounding box and metadata of a single recognized word. */
export interface OcrWordBox {
text: string
left: number // absolute image x in px
top: number // absolute image y in px
width: number // px
height: number // px
conf: number
color?: string // hex color of detected text, e.g. '#dc2626'
color_name?: string // 'black' | 'red' | 'blue' | 'green' | 'orange' | 'purple' | 'yellow'
recovered?: boolean // true if this word was recovered via color detection
}
/** One cell of the recognized grid (intersection of a row and a column). */
export interface GridCell {
cell_id: string // "R03_C1"
row_index: number
col_index: number
col_type: string
text: string
confidence: number
// The same box in pixel and in percentage coordinates.
bbox_px: WordBbox
bbox_pct: WordBbox
ocr_engine?: string
is_bold?: boolean
status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
word_boxes?: OcrWordBox[] // per-word bounding boxes from OCR engine
}
/** Horizontal position and type of a column used to build the grid. */
export interface ColumnMeta {
index: number
type: string
x: number
width: number
}
/** Result of the word recognition step: the cell grid plus optional vocabulary entries and LLM review data. */
export interface GridResult {
cells: GridCell[]
grid_shape: { rows: number; cols: number; total_cells: number }
columns_used: ColumnMeta[]
layout: 'vocab' | 'generic'
image_width: number
image_height: number
duration_seconds: number
ocr_engine?: string
vocab_entries?: WordEntry[] // Only when layout='vocab'
entries?: WordEntry[] // Backwards compat alias for vocab_entries
entry_count?: number
summary: {
total_cells: number
non_empty_cells: number
low_confidence: number
// Only when layout='vocab':
total_entries?: number
with_english?: number
with_german?: number
}
// Outcome of the LLM correction step, when one was run.
llm_review?: {
changes: { row_index: number; field: string; old: string; new: string }[]
model_used: string
duration_ms: number
entries_corrected: number
applied_count?: number
applied_at?: string
}
}
/** One vocabulary row: English, German and example text with their bounding boxes. */
export interface WordEntry {
row_index: number
english: string
german: string
example: string
source_page?: string
marker?: string
confidence: number
// Box of the whole entry, followed by per-field boxes; a per-field box is null when there is none.
bbox: WordBbox
bbox_en: WordBbox | null
bbox_de: WordBbox | null
bbox_ex: WordBbox | null
bbox_ref?: WordBbox | null
bbox_marker?: WordBbox | null
status?: 'pending' | 'confirmed' | 'edited' | 'skipped'
}
/** @deprecated Use GridResult instead */
export interface WordResult {
entries: WordEntry[]
entry_count: number
image_width: number
image_height: number
duration_seconds: number
ocr_engine?: string
summary: {
total_entries: number
with_english: number
with_german: number
low_confidence: number
}
}
/** Reviewer verdict on a word result, optionally with corrected entries. */
export interface WordGroundTruth {
is_correct: boolean
corrected_entries?: WordEntry[]
notes?: string
}
/** An image area of the page together with the prompt, description and style used for it. */
export interface ImageRegion {
// Position and size in percent, as the `_pct` suffix indicates.
bbox_pct: { x: number; y: number; w: number; h: number }
prompt: string
description: string
// Base64 image data, or null while no image is available.
image_b64: string | null
style: 'educational' | 'cartoon' | 'sketch' | 'clipart' | 'realistic'
}
/** All style literals, derived from ImageRegion so both stay in sync. */
export type ImageStyle = ImageRegion['style']
/** Display labels for every ImageStyle, one entry per union member. */
export const IMAGE_STYLES: { value: ImageStyle; label: string }[] = [
{ value: 'educational', label: 'Lehrbuch' },
{ value: 'cartoon', label: 'Cartoon' },
{ value: 'sketch', label: 'Skizze' },
{ value: 'clipart', label: 'Clipart' },
{ value: 'realistic', label: 'Realistisch' },
]
/**
 * Template list of all pipeline steps. The array position is the 0-based UI
 * step index used for navigation; the `id` is what skip lists and the `step`
 * URL parameter refer to. Every entry starts as 'pending'; the navigation
 * code derives the actual status per step from this list.
 */
export const PIPELINE_STEPS: PipelineStep[] = [
{ id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' },
{ id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' },
{ id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' },
{ id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' },
{ id: 'columns', name: 'Spalten', icon: '📊', status: 'pending' },
{ id: 'rows', name: 'Zeilen', icon: '📏', status: 'pending' },
{ id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' },
{ id: 'structure', name: 'Struktur', icon: '🔍', status: 'pending' },
{ id: 'llm-review', name: 'Korrektur', icon: '✏️', status: 'pending' },
{ id: 'reconstruction', name: 'Rekonstruktion', icon: '🏗️', status: 'pending' },
{ id: 'ground-truth', name: 'Validierung', icon: '✅', status: 'pending' },
]

View File

@@ -1,225 +0,0 @@
'use client'
import { useCallback, useEffect, useRef, useState } from 'react'
import { useRouter, useSearchParams } from 'next/navigation'
import { PIPELINE_STEPS, type PipelineStep, type PipelineStepStatus, type DocumentTypeResult } from './types'
// Path prefix that every OCR pipeline request in this module is sent to.
const KLAUSUR_API = '/klausur-api'
/** Navigation state and actions returned by usePipelineNavigation. */
export interface PipelineNav {
// Active session, or null while the session list is shown.
sessionId: string | null
// 0-based index into PIPELINE_STEPS.
currentStepIndex: number
// Id of the current step; falls back to 'orientation' for an out-of-range index.
currentStepId: string
steps: PipelineStep[]
docTypeResult: DocumentTypeResult | null
// Advances to the next non-skipped step; on the last step it returns to the session list.
goToNextStep: () => void
goToStep: (index: number) => void
// Opens a session by updating the URL; the session is then loaded from the backend.
goToSession: (sessionId: string) => void
goToSessionList: () => void
setDocType: (result: DocumentTypeResult) => void
// Asks for confirmation, then requests reprocessing from the given 0-based UI step.
reprocessFromStep: (uiStep: number) => Promise<void>
}
/**
 * Display name of each pipeline step, keyed by the 1-based step number that
 * is sent to the backend (UI index + 1).
 *
 * Derived from PIPELINE_STEPS instead of being spelled out a second time, so
 * the names shown in the reprocess confirmation cannot drift away from the
 * stepper when a step is renamed, added or reordered.
 */
const STEP_NAMES: Record<number, string> = Object.fromEntries(
  PIPELINE_STEPS.map((step, index): [number, string] => [index + 1, step.name]),
)
/**
 * Creates a fresh step list from PIPELINE_STEPS for the given active step.
 *
 * @param uiStep 0-based index of the active step
 * @param skipSteps ids of steps to mark as skipped; this wins over position
 * @returns copies of all steps with their status set to 'skipped',
 *          'completed' (before uiStep), 'active' (at uiStep) or 'pending'
 */
function buildSteps(uiStep: number, skipSteps: string[]): PipelineStep[] {
  const statusAt = (stepId: string, position: number): PipelineStepStatus => {
    if (skipSteps.includes(stepId)) return 'skipped'
    if (position < uiStep) return 'completed'
    if (position === uiStep) return 'active'
    return 'pending'
  }
  return PIPELINE_STEPS.map((step, position) => ({
    ...step,
    status: statusAt(step.id, position),
  }))
}
/** Status a non-skipped step gets purely from its position relative to the active step. */
function positionalStatus(position: number, activeIndex: number): PipelineStepStatus {
  if (position < activeIndex) return 'completed'
  return position === activeIndex ? 'active' : 'pending'
}

/**
 * Navigation state of the OCR pipeline page, driven by the `session` and
 * `step` URL query parameters.
 *
 * Whenever those parameters change, the session is fetched from the backend
 * and the step list is rebuilt from the stored step number, the stored
 * document type and (for box sub-sessions) the pre-processing skip list.
 * The returned actions update local state and push the matching URL.
 *
 * @returns the current navigation state together with its actions
 */
export function usePipelineNavigation(): PipelineNav {
  const router = useRouter()
  const searchParams = useSearchParams()
  const paramSession = searchParams.get('session')
  const paramStep = searchParams.get('step')

  const [sessionId, setSessionId] = useState<string | null>(paramSession)
  const [currentStepIndex, setCurrentStepIndex] = useState(0)
  const [docTypeResult, setDocTypeResult] = useState<DocumentTypeResult | null>(null)
  // Lazy initializer: the initial step list is built once, not on every render.
  const [steps, setSteps] = useState<PipelineStep[]>(() => buildSteps(0, []))

  // Mirror of docTypeResult that is written synchronously. Callbacks close
  // over the state value of the render they were created in, so a caller that
  // runs setDocType() and goToNextStep() back to back would otherwise advance
  // with the skip list from before the update.
  const latestDocType = useRef<DocumentTypeResult | null>(null)

  // Single place that keeps the state and its synchronous mirror aligned.
  const applyDocType = useCallback((result: DocumentTypeResult | null) => {
    latestDocType.current = result
    setDocTypeResult(result)
  }, [])

  // Puts all navigation state back to "no session selected".
  const resetNavigation = useCallback(() => {
    setSessionId(null)
    setCurrentStepIndex(0)
    applyDocType(null)
    setSteps(buildSteps(0, []))
  }, [applyDocType])

  // Load session info when the session or step URL parameter changes.
  useEffect(() => {
    if (!paramSession) {
      resetNavigation()
      return
    }

    // Set by the cleanup below; prevents a response that arrives after the
    // parameters changed again from overwriting the state of the newer session.
    let cancelled = false

    const load = async () => {
      try {
        const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${paramSession}`)
        if (cancelled) return
        if (!res.ok) {
          console.error('Failed to load session:', res.status)
          return
        }
        const data = await res.json()
        if (cancelled) return

        setSessionId(paramSession)

        const savedDocType: DocumentTypeResult | null = data.doc_type_result || null
        applyDocType(savedDocType)

        // The backend step number is 1-based, the UI index 0-based.
        const dbStep = data.current_step || 1
        let uiStep = Math.max(0, dbStep - 1)
        const skipSteps = [...(savedDocType?.skip_steps || [])]

        // Box sub-sessions (from column detection) skip pre-processing
        const isBoxSubSession = !!data.parent_session_id
        if (isBoxSubSession && dbStep >= 5) {
          const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop']
          for (const s of SUB_SESSION_SKIP) {
            if (!skipSteps.includes(s)) skipSteps.push(s)
          }
          // Defensive: never land on one of the skipped pre-processing steps.
          if (uiStep < 4) uiStep = 4
        }

        // If URL has a step param, use that instead
        if (paramStep) {
          const stepIdx = PIPELINE_STEPS.findIndex(s => s.id === paramStep)
          if (stepIdx >= 0) uiStep = stepIdx
        }

        setCurrentStepIndex(uiStep)
        setSteps(buildSteps(uiStep, skipSteps))
      } catch (e) {
        if (!cancelled) console.error('Failed to load session:', e)
      }
    }

    load()
    return () => {
      cancelled = true
    }
  }, [paramSession, paramStep, applyDocType, resetNavigation])

  // Pushes the URL for a session (optionally at a step) or for the session list.
  const updateUrl = useCallback((sid: string | null, stepIdx?: number) => {
    if (!sid) {
      router.push('/ai/ocr-pipeline')
      return
    }
    const stepId = stepIdx !== undefined ? PIPELINE_STEPS[stepIdx]?.id : undefined
    const params = new URLSearchParams()
    params.set('session', sid)
    if (stepId) params.set('step', stepId)
    router.push(`/ai/ocr-pipeline?${params.toString()}`)
  }, [router])

  const goToNextStep = useCallback(() => {
    if (currentStepIndex >= steps.length - 1) {
      // Last step — return to session list
      resetNavigation()
      router.push('/ai/ocr-pipeline')
      return
    }

    // Read the synchronous mirror so a doc type set in the same event handler counts.
    const skipSteps = latestDocType.current?.skip_steps ?? []
    let candidate = currentStepIndex + 1
    while (candidate < steps.length && skipSteps.includes(PIPELINE_STEPS[candidate]?.id)) {
      candidate++
    }
    const nextStep = Math.min(candidate, steps.length - 1)

    setSteps(prev =>
      prev.map((s, i): PipelineStep => {
        if (i === currentStepIndex) return { ...s, status: 'completed' }
        if (i === nextStep) return { ...s, status: 'active' }
        if (i > currentStepIndex && i < nextStep && skipSteps.includes(PIPELINE_STEPS[i]?.id)) {
          return { ...s, status: 'skipped' }
        }
        return s
      }),
    )
    setCurrentStepIndex(nextStep)
    if (sessionId) updateUrl(sessionId, nextStep)
  }, [currentStepIndex, steps.length, sessionId, updateUrl, router, resetNavigation])

  const goToStep = useCallback((index: number) => {
    setCurrentStepIndex(index)
    // Skipped steps keep their status; all others are derived from their position.
    setSteps(prev =>
      prev.map((s, i): PipelineStep =>
        s.status === 'skipped' ? s : { ...s, status: positionalStatus(i, index) },
      ),
    )
    if (sessionId) updateUrl(sessionId, index)
  }, [sessionId, updateUrl])

  const goToSession = useCallback((sid: string) => {
    updateUrl(sid)
  }, [updateUrl])

  const goToSessionList = useCallback(() => {
    resetNavigation()
    router.push('/ai/ocr-pipeline')
  }, [router, resetNavigation])

  const setDocType = useCallback((result: DocumentTypeResult) => {
    const previousSkips = latestDocType.current?.skip_steps ?? []
    const nextSkips = result.skip_steps ?? []
    applyDocType(result)
    setSteps(prev =>
      prev.map((s, i): PipelineStep => {
        if (nextSkips.includes(s.id)) return { ...s, status: 'skipped' }
        // A step the previous doc type skipped but the new one keeps becomes
        // regular again. Steps skipped for other reasons (e.g. the
        // pre-processing steps of box sub-sessions) are left untouched.
        if (s.status === 'skipped' && previousSkips.includes(s.id)) {
          return { ...s, status: positionalStatus(i, currentStepIndex) }
        }
        return s
      }),
    )
  }, [applyDocType, currentStepIndex])

  const reprocessFromStep = useCallback(async (uiStep: number) => {
    if (!sessionId) return
    // The backend counts steps from 1.
    const dbStep = uiStep + 1
    if (!confirm(`Ab Schritt ${dbStep} (${STEP_NAMES[dbStep] || '?'}) neu verarbeiten? Nachfolgende Daten werden geloescht.`)) return
    try {
      const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ from_step: dbStep }),
      })
      if (!res.ok) {
        // The error body is optional; fall back to the bare status code.
        const data = await res.json().catch(() => ({}))
        console.error('Reprocess failed:', data.detail || res.status)
        return
      }
      goToStep(uiStep)
    } catch (e) {
      console.error('Reprocess error:', e)
    }
  }, [sessionId, goToStep])

  return {
    sessionId,
    currentStepIndex,
    currentStepId: PIPELINE_STEPS[currentStepIndex]?.id || 'orientation',
    steps,
    docTypeResult,
    goToNextStep,
    goToStep,
    goToSession,
    goToSessionList,
    setDocType,
    reprocessFromStep,
  }
}

View File

@@ -1,593 +0,0 @@
'use client'
/**
* Voice Service Admin Page (migrated from website/admin/voice)
*
* Displays:
* - Voice-First Architecture Overview
* - Developer Guide Content
* - Live Voice Demo (embedded from studio-v2)
* - Task State Machine Documentation
* - DSGVO Compliance Information
*/
import { useState } from 'react'
import Link from 'next/link'
import { PagePurpose } from '@/components/common/PagePurpose'
// Identifier of the selected tab; the page keeps the active one in state.
type TabType = 'overview' | 'demo' | 'tasks' | 'intents' | 'dsgvo' | 'api'
// Task State Machine data
// One entry per task state: German description, CSS classes for its badge,
// and `next`, the names of other states in this list (empty for COMPLETED and EXPIRED).
// NOTE(review): `next` presumably lists the allowed follow-up states — confirm against the voice service.
const TASK_STATES = [
{ state: 'DRAFT', description: 'Task erstellt, noch nicht verarbeitet', color: 'bg-gray-100 text-gray-800', next: ['QUEUED', 'PAUSED'] },
{ state: 'QUEUED', description: 'In Warteschlange fuer Verarbeitung', color: 'bg-blue-100 text-blue-800', next: ['RUNNING', 'PAUSED'] },
{ state: 'RUNNING', description: 'Wird aktuell verarbeitet', color: 'bg-yellow-100 text-yellow-800', next: ['READY', 'PAUSED'] },
{ state: 'READY', description: 'Fertig, wartet auf User-Bestaetigung', color: 'bg-green-100 text-green-800', next: ['APPROVED', 'REJECTED', 'PAUSED'] },
{ state: 'APPROVED', description: 'Vom User bestaetigt', color: 'bg-emerald-100 text-emerald-800', next: ['COMPLETED'] },
{ state: 'REJECTED', description: 'Vom User abgelehnt', color: 'bg-red-100 text-red-800', next: ['DRAFT'] },
{ state: 'COMPLETED', description: 'Erfolgreich abgeschlossen', color: 'bg-teal-100 text-teal-800', next: [] },
{ state: 'EXPIRED', description: 'TTL ueberschritten', color: 'bg-orange-100 text-orange-800', next: [] },
{ state: 'PAUSED', description: 'Vom User pausiert', color: 'bg-purple-100 text-purple-800', next: ['DRAFT', 'QUEUED', 'RUNNING', 'READY'] },
]
// Intent types organized by group. Each group has a title, CSS classes for its
// card, and its intents (type id, example utterance, short description).
// NOTE(review): this comment and the tab label say 22 intent types, but the list
// below contains 21 entries (5 + 4 + 2 + 4 + 3 + 3) — confirm which is correct.
const INTENT_GROUPS = [
{
group: 'Notizen',
color: 'bg-blue-50 border-blue-200',
intents: [
{ type: 'student_observation', example: 'Notiz zu Max: heute wiederholt gestoert', description: 'Schuelerbeobachtungen' },
{ type: 'reminder', example: 'Erinner mich morgen an Konferenz', description: 'Erinnerungen setzen' },
{ type: 'homework_check', example: '7b Mathe Hausaufgabe kontrollieren', description: 'Hausaufgaben pruefen' },
{ type: 'conference_topic', example: 'Thema Lehrerkonferenz: iPad-Regeln', description: 'Konferenzthemen' },
{ type: 'correction_thought', example: 'Aufgabe 3: haeufiger Fehler erklaeren', description: 'Korrekturgedanken' },
]
},
{
group: 'Content-Generierung',
color: 'bg-green-50 border-green-200',
intents: [
{ type: 'worksheet_generate', example: 'Erstelle 3 Lueckentexte zu Vokabeln', description: 'Arbeitsblaetter erstellen' },
{ type: 'quiz_generate', example: '10-Minuten Vokabeltest mit Loesungen', description: 'Quiz/Tests erstellen' },
{ type: 'quick_activity', example: '10 Minuten Einstieg, 5 Aufgaben', description: 'Schnelle Aktivitaeten' },
{ type: 'differentiation', example: 'Zwei Schwierigkeitsstufen: Basis und Plus', description: 'Differenzierung' },
]
},
{
group: 'Kommunikation',
color: 'bg-yellow-50 border-yellow-200',
intents: [
{ type: 'parent_letter', example: 'Neutraler Elternbrief wegen Stoerungen', description: 'Elternbriefe erstellen' },
{ type: 'class_message', example: 'Nachricht an 8a: Hausaufgaben bis Mittwoch', description: 'Klassennachrichten' },
]
},
{
group: 'Canvas-Editor',
color: 'bg-purple-50 border-purple-200',
intents: [
{ type: 'canvas_edit', example: 'Ueberschriften groesser, Zeilenabstand kleiner', description: 'Formatierung aendern' },
{ type: 'canvas_layout', example: 'Alles auf eine Seite, Drucklayout A4', description: 'Layout anpassen' },
{ type: 'canvas_element', example: 'Kasten fuer Merke hinzufuegen', description: 'Elemente hinzufuegen' },
{ type: 'canvas_image', example: 'Bild 2 nach links, Pfeil auf Aufgabe 3', description: 'Bilder positionieren' },
]
},
{
group: 'RAG & Korrektur',
color: 'bg-pink-50 border-pink-200',
intents: [
{ type: 'operator_checklist', example: 'Operatoren-Checkliste fuer diese Aufgabe', description: 'Operatoren abrufen' },
{ type: 'eh_passage', example: 'Erwartungshorizont-Passage zu diesem Thema', description: 'EH-Passagen suchen' },
{ type: 'feedback_suggestion', example: 'Kurze Feedbackformulierung vorschlagen', description: 'Feedback vorschlagen' },
]
},
{
group: 'Follow-up (TaskOrchestrator)',
color: 'bg-teal-50 border-teal-200',
intents: [
{ type: 'task_summary', example: 'Fasse alle offenen Tasks zusammen', description: 'Task-Uebersicht' },
{ type: 'convert_note', example: 'Mach aus der Notiz von gestern einen Elternbrief', description: 'Notizen konvertieren' },
{ type: 'schedule_reminder', example: 'Erinner mich morgen an das Gespraech mit Max', description: 'Erinnerungen planen' },
]
},
]
// DSGVO Data Categories
// Static documentation table: per data category how it is processed, where it
// is stored, its retention time (`ttl`, '-' when not applicable), an icon and
// a risk level ('low' | 'medium' | 'high').
const DSGVO_CATEGORIES = [
{ category: 'Audio', processing: 'NUR transient im RAM, NIEMALS persistiert', storage: 'Keine', ttl: '-', icon: '🎤', risk: 'low' },
{ category: 'PII (Schuelernamen)', processing: 'NUR auf Lehrergeraet', storage: 'Client-side', ttl: '-', icon: '👤', risk: 'high' },
{ category: 'Pseudonyme', processing: 'Server erlaubt (student_ref, class_ref)', storage: 'Valkey Cache', ttl: '24h', icon: '🔢', risk: 'low' },
{ category: 'Transkripte', processing: 'NUR verschluesselt (AES-256-GCM)', storage: 'PostgreSQL', ttl: '7 Tage', icon: '📝', risk: 'medium' },
{ category: 'Task States', processing: 'TaskOrchestrator', storage: 'Valkey', ttl: '30 Tage', icon: '📋', risk: 'low' },
{ category: 'Audit Logs', processing: 'Nur truncated IDs, keine PII', storage: 'PostgreSQL', ttl: '90 Tage', icon: '📊', risk: 'low' },
]
// API Endpoints
// Static reference table of the voice service endpoints: HTTP method (or 'WS'
// for the WebSocket endpoint), path and a German description.
const API_ENDPOINTS = [
{ method: 'POST', path: '/api/v1/sessions', description: 'Voice Session erstellen' },
{ method: 'GET', path: '/api/v1/sessions/{id}', description: 'Session Status abrufen' },
{ method: 'DELETE', path: '/api/v1/sessions/{id}', description: 'Session beenden' },
{ method: 'GET', path: '/api/v1/sessions/{id}/tasks', description: 'Pending Tasks abrufen' },
{ method: 'POST', path: '/api/v1/tasks', description: 'Task erstellen' },
{ method: 'GET', path: '/api/v1/tasks/{id}', description: 'Task Status abrufen' },
{ method: 'PUT', path: '/api/v1/tasks/{id}/transition', description: 'Task State aendern' },
{ method: 'DELETE', path: '/api/v1/tasks/{id}', description: 'Task loeschen' },
{ method: 'WS', path: '/ws/voice', description: 'Voice Streaming (WebSocket)' },
{ method: 'GET', path: '/health', description: 'Health Check' },
]
/**
 * Admin documentation page for the voice service.
 *
 * Renders, top to bottom: a PagePurpose header, three quick links, a row of
 * six hard-coded stat tiles, and a tabbed panel. `activeTab` decides which of
 * the six tab bodies (overview, demo, tasks, intents, dsgvo, api) is shown.
 * The tab bodies are built from literals and the module-level tables
 * TASK_STATES, INTENT_GROUPS, DSGVO_CATEGORIES and API_ENDPOINTS; the
 * component itself issues no fetch calls. The only external content is the
 * demo iframe, which is mounted lazily (see `demoLoaded`).
 *
 * NOTE(review): host names (`macmini:3001`, `macmini:8091`), the port and the
 * counts shown in the stat tiles / tab labels are hard-coded strings, so they
 * must be kept in sync with the backing tables by hand.
 */
export default function VoiceMatrixPage() {
// Currently selected tab; the page opens on the architecture overview.
const [activeTab, setActiveTab] = useState<TabType>('overview')
// False until the user clicks "Voice Demo laden"; only then is the demo iframe mounted.
const [demoLoaded, setDemoLoaded] = useState(false)
// Tab bar entries in display order. `id` is inferred as string here and is
// cast to TabType in the button's click handler below.
const tabs = [
{ id: 'overview', name: 'Architektur', icon: '🏗️' },
{ id: 'demo', name: 'Live Demo', icon: '🎤' },
{ id: 'tasks', name: 'Task States', icon: '📋' },
{ id: 'intents', name: 'Intents (22)', icon: '🎯' },
{ id: 'dsgvo', name: 'DSGVO', icon: '🔒' },
{ id: 'api', name: 'API', icon: '🔌' },
]
return (
<div>
{/* Page Purpose */}
<PagePurpose
title="Voice Service"
purpose="Voice-First Interface mit PersonaPlex-7B & TaskOrchestrator. Konfigurieren und testen Sie den Voice-Service fuer Lehrer-Interaktionen per Sprache."
audience={['Entwickler', 'Admins']}
architecture={{
services: ['voice-service (Python, Port 8091)', 'studio-v2 (Next.js)', 'valkey (Cache)'],
databases: ['PostgreSQL', 'Valkey Cache'],
}}
relatedPages={[
{ name: 'Matrix & Jitsi', href: '/communication/matrix', description: 'Kommunikation Monitoring' },
{ name: 'GPU Infrastruktur', href: '/infrastructure/gpu', description: 'GPU fuer Voice-Service' },
]}
collapsible={true}
defaultCollapsed={false}
/>
{/* Quick Links */}
<div className="mb-6 flex flex-wrap gap-3">
<a
href="https://macmini:3001/voice-test"
target="_blank"
rel="noopener noreferrer"
className="flex items-center gap-2 px-4 py-2 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors"
>
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a3 3 0 01-3-3V5a3 3 0 116 0v6a3 3 0 01-3 3z" />
</svg>
Voice Test (Studio)
</a>
<a
href="https://macmini:8091/health"
target="_blank"
rel="noopener noreferrer"
className="flex items-center gap-2 px-4 py-2 bg-green-100 text-green-700 rounded-lg hover:bg-green-200 transition-colors"
>
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
Health Check
</a>
<Link
href="/development/docs"
className="flex items-center gap-2 px-4 py-2 bg-slate-100 text-slate-700 rounded-lg hover:bg-slate-200 transition-colors"
>
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
</svg>
Developer Docs
</Link>
</div>
{/* Stats Overview */}
<div className="grid grid-cols-2 md:grid-cols-6 gap-4 mb-6">
<div className="bg-white rounded-lg shadow p-4">
<div className="text-3xl font-bold text-teal-600">8091</div>
<div className="text-sm text-slate-500">Port</div>
</div>
<div className="bg-white rounded-lg shadow p-4">
<div className="text-3xl font-bold text-blue-600">22</div>
<div className="text-sm text-slate-500">Task Types</div>
</div>
<div className="bg-white rounded-lg shadow p-4">
<div className="text-3xl font-bold text-purple-600">9</div>
<div className="text-sm text-slate-500">Task States</div>
</div>
<div className="bg-white rounded-lg shadow p-4">
<div className="text-3xl font-bold text-green-600">24kHz</div>
<div className="text-sm text-slate-500">Audio Rate</div>
</div>
<div className="bg-white rounded-lg shadow p-4">
<div className="text-3xl font-bold text-orange-600">80ms</div>
<div className="text-sm text-slate-500">Frame Size</div>
</div>
<div className="bg-white rounded-lg shadow p-4">
<div className="text-3xl font-bold text-red-600">0</div>
<div className="text-sm text-slate-500">Audio Persist</div>
</div>
</div>
{/* Tabs */}
<div className="bg-white rounded-lg shadow mb-6">
<div className="border-b border-slate-200 px-4">
<div className="flex gap-1 overflow-x-auto">
{tabs.map((tab) => (
<button
key={tab.id}
onClick={() => setActiveTab(tab.id as TabType)}
className={`px-4 py-3 text-sm font-medium whitespace-nowrap transition-colors border-b-2 ${
activeTab === tab.id
? 'border-teal-600 text-teal-600'
: 'border-transparent text-slate-500 hover:text-slate-700'
}`}
>
<span className="mr-2">{tab.icon}</span>
{tab.name}
</button>
))}
</div>
</div>
<div className="p-6">
{/* Overview Tab */}
{activeTab === 'overview' && (
<div className="space-y-6">
<h3 className="text-lg font-semibold text-slate-900">Voice-First Architektur</h3>
{/* Architecture Diagram */}
<div className="bg-slate-50 rounded-lg p-6 font-mono text-sm overflow-x-auto">
<pre className="text-slate-700">{`
┌──────────────────────────────────────────────────────────────────┐
│ LEHRERGERAET (PWA / App) │
│ ┌────────────────────────────────────────────────────────────┐ │
│ │ VoiceCapture.tsx │ voice-encryption.ts │ voice-api.ts │ │
│ │ Mikrofon │ AES-256-GCM │ WebSocket Client │ │
│ └────────────────────────────────────────────────────────────┘ │
└───────────────────────────┬──────────────────────────────────────┘
│ WebSocket (wss://)
┌──────────────────────────────────────────────────────────────────┐
│ VOICE SERVICE (Port 8091) │
│ ┌────────────────────────────────────────────────────────────┐ │
│ │ main.py │ streaming.py │ sessions.py │ tasks.py │ │
│ └────────────────────────────────────────────────────────────┘ │
│ ┌────────────────────────────────────────────────────────────┐ │
│ │ task_orchestrator.py │ intent_router.py │ encryption │ │
│ └────────────────────────────────────────────────────────────┘ │
└───────────────────────────┬──────────────────────────────────────┘
┌──────────────────┼──────────────────┐
▼ ▼ ▼
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
│ PersonaPlex-7B │ │ Ollama Fallback │ │ Valkey Cache │
│ (A100 GPU) │ │ (Mac Mini) │ │ (Sessions) │
└─────────────────┘ └─────────────────┘ └─────────────────┘
`}</pre>
</div>
{/* Technology Stack */}
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
<div className="bg-blue-50 border border-blue-200 rounded-lg p-4">
<h4 className="font-semibold text-blue-800 mb-2">Voice Model (Produktion)</h4>
<p className="text-sm text-blue-700">PersonaPlex-7B (NVIDIA)</p>
<p className="text-xs text-blue-600 mt-1">Full-Duplex Speech-to-Speech</p>
<p className="text-xs text-blue-500">Lizenz: MIT + NVIDIA Open Model</p>
</div>
<div className="bg-green-50 border border-green-200 rounded-lg p-4">
<h4 className="font-semibold text-green-800 mb-2">Agent Orchestration</h4>
<p className="text-sm text-green-700">TaskOrchestrator</p>
<p className="text-xs text-green-600 mt-1">Task State Machine</p>
<p className="text-xs text-green-500">Lizenz: Proprietary</p>
</div>
<div className="bg-purple-50 border border-purple-200 rounded-lg p-4">
<h4 className="font-semibold text-purple-800 mb-2">Audio Codec</h4>
<p className="text-sm text-purple-700">Mimi (24kHz, 80ms)</p>
<p className="text-xs text-purple-600 mt-1">Low-Latency Streaming</p>
<p className="text-xs text-purple-500">Lizenz: MIT</p>
</div>
</div>
{/* Key Files */}
<div>
<h4 className="font-semibold text-slate-800 mb-3">Wichtige Dateien</h4>
<div className="bg-white border border-slate-200 rounded-lg overflow-hidden">
<table className="min-w-full divide-y divide-slate-200">
<thead className="bg-slate-50">
<tr>
<th className="px-4 py-2 text-left text-xs font-medium text-slate-500 uppercase">Datei</th>
<th className="px-4 py-2 text-left text-xs font-medium text-slate-500 uppercase">Beschreibung</th>
</tr>
</thead>
<tbody className="divide-y divide-slate-200">
<tr><td className="px-4 py-2 font-mono text-sm">voice-service/main.py</td><td className="px-4 py-2 text-sm text-slate-600">FastAPI Entry, WebSocket Handler</td></tr>
<tr><td className="px-4 py-2 font-mono text-sm">voice-service/services/task_orchestrator.py</td><td className="px-4 py-2 text-sm text-slate-600">Task State Machine</td></tr>
<tr><td className="px-4 py-2 font-mono text-sm">voice-service/services/intent_router.py</td><td className="px-4 py-2 text-sm text-slate-600">Intent Detection (22 Types)</td></tr>
<tr><td className="px-4 py-2 font-mono text-sm">voice-service/services/encryption_service.py</td><td className="px-4 py-2 text-sm text-slate-600">Namespace Key Management</td></tr>
<tr><td className="px-4 py-2 font-mono text-sm">studio-v2/components/voice/VoiceCapture.tsx</td><td className="px-4 py-2 text-sm text-slate-600">Frontend Mikrofon + Crypto</td></tr>
<tr><td className="px-4 py-2 font-mono text-sm">studio-v2/lib/voice/voice-encryption.ts</td><td className="px-4 py-2 text-sm text-slate-600">AES-256-GCM Client-side</td></tr>
</tbody>
</table>
</div>
</div>
</div>
)}
{/* Demo Tab */}
{activeTab === 'demo' && (
<div className="space-y-4">
<div className="flex items-center justify-between">
<h3 className="text-lg font-semibold text-slate-900">Live Voice Demo</h3>
<a
href="https://macmini:3001/voice-test"
target="_blank"
rel="noopener noreferrer"
className="text-sm text-teal-600 hover:text-teal-700 flex items-center gap-1"
>
In neuem Tab oeffnen
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14" />
</svg>
</a>
</div>
<div className="bg-slate-100 rounded-lg p-4 text-sm text-slate-600 mb-4">
<p><strong>Hinweis:</strong> Die Demo erfordert, dass der Voice Service (Port 8091) und das Studio-v2 Frontend (Port 3001) laufen.</p>
<code className="block mt-2 bg-slate-200 p-2 rounded">docker compose up -d voice-service && cd studio-v2 && npm run dev</code>
</div>
{/* Embedded Demo */}
<div className="relative bg-slate-900 rounded-lg overflow-hidden" style={{ height: '600px' }}>
{!demoLoaded && (
<div className="absolute inset-0 flex items-center justify-center">
<button
onClick={() => setDemoLoaded(true)}
className="px-6 py-3 bg-teal-600 text-white rounded-lg hover:bg-teal-700 transition-colors flex items-center gap-2"
>
<svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
Voice Demo laden
</button>
</div>
)}
{demoLoaded && (
<iframe
src="https://macmini:3001/voice-test?embed=true"
className="w-full h-full border-0"
title="Voice Demo"
allow="microphone"
/>
)}
</div>
</div>
)}
{/* Task States Tab */}
{activeTab === 'tasks' && (
<div className="space-y-6">
<h3 className="text-lg font-semibold text-slate-900">Task State Machine (TaskOrchestrator)</h3>
{/* State Diagram */}
<div className="bg-slate-50 rounded-lg p-6 font-mono text-sm overflow-x-auto">
<pre className="text-slate-700">{`
DRAFT → QUEUED → RUNNING → READY
┌───────────┴───────────┐
│ │
APPROVED REJECTED
│ │
COMPLETED DRAFT (revision)
Any State → EXPIRED (TTL)
Any State → PAUSED (User Interrupt)
`}</pre>
</div>
{/* States Table */}
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
{TASK_STATES.map((state) => (
<div key={state.state} className={`${state.color} rounded-lg p-4`}>
<div className="font-semibold text-lg">{state.state}</div>
<p className="text-sm mt-1">{state.description}</p>
{state.next.length > 0 && (
<div className="mt-2 text-xs">
<span className="opacity-75">Naechste:</span>{' '}
{state.next.join(', ')}
</div>
)}
</div>
))}
</div>
</div>
)}
{/* Intents Tab */}
{activeTab === 'intents' && (
<div className="space-y-6">
<h3 className="text-lg font-semibold text-slate-900">Intent Types (22 unterstuetzte Typen)</h3>
{INTENT_GROUPS.map((group) => (
<div key={group.group} className={`${group.color} border rounded-lg p-4`}>
<h4 className="font-semibold text-slate-800 mb-3">{group.group}</h4>
<div className="space-y-2">
{group.intents.map((intent) => (
<div key={intent.type} className="bg-white rounded-lg p-3 shadow-sm">
<div className="flex items-start justify-between">
<div>
<code className="text-sm font-mono text-teal-700 bg-teal-50 px-2 py-0.5 rounded">
{intent.type}
</code>
<p className="text-sm text-slate-600 mt-1">{intent.description}</p>
</div>
</div>
<div className="mt-2 text-xs text-slate-500 italic">
Beispiel: &quot;{intent.example}&quot;
</div>
</div>
))}
</div>
</div>
))}
</div>
)}
{/* DSGVO Tab */}
{activeTab === 'dsgvo' && (
<div className="space-y-6">
<h3 className="text-lg font-semibold text-slate-900">DSGVO-Compliance</h3>
{/* Key Principles */}
<div className="bg-green-50 border border-green-200 rounded-lg p-4">
<h4 className="font-semibold text-green-800 mb-2">Kernprinzipien</h4>
<ul className="list-disc list-inside text-sm text-green-700 space-y-1">
<li><strong>Audio NIEMALS persistiert</strong> - Nur transient im RAM</li>
<li><strong>Namespace-Verschluesselung</strong> - Key nur auf Lehrergeraet</li>
<li><strong>Keine Klartext-PII serverseitig</strong> - Nur verschluesselt oder pseudonymisiert</li>
<li><strong>TTL-basierte Auto-Loeschung</strong> - 7/30/90 Tage je nach Kategorie</li>
</ul>
</div>
{/* Data Categories Table */}
<div className="bg-white border border-slate-200 rounded-lg overflow-hidden">
<table className="min-w-full divide-y divide-slate-200">
<thead className="bg-slate-50">
<tr>
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Kategorie</th>
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Verarbeitung</th>
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Speicherort</th>
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">TTL</th>
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Risiko</th>
</tr>
</thead>
<tbody className="divide-y divide-slate-200">
{DSGVO_CATEGORIES.map((cat) => (
<tr key={cat.category}>
<td className="px-4 py-3">
<span className="mr-2">{cat.icon}</span>
<span className="font-medium">{cat.category}</span>
</td>
<td className="px-4 py-3 text-sm text-slate-600">{cat.processing}</td>
<td className="px-4 py-3 text-sm text-slate-600">{cat.storage}</td>
<td className="px-4 py-3 text-sm text-slate-600">{cat.ttl}</td>
<td className="px-4 py-3">
<span className={`px-2 py-1 rounded text-xs font-medium ${
cat.risk === 'low' ? 'bg-green-100 text-green-700' :
cat.risk === 'medium' ? 'bg-yellow-100 text-yellow-700' :
'bg-red-100 text-red-700'
}`}>
{cat.risk.toUpperCase()}
</span>
</td>
</tr>
))}
</tbody>
</table>
</div>
{/* Audit Log Info */}
<div className="bg-slate-50 border border-slate-200 rounded-lg p-4">
<h4 className="font-semibold text-slate-800 mb-2">Audit Logs (ohne PII)</h4>
<div className="grid grid-cols-2 gap-4 text-sm">
<div>
<span className="text-green-600 font-medium">Erlaubt:</span>
<ul className="list-disc list-inside text-slate-600 mt-1">
<li>ref_id (truncated)</li>
<li>content_type</li>
<li>size_bytes</li>
<li>ttl_hours</li>
</ul>
</div>
<div>
<span className="text-red-600 font-medium">Verboten:</span>
<ul className="list-disc list-inside text-slate-600 mt-1">
<li>user_name</li>
<li>content / transcript</li>
<li>email</li>
<li>student_name</li>
</ul>
</div>
</div>
</div>
</div>
)}
{/* API Tab */}
{activeTab === 'api' && (
<div className="space-y-6">
<h3 className="text-lg font-semibold text-slate-900">Voice Service API (Port 8091)</h3>
{/* REST Endpoints */}
<div className="bg-white border border-slate-200 rounded-lg overflow-hidden">
<table className="min-w-full divide-y divide-slate-200">
<thead className="bg-slate-50">
<tr>
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Methode</th>
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Endpoint</th>
<th className="px-4 py-3 text-left text-xs font-medium text-slate-500 uppercase">Beschreibung</th>
</tr>
</thead>
<tbody className="divide-y divide-slate-200">
{API_ENDPOINTS.map((ep, idx) => (
<tr key={idx}>
<td className="px-4 py-3">
<span className={`px-2 py-1 rounded text-xs font-medium ${
ep.method === 'GET' ? 'bg-green-100 text-green-700' :
ep.method === 'POST' ? 'bg-blue-100 text-blue-700' :
ep.method === 'PUT' ? 'bg-yellow-100 text-yellow-700' :
ep.method === 'DELETE' ? 'bg-red-100 text-red-700' :
'bg-purple-100 text-purple-700'
}`}>
{ep.method}
</span>
</td>
<td className="px-4 py-3 font-mono text-sm">{ep.path}</td>
<td className="px-4 py-3 text-sm text-slate-600">{ep.description}</td>
</tr>
))}
</tbody>
</table>
</div>
{/* WebSocket Protocol */}
<div className="bg-slate-50 rounded-lg p-4">
<h4 className="font-semibold text-slate-800 mb-3">WebSocket Protocol</h4>
<div className="grid grid-cols-1 md:grid-cols-2 gap-4 text-sm">
<div className="bg-white rounded-lg p-3 border border-slate-200">
<div className="font-medium text-slate-700 mb-2">Client Server</div>
<ul className="list-disc list-inside text-slate-600 space-y-1">
<li><code className="bg-slate-100 px-1 rounded">Binary</code>: Int16 PCM Audio (24kHz, 80ms)</li>
<li><code className="bg-slate-100 px-1 rounded">JSON</code>: {`{type: "config|end_turn|interrupt"}`}</li>
</ul>
</div>
<div className="bg-white rounded-lg p-3 border border-slate-200">
<div className="font-medium text-slate-700 mb-2">Server Client</div>
<ul className="list-disc list-inside text-slate-600 space-y-1">
<li><code className="bg-slate-100 px-1 rounded">Binary</code>: Audio Response (base64)</li>
<li><code className="bg-slate-100 px-1 rounded">JSON</code>: {`{type: "transcript|intent|status|error"}`}</li>
</ul>
</div>
</div>
</div>
{/* Example curl commands */}
<div className="bg-slate-900 rounded-lg p-4 text-sm">
<h4 className="font-semibold text-slate-300 mb-3">Beispiel: Session erstellen</h4>
<pre className="text-green-400 overflow-x-auto">{`curl -X POST https://macmini:8091/api/v1/sessions \\
-H "Content-Type: application/json" \\
-d '{
"namespace_id": "ns-12345678abcdef12345678abcdef12",
"key_hash": "sha256:dGVzdGtleWhhc2h0ZXN0a2V5aGFzaHRlc3Q=",
"device_type": "pwa"
}'`}</pre>
</div>
</div>
)}
</div>
</div>
</div>
)
}

View File

@@ -1,635 +0,0 @@
'use client'
/**
* Video & Chat Admin Page
*
* Matrix & Jitsi Monitoring Dashboard
* Provides system statistics, active calls, user metrics, and service health
* Migrated from website/app/admin/communication
*/
import { useEffect, useState, useCallback } from 'react'
import Link from 'next/link'
import { PagePurpose } from '@/components/common/PagePurpose'
import { getModuleByHref } from '@/lib/navigation'
/**
 * Matrix (Synapse) figures shown on the Matrix status card.
 */
interface MatrixStats {
  /** Service health; rendered as the card's status badge. */
  status: 'online' | 'offline' | 'degraded'

  /** Shown under "Benutzer". */
  total_users: number
  /** Shown under "Aktiv". */
  active_users: number

  /** Shown under "Raeume". */
  total_rooms: number
  active_rooms: number

  /** Shown as "Nachrichten heute"; also feeds the local traffic estimate. */
  messages_today: number
  /** Shown as "Diese Woche". */
  messages_this_week: number
}
/**
 * Jitsi Meet figures shown on the Jitsi status card and used by the
 * page's local traffic estimates.
 */
interface JitsiStats {
  /** Service health; rendered as the card's status badge. */
  status: 'online' | 'offline' | 'degraded'

  /** Shown under "Live Calls". */
  active_meetings: number
  /** Shown under "Teilnehmer"; also drives the estimated bitrate / hourly traffic. */
  total_participants: number
  /** Shown under "Calls heute". */
  meetings_today: number

  /** Formatted with formatDuration for "Durchschnittliche Dauer". */
  average_duration_minutes: number
  /** Shown as "Peak gleichzeitig"; also used for the resource recommendation. */
  peak_concurrent_users: number
  /** Input to the daily and monthly traffic estimates. */
  total_minutes_today: number
}
/**
 * Traffic figures per service plus combined totals.
 *
 * The page treats this whole object as optional: where a value is missing
 * it falls back to an estimate computed from MatrixStats / JitsiStats.
 */
interface TrafficStats {
  /** Combined figures used for the "Eingehend" / "Ausgehend" / "Geschaetzt/Monat" tiles. */
  total: {
    bandwidth_in_mb: number
    bandwidth_out_mb: number
    estimated_monthly_gb: number
  }

  /** Matrix messaging traffic. */
  matrix: {
    bandwidth_in_mb: number
    bandwidth_out_mb: number
    messages_per_minute: number
    media_uploads_today: number
    media_size_mb: number
  }

  /** Jitsi video traffic. */
  jitsi: {
    bandwidth_in_mb: number
    bandwidth_out_mb: number
    video_streams_active: number
    audio_streams_active: number
    estimated_hourly_gb: number
  }
}
/**
 * Shape of the `stats` state on the Video & Chat page, filled from the
 * JSON returned by `/api/admin/communication/stats` (or from an all-zero
 * offline placeholder when the request fails).
 */
interface CommunicationStats {
  /** ISO timestamp string; the offline placeholder sets it to the current time. */
  last_updated: string
  matrix: MatrixStats
  jitsi: JitsiStats
  /** Optional; the page computes estimates when it is absent. */
  traffic?: TrafficStats
}
/**
 * One entry of the `activeMeetings` list, taken from the `active_meetings`
 * field of the stats response.
 */
interface ActiveMeeting {
  // Identification
  room_name: string
  display_name: string

  // Current size
  participants: number

  // Timing
  started_at: string
  duration_minutes: number
}
/**
 * One entry of the `recentRooms` list, taken from the `recent_rooms`
 * field of the stats response.
 */
interface RecentRoom {
  room_id: string
  name: string
  /** Room category; presumably what getRoomTypeBadge colours — TODO confirm at the call site. */
  room_type: 'class' | 'parent' | 'staff' | 'general'
  member_count: number
  last_activity: string
}
export default function VideoChatPage() {
const [stats, setStats] = useState<CommunicationStats | null>(null)
const [activeMeetings, setActiveMeetings] = useState<ActiveMeeting[]>([])
const [recentRooms, setRecentRooms] = useState<RecentRoom[]>([])
const [loading, setLoading] = useState(true)
const [error, setError] = useState<string | null>(null)
const moduleInfo = getModuleByHref('/communication/video-chat')
// Use local API proxy
const fetchStats = useCallback(async () => {
try {
const response = await fetch('/api/admin/communication/stats')
if (!response.ok) {
throw new Error(`HTTP ${response.status}`)
}
const data = await response.json()
setStats(data)
setActiveMeetings(data.active_meetings || [])
setRecentRooms(data.recent_rooms || [])
setError(null)
} catch (err) {
setError(err instanceof Error ? err.message : 'Verbindungsfehler')
// Set mock data for display purposes when API unavailable
setStats({
matrix: {
total_users: 0,
active_users: 0,
total_rooms: 0,
active_rooms: 0,
messages_today: 0,
messages_this_week: 0,
status: 'offline'
},
jitsi: {
active_meetings: 0,
total_participants: 0,
meetings_today: 0,
average_duration_minutes: 0,
peak_concurrent_users: 0,
total_minutes_today: 0,
status: 'offline'
},
last_updated: new Date().toISOString()
})
} finally {
setLoading(false)
}
}, [])
useEffect(() => {
fetchStats()
}, [fetchStats])
// Auto-refresh every 15 seconds
useEffect(() => {
const interval = setInterval(fetchStats, 15000)
return () => clearInterval(interval)
}, [fetchStats])
const getStatusBadge = (status: string) => {
const baseClasses = 'px-3 py-1 rounded-full text-xs font-semibold uppercase'
switch (status) {
case 'online':
return `${baseClasses} bg-green-100 text-green-800`
case 'degraded':
return `${baseClasses} bg-yellow-100 text-yellow-800`
case 'offline':
return `${baseClasses} bg-red-100 text-red-800`
default:
return `${baseClasses} bg-slate-100 text-slate-600`
}
}
const getRoomTypeBadge = (type: string) => {
const baseClasses = 'px-2 py-0.5 rounded text-xs font-medium'
switch (type) {
case 'class':
return `${baseClasses} bg-blue-100 text-blue-700`
case 'parent':
return `${baseClasses} bg-purple-100 text-purple-700`
case 'staff':
return `${baseClasses} bg-orange-100 text-orange-700`
default:
return `${baseClasses} bg-slate-100 text-slate-600`
}
}
const formatDuration = (minutes: number) => {
if (minutes < 60) return `${Math.round(minutes)} Min.`
const hours = Math.floor(minutes / 60)
const mins = Math.round(minutes % 60)
return `${hours}h ${mins}m`
}
const formatTimeAgo = (dateStr: string) => {
const date = new Date(dateStr)
const now = new Date()
const diffMs = now.getTime() - date.getTime()
const diffMins = Math.floor(diffMs / 60000)
if (diffMins < 1) return 'gerade eben'
if (diffMins < 60) return `vor ${diffMins} Min.`
if (diffMins < 1440) return `vor ${Math.floor(diffMins / 60)} Std.`
return `vor ${Math.floor(diffMins / 1440)} Tagen`
}
// Traffic estimation helpers for SysEleven planning
const calculateEstimatedTraffic = (direction: 'in' | 'out'): number => {
const messages = stats?.matrix?.messages_today || 0
const callMinutes = stats?.jitsi?.total_minutes_today || 0
const participants = stats?.jitsi?.total_participants || 0
const messageTrafficMB = messages * 0.002
const videoTrafficMB = callMinutes * participants * 0.011
if (direction === 'in') {
return messageTrafficMB * 0.3 + videoTrafficMB * 0.4
}
return messageTrafficMB * 0.7 + videoTrafficMB * 0.6
}
const calculateHourlyEstimate = (): number => {
const activeParticipants = stats?.jitsi?.total_participants || 0
return activeParticipants * 0.675
}
const calculateMonthlyEstimate = (): number => {
const dailyCallMinutes = stats?.jitsi?.total_minutes_today || 0
const avgParticipants = stats?.jitsi?.peak_concurrent_users || 1
const monthlyMinutes = dailyCallMinutes * 22
return (monthlyMinutes * avgParticipants * 11) / 1024
}
const getResourceRecommendation = (): string => {
const peakUsers = stats?.jitsi?.peak_concurrent_users || 0
const monthlyGB = calculateMonthlyEstimate()
if (monthlyGB < 10 || peakUsers < 5) {
return 'Starter (1 vCPU, 2GB RAM, 100GB Traffic)'
} else if (monthlyGB < 50 || peakUsers < 20) {
return 'Standard (2 vCPU, 4GB RAM, 500GB Traffic)'
} else if (monthlyGB < 200 || peakUsers < 50) {
return 'Professional (4 vCPU, 8GB RAM, 2TB Traffic)'
} else {
return 'Enterprise (8+ vCPU, 16GB+ RAM, Unlimited Traffic)'
}
}
return (
<div>
{/* Page Purpose */}
<PagePurpose
title={moduleInfo?.module.name || 'Video & Chat'}
purpose={moduleInfo?.module.purpose || 'Matrix & Jitsi Monitoring Dashboard'}
audience={moduleInfo?.module.audience || ['Admins', 'DevOps']}
architecture={{
services: ['synapse (Matrix)', 'jitsi-meet', 'prosody', 'jvb'],
databases: ['PostgreSQL', 'synapse-db'],
}}
collapsible={true}
defaultCollapsed={true}
/>
{/* Quick Actions */}
<div className="flex gap-3 mb-6">
<Link
href="/communication/video-chat/wizard"
className="px-4 py-2 bg-green-600 text-white rounded-lg hover:bg-green-700 transition-colors text-sm font-medium"
>
Test Wizard starten
</Link>
<button
onClick={fetchStats}
disabled={loading}
className="px-4 py-2 border border-slate-300 rounded-lg hover:bg-slate-50 disabled:opacity-50 text-sm"
>
{loading ? 'Lade...' : 'Aktualisieren'}
</button>
</div>
{/* Service Status Overview */}
<div className="grid grid-cols-1 md:grid-cols-2 gap-6 mb-6">
{/* Matrix Status Card */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<div className="flex items-center justify-between mb-4">
<div className="flex items-center gap-3">
<div className="w-10 h-10 bg-purple-100 rounded-lg flex items-center justify-center">
<svg className="w-6 h-6 text-purple-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M8 12h.01M12 12h.01M16 12h.01M21 12c0 4.418-4.03 8-9 8a9.863 9.863 0 01-4.255-.949L3 20l1.395-3.72C3.512 15.042 3 13.574 3 12c0-4.418 4.03-8 9-8s9 3.582 9 8z" />
</svg>
</div>
<div>
<h3 className="font-semibold text-slate-900">Matrix (Synapse)</h3>
<p className="text-sm text-slate-500">E2EE Messaging</p>
</div>
</div>
<span className={getStatusBadge(stats?.matrix.status || 'offline')}>
{stats?.matrix.status || 'offline'}
</span>
</div>
<div className="grid grid-cols-3 gap-4">
<div>
<div className="text-2xl font-bold text-slate-900">{stats?.matrix.total_users || 0}</div>
<div className="text-xs text-slate-500">Benutzer</div>
</div>
<div>
<div className="text-2xl font-bold text-slate-900">{stats?.matrix.active_users || 0}</div>
<div className="text-xs text-slate-500">Aktiv</div>
</div>
<div>
<div className="text-2xl font-bold text-slate-900">{stats?.matrix.total_rooms || 0}</div>
<div className="text-xs text-slate-500">Raeume</div>
</div>
</div>
<div className="mt-4 pt-4 border-t border-slate-100">
<div className="flex justify-between text-sm">
<span className="text-slate-500">Nachrichten heute</span>
<span className="font-medium">{stats?.matrix.messages_today || 0}</span>
</div>
<div className="flex justify-between text-sm mt-1">
<span className="text-slate-500">Diese Woche</span>
<span className="font-medium">{stats?.matrix.messages_this_week || 0}</span>
</div>
</div>
</div>
{/* Jitsi Status Card */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<div className="flex items-center justify-between mb-4">
<div className="flex items-center gap-3">
<div className="w-10 h-10 bg-blue-100 rounded-lg flex items-center justify-center">
<svg className="w-6 h-6 text-blue-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
</svg>
</div>
<div>
<h3 className="font-semibold text-slate-900">Jitsi Meet</h3>
<p className="text-sm text-slate-500">Videokonferenzen</p>
</div>
</div>
<span className={getStatusBadge(stats?.jitsi.status || 'offline')}>
{stats?.jitsi.status || 'offline'}
</span>
</div>
<div className="grid grid-cols-3 gap-4">
<div>
<div className="text-2xl font-bold text-green-600">{stats?.jitsi.active_meetings || 0}</div>
<div className="text-xs text-slate-500">Live Calls</div>
</div>
<div>
<div className="text-2xl font-bold text-slate-900">{stats?.jitsi.total_participants || 0}</div>
<div className="text-xs text-slate-500">Teilnehmer</div>
</div>
<div>
<div className="text-2xl font-bold text-slate-900">{stats?.jitsi.meetings_today || 0}</div>
<div className="text-xs text-slate-500">Calls heute</div>
</div>
</div>
<div className="mt-4 pt-4 border-t border-slate-100">
<div className="flex justify-between text-sm">
<span className="text-slate-500">Durchschnittliche Dauer</span>
<span className="font-medium">{formatDuration(stats?.jitsi.average_duration_minutes || 0)}</span>
</div>
<div className="flex justify-between text-sm mt-1">
<span className="text-slate-500">Peak gleichzeitig</span>
<span className="font-medium">{stats?.jitsi.peak_concurrent_users || 0} Nutzer</span>
</div>
</div>
</div>
</div>
{/* Traffic & Bandwidth Statistics */}
<div className="bg-white rounded-xl border border-slate-200 p-6 mb-6">
<div className="flex items-center justify-between mb-4">
<div className="flex items-center gap-3">
<div className="w-10 h-10 bg-emerald-100 rounded-lg flex items-center justify-center">
<svg className="w-6 h-6 text-emerald-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 7h8m0 0v8m0-8l-8 8-4-4-6 6" />
</svg>
</div>
<div>
<h3 className="font-semibold text-slate-900">Traffic & Bandbreite</h3>
<p className="text-sm text-slate-500">SysEleven Ressourcenplanung</p>
</div>
</div>
<span className="px-3 py-1 rounded-full text-xs font-semibold uppercase bg-emerald-100 text-emerald-800">
Live
</span>
</div>
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-4">
<div className="bg-slate-50 rounded-lg p-4">
<div className="text-xs text-slate-500 mb-1">Eingehend (heute)</div>
<div className="text-2xl font-bold text-slate-900">
{stats?.traffic?.total?.bandwidth_in_mb?.toFixed(1) || calculateEstimatedTraffic('in').toFixed(1)} MB
</div>
</div>
<div className="bg-slate-50 rounded-lg p-4">
<div className="text-xs text-slate-500 mb-1">Ausgehend (heute)</div>
<div className="text-2xl font-bold text-slate-900">
{stats?.traffic?.total?.bandwidth_out_mb?.toFixed(1) || calculateEstimatedTraffic('out').toFixed(1)} MB
</div>
</div>
<div className="bg-slate-50 rounded-lg p-4">
<div className="text-xs text-slate-500 mb-1">Geschaetzt/Stunde</div>
<div className="text-2xl font-bold text-blue-600">
{stats?.traffic?.jitsi?.estimated_hourly_gb?.toFixed(2) || calculateHourlyEstimate().toFixed(2)} GB
</div>
</div>
<div className="bg-slate-50 rounded-lg p-4">
<div className="text-xs text-slate-500 mb-1">Geschaetzt/Monat</div>
<div className="text-2xl font-bold text-emerald-600">
{stats?.traffic?.total?.estimated_monthly_gb?.toFixed(1) || calculateMonthlyEstimate().toFixed(1)} GB
</div>
</div>
</div>
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
{/* Matrix Traffic */}
<div className="border border-slate-200 rounded-lg p-4">
<div className="flex items-center gap-2 mb-3">
<div className="w-3 h-3 bg-purple-500 rounded-full"></div>
<span className="text-sm font-medium text-slate-700">Matrix Messaging</span>
</div>
<div className="space-y-2 text-sm">
<div className="flex justify-between">
<span className="text-slate-500">Nachrichten/Min</span>
<span className="font-medium">{stats?.traffic?.matrix?.messages_per_minute || Math.round((stats?.matrix?.messages_today || 0) / (new Date().getHours() || 1) / 60)}</span>
</div>
<div className="flex justify-between">
<span className="text-slate-500">Media Uploads heute</span>
<span className="font-medium">{stats?.traffic?.matrix?.media_uploads_today || 0}</span>
</div>
<div className="flex justify-between">
<span className="text-slate-500">Media Groesse</span>
<span className="font-medium">{stats?.traffic?.matrix?.media_size_mb?.toFixed(1) || '0.0'} MB</span>
</div>
</div>
</div>
{/* Jitsi Traffic */}
<div className="border border-slate-200 rounded-lg p-4">
<div className="flex items-center gap-2 mb-3">
<div className="w-3 h-3 bg-blue-500 rounded-full"></div>
<span className="text-sm font-medium text-slate-700">Jitsi Video</span>
</div>
<div className="space-y-2 text-sm">
<div className="flex justify-between">
<span className="text-slate-500">Video Streams aktiv</span>
<span className="font-medium">{stats?.traffic?.jitsi?.video_streams_active || (stats?.jitsi?.total_participants || 0)}</span>
</div>
<div className="flex justify-between">
<span className="text-slate-500">Audio Streams aktiv</span>
<span className="font-medium">{stats?.traffic?.jitsi?.audio_streams_active || (stats?.jitsi?.total_participants || 0)}</span>
</div>
<div className="flex justify-between">
<span className="text-slate-500">Bitrate geschaetzt</span>
<span className="font-medium">{((stats?.jitsi?.total_participants || 0) * 1.5).toFixed(1)} Mbps</span>
</div>
</div>
</div>
</div>
{/* SysEleven Recommendation */}
<div className="mt-4 p-4 bg-emerald-50 border border-emerald-200 rounded-lg">
<h4 className="text-sm font-semibold text-emerald-800 mb-2">SysEleven Empfehlung</h4>
<div className="text-sm text-emerald-700">
<p>Basierend auf aktuellem Traffic: <strong>{getResourceRecommendation()}</strong></p>
<p className="mt-1 text-xs text-emerald-600">
Peak Teilnehmer: {stats?.jitsi?.peak_concurrent_users || 0} |
Durchschnittliche Call-Dauer: {stats?.jitsi?.average_duration_minutes?.toFixed(0) || 0} Min. |
Calls heute: {stats?.jitsi?.meetings_today || 0}
</p>
</div>
</div>
</div>
{/* Active Meetings */}
<div className="bg-white rounded-xl border border-slate-200 p-6 mb-6">
<div className="flex items-center justify-between mb-4">
<h3 className="font-semibold text-slate-900">Aktive Meetings</h3>
</div>
{activeMeetings.length === 0 ? (
<div className="text-center py-8 text-slate-500">
<svg className="w-12 h-12 mx-auto mb-3 text-slate-300" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
</svg>
<p>Keine aktiven Meetings</p>
</div>
) : (
<div className="overflow-x-auto">
<table className="w-full">
<thead>
<tr className="text-left text-xs text-slate-500 uppercase border-b border-slate-200">
<th className="pb-3 pr-4">Meeting</th>
<th className="pb-3 pr-4">Teilnehmer</th>
<th className="pb-3 pr-4">Gestartet</th>
<th className="pb-3">Dauer</th>
</tr>
</thead>
<tbody className="divide-y divide-slate-100">
{activeMeetings.map((meeting, idx) => (
<tr key={idx} className="text-sm">
<td className="py-3 pr-4">
<div className="font-medium text-slate-900">{meeting.display_name}</div>
<div className="text-xs text-slate-500">{meeting.room_name}</div>
</td>
<td className="py-3 pr-4">
<span className="inline-flex items-center gap-1">
<span className="w-2 h-2 bg-green-500 rounded-full animate-pulse" />
{meeting.participants}
</span>
</td>
<td className="py-3 pr-4 text-slate-500">{formatTimeAgo(meeting.started_at)}</td>
<td className="py-3 font-medium">{formatDuration(meeting.duration_minutes)}</td>
</tr>
))}
</tbody>
</table>
</div>
)}
</div>
{/* Recent Chat Rooms & Usage Stats */}
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6 mb-6">
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h3 className="font-semibold text-slate-900 mb-4">Aktive Chat-Raeume</h3>
{recentRooms.length === 0 ? (
<div className="text-center py-6 text-slate-500">
<p>Keine aktiven Raeume</p>
</div>
) : (
<div className="space-y-3">
{recentRooms.slice(0, 5).map((room, idx) => (
<div key={idx} className="flex items-center justify-between p-3 bg-slate-50 rounded-lg">
<div className="flex items-center gap-3">
<div className="w-8 h-8 bg-slate-200 rounded-lg flex items-center justify-center">
<svg className="w-4 h-4 text-slate-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M17 20h5v-2a3 3 0 00-5.356-1.857M17 20H7m10 0v-2c0-.656-.126-1.283-.356-1.857M7 20H2v-2a3 3 0 015.356-1.857M7 20v-2c0-.656.126-1.283.356-1.857m0 0a5.002 5.002 0 019.288 0M15 7a3 3 0 11-6 0 3 3 0 016 0z" />
</svg>
</div>
<div>
<div className="font-medium text-slate-900 text-sm">{room.name}</div>
<div className="text-xs text-slate-500">{room.member_count} Mitglieder</div>
</div>
</div>
<div className="flex items-center gap-2">
<span className={getRoomTypeBadge(room.room_type)}>{room.room_type}</span>
<span className="text-xs text-slate-400">{formatTimeAgo(room.last_activity)}</span>
</div>
</div>
))}
</div>
)}
</div>
{/* Usage Statistics */}
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h3 className="font-semibold text-slate-900 mb-4">Nutzungsstatistiken</h3>
<div className="space-y-4">
<div>
<div className="flex justify-between text-sm mb-1">
<span className="text-slate-600">Call-Minuten heute</span>
<span className="font-semibold">{stats?.jitsi.total_minutes_today || 0} Min.</span>
</div>
<div className="w-full bg-slate-100 rounded-full h-2">
<div
className="bg-blue-600 h-2 rounded-full transition-all"
style={{ width: `${Math.min((stats?.jitsi.total_minutes_today || 0) / 500 * 100, 100)}%` }}
/>
</div>
</div>
<div>
<div className="flex justify-between text-sm mb-1">
<span className="text-slate-600">Aktive Chat-Raeume</span>
<span className="font-semibold">{stats?.matrix.active_rooms || 0} / {stats?.matrix.total_rooms || 0}</span>
</div>
<div className="w-full bg-slate-100 rounded-full h-2">
<div
className="bg-purple-600 h-2 rounded-full transition-all"
style={{ width: `${stats?.matrix.total_rooms ? ((stats.matrix.active_rooms / stats.matrix.total_rooms) * 100) : 0}%` }}
/>
</div>
</div>
<div>
<div className="flex justify-between text-sm mb-1">
<span className="text-slate-600">Aktive Nutzer</span>
<span className="font-semibold">{stats?.matrix.active_users || 0} / {stats?.matrix.total_users || 0}</span>
</div>
<div className="w-full bg-slate-100 rounded-full h-2">
<div
className="bg-green-600 h-2 rounded-full transition-all"
style={{ width: `${stats?.matrix.total_users ? ((stats.matrix.active_users / stats.matrix.total_users) * 100) : 0}%` }}
/>
</div>
</div>
</div>
{/* Quick Actions */}
<div className="mt-6 pt-4 border-t border-slate-100">
<h4 className="text-sm font-medium text-slate-700 mb-3">Schnellaktionen</h4>
<div className="flex flex-wrap gap-2">
<a
href="http://localhost:8448/_synapse/admin"
target="_blank"
rel="noopener noreferrer"
className="px-3 py-1.5 text-sm bg-purple-100 text-purple-700 rounded-lg hover:bg-purple-200 transition-colors"
>
Synapse Admin
</a>
<a
href="http://localhost:8443"
target="_blank"
rel="noopener noreferrer"
className="px-3 py-1.5 text-sm bg-blue-100 text-blue-700 rounded-lg hover:bg-blue-200 transition-colors"
>
Jitsi Meet
</a>
</div>
</div>
</div>
</div>
{/* Connection Info */}
<div className="bg-blue-50 border border-blue-200 rounded-xl p-4">
<div className="flex gap-3">
<svg className="w-5 h-5 text-blue-600 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
<div>
<h4 className="font-semibold text-blue-900">Service Konfiguration</h4>
<p className="text-sm text-blue-800 mt-1">
<strong>Matrix Homeserver:</strong> http://localhost:8448 (Synapse)<br />
<strong>Jitsi Meet:</strong> http://localhost:8443<br />
<strong>Auto-Refresh:</strong> Alle 15 Sekunden
</p>
{error && (
<p className="text-sm text-red-600 mt-2">
<strong>Fehler:</strong> {error} - Backend nicht erreichbar
</p>
)}
{stats?.last_updated && (
<p className="text-xs text-blue-600 mt-2">
Letzte Aktualisierung: {new Date(stats.last_updated).toLocaleString('de-DE')}
</p>
)}
</div>
</div>
</div>
</div>
)
}

View File

@@ -1,366 +0,0 @@
'use client'
/**
* Video & Chat Wizard Page
*
* Interactive learning and testing wizard for Matrix & Jitsi integration
* Migrated from website/app/admin/communication/wizard
*/
import { useState } from 'react'
import Link from 'next/link'
import {
WizardStepper,
WizardNavigation,
EducationCard,
ArchitectureContext,
TestRunner,
TestSummary,
type WizardStep,
type TestCategoryResult,
type FullTestResults,
type EducationContent,
type ArchitectureContextType,
} from '@/components/wizard'
// ==============================================
// Constants
// ==============================================
// Base URL of the backend that serves the communication test endpoints used below.
// Read from NEXT_PUBLIC_BACKEND_URL; an unset or empty value falls back to the local default.
const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'
// Ordered wizard steps in their initial state (every step starts as 'pending').
// Steps that carry a `category` are test steps: the category is used as the path
// segment of the test endpoint and as the key into the per-category results.
// Steps without a category (welcome, summary) are informational only.
const STEPS: WizardStep[] = [
{ id: 'welcome', name: 'Willkommen', icon: '👋', status: 'pending' },
{ id: 'api-health', name: 'API Status', icon: '💚', status: 'pending', category: 'api-health' },
{ id: 'matrix', name: 'Matrix', icon: '💬', status: 'pending', category: 'matrix' },
{ id: 'jitsi', name: 'Jitsi', icon: '📹', status: 'pending', category: 'jitsi' },
{ id: 'summary', name: 'Zusammenfassung', icon: '📊', status: 'pending' },
]
// Explanatory text shown in the EducationCard for each wizard step, keyed by step id.
// Each entry has a title and a list of lines; empty strings are used as spacer lines
// and lines starting with '•' are bullet items. The text is user-facing (German).
const EDUCATION_CONTENT: Record<string, EducationContent> = {
'welcome': {
title: 'Willkommen zum Video & Chat Wizard',
content: [
'Sichere Kommunikation ist das Rueckgrat moderner Bildungsplattformen.',
'',
'BreakPilot nutzt zwei Open-Source Systeme:',
'• Matrix Synapse: Dezentraler Messenger (Ende-zu-Ende verschluesselt)',
'• Jitsi Meet: Video-Konferenzen (WebRTC-basiert)',
'',
'Beide Systeme sind DSGVO-konform und self-hosted.',
'',
'In diesem Wizard testen wir:',
'• Matrix Homeserver und Federation',
'• Jitsi Video-Konferenz Server',
'• Integration mit der Schulverwaltung',
],
},
'api-health': {
title: 'Communication API - Backend Integration',
content: [
'Die Communication API verbindet Matrix und Jitsi mit BreakPilot.',
'',
'Funktionen:',
'• Automatische Raum-Erstellung fuer Klassen',
'• Eltern-Lehrer DM-Raeume',
'• Meeting-Planung mit Kalender-Integration',
'• Benachrichtigungen bei neuen Nachrichten',
'',
'Endpunkte:',
'• /api/v1/communication/admin/stats',
'• /api/v1/communication/admin/matrix/users',
'• /api/v1/communication/rooms',
],
},
'matrix': {
title: 'Matrix Synapse - Dezentraler Messenger',
content: [
'Matrix ist ein offenes Protokoll fuer sichere Kommunikation.',
'',
'Vorteile gegenueber WhatsApp/Teams:',
'• Ende-zu-Ende Verschluesselung (E2EE)',
'• Dezentral: Kein Single Point of Failure',
'• Federation: Kommunikation mit anderen Schulen',
'• Self-Hosted: Volle Datenkontrolle',
'',
'Raum-Typen in BreakPilot:',
'• Klassen-Info (Ankuendigungen)',
'• Elternvertreter-Raum',
'• Lehrer-Eltern DM',
'• Fachgruppen',
],
},
'jitsi': {
title: 'Jitsi Meet - Video-Konferenzen',
content: [
'Jitsi ist eine Open-Source Alternative zu Zoom/Teams.',
'',
'Features:',
'• WebRTC: Keine Software-Installation noetig',
'• Bildschirmfreigabe und Whiteboard',
'• Breakout-Raeume fuer Gruppenarbeit',
'• Aufzeichnung (optional, lokal)',
'',
'Anwendungsfaelle:',
'• Elternsprechtage (online)',
'• Fernunterricht bei Schulausfall',
'• Lehrerkonferenzen',
'• Foerdergespraeche',
],
},
'summary': {
title: 'Test-Zusammenfassung',
content: [
'Hier sehen Sie eine Uebersicht aller durchgefuehrten Tests:',
'• Matrix Homeserver Verfuegbarkeit',
'• Jitsi Server Status',
'• API-Integration',
],
},
}
// Architecture overview passed to the ArchitectureContext component, keyed by test
// category. Only the three test categories have an entry; the page checks for a
// matching key before rendering, so the welcome and summary steps show no panel.
const ARCHITECTURE_CONTEXTS: Record<string, ArchitectureContextType> = {
'api-health': {
layer: 'api',
services: ['backend', 'consent-service'],
dependencies: ['PostgreSQL', 'Matrix Synapse', 'Jitsi'],
dataFlow: ['Browser', 'FastAPI', 'Go Service', 'Matrix/Jitsi'],
},
'matrix': {
layer: 'service',
services: ['matrix'],
dependencies: ['PostgreSQL', 'Federation', 'TURN Server'],
dataFlow: ['Element Client', 'Matrix Synapse', 'Federation', 'PostgreSQL'],
},
'jitsi': {
layer: 'service',
services: ['jitsi'],
dependencies: ['Prosody XMPP', 'JVB', 'TURN/STUN'],
dataFlow: ['Browser', 'Nginx', 'Prosody', 'Jitsi Videobridge'],
},
}
// ==============================================
// Main Component
// ==============================================
/**
 * Step-by-step wizard page for testing the Matrix and Jitsi integration.
 *
 * Holds the wizard state (current step index, per-step status, per-category test
 * results, the combined "run all" result, and a shared loading/error state) and
 * renders the stepper, the education text for the current step, the test runner
 * for test steps, and the summary on the last step.
 */
export default function VideoChatWizardPage() {
const [currentStep, setCurrentStep] = useState(0)
const [steps, setSteps] = useState<WizardStep[]>(STEPS)
const [categoryResults, setCategoryResults] = useState<Record<string, TestCategoryResult>>({})
const [fullResults, setFullResults] = useState<FullTestResults | null>(null)
const [isLoading, setIsLoading] = useState(false)
const [error, setError] = useState<string | null>(null)
// Derived flags for the step currently shown. A step counts as a test step
// when it carries a `category`.
const currentStepData = steps[currentStep]
const isTestStep = currentStepData?.category !== undefined
const isWelcome = currentStepData?.id === 'welcome'
const isSummary = currentStepData?.id === 'summary'
// Runs the tests of one category via POST and records the outcome.
// The step belonging to the category becomes 'completed' when the result
// reports zero failed tests, otherwise 'failed'. Any thrown error (network
// failure or non-2xx response) is shown through `error`; step state is left
// untouched in that case.
const runCategoryTest = async (category: string) => {
setIsLoading(true)
setError(null)
try {
const response = await fetch(`${BACKEND_URL}/api/admin/communication-tests/${category}`, {
method: 'POST',
})
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
}
// NOTE(review): the response body is trusted to match TestCategoryResult; it is not validated.
const result: TestCategoryResult = await response.json()
setCategoryResults((prev) => ({ ...prev, [category]: result }))
setSteps((prev) =>
prev.map((step) =>
step.category === category
? { ...step, status: result.failed === 0 ? 'completed' : 'failed' }
: step
)
)
} catch (err) {
setError(err instanceof Error ? err.message : 'Unbekannter Fehler')
} finally {
setIsLoading(false)
}
}
// Runs every category in a single POST to the run-all endpoint, then updates
// the status of each test step from the returned categories and replaces the
// per-category result map with the returned results.
const runAllTests = async () => {
setIsLoading(true)
setError(null)
try {
const response = await fetch(`${BACKEND_URL}/api/admin/communication-tests/run-all`, {
method: 'POST',
})
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
}
const results: FullTestResults = await response.json()
setFullResults(results)
// Steps without a category, or whose category is missing from the response,
// keep their current status.
setSteps((prev) =>
prev.map((step) => {
if (step.category) {
const catResult = results.categories.find((c) => c.category === step.category)
if (catResult) {
return { ...step, status: catResult.failed === 0 ? 'completed' : 'failed' }
}
}
return step
})
)
// This replaces (does not merge into) any results from earlier single-category runs.
const newCategoryResults: Record<string, TestCategoryResult> = {}
results.categories.forEach((cat) => {
newCategoryResults[cat.category] = cat
})
setCategoryResults(newCategoryResults)
} catch (err) {
setError(err instanceof Error ? err.message : 'Unbekannter Fehler')
} finally {
setIsLoading(false)
}
}
// Advances to the next step. A step that is still 'pending' when left is marked
// 'completed'; a step already marked 'failed' keeps that status. No-op on the
// last step.
const goToNext = () => {
if (currentStep < steps.length - 1) {
setSteps((prev) =>
prev.map((step, idx) =>
idx === currentStep && step.status === 'pending'
? { ...step, status: 'completed' }
: step
)
)
setCurrentStep((prev) => prev + 1)
}
}
// Moves back one step; no-op on the first step.
const goToPrev = () => {
if (currentStep > 0) {
setCurrentStep((prev) => prev - 1)
}
}
// Direct navigation from the stepper: any step up to the current one is always
// reachable; a later step is reachable only once the step before it is no
// longer 'pending'.
const handleStepClick = (index: number) => {
if (index <= currentStep || steps[index - 1]?.status !== 'pending') {
setCurrentStep(index)
}
}
return (
<div>
{/* Header */}
<div className="bg-white rounded-lg border border-slate-200 p-4 mb-6 flex items-center justify-between">
<div className="flex items-center">
<span className="text-3xl mr-3">💬</span>
<div>
<h2 className="text-lg font-bold text-gray-800">Video & Chat Test Wizard</h2>
<p className="text-sm text-gray-600">Matrix Messenger & Jitsi Video</p>
</div>
</div>
<Link href="/communication/video-chat" className="text-blue-600 hover:text-blue-800 text-sm">
&larr; Zurueck zu Video & Chat
</Link>
</div>
{/* Stepper */}
<div className="bg-white rounded-lg border border-slate-200 p-6 mb-6">
<WizardStepper steps={steps} currentStep={currentStep} onStepClick={handleStepClick} />
</div>
{/* Content */}
<div className="bg-white rounded-lg border border-slate-200 p-6">
<div className="flex items-center mb-6">
<span className="text-3xl mr-3">{currentStepData?.icon}</span>
<div>
<h2 className="text-xl font-bold text-gray-800">
Schritt {currentStep + 1}: {currentStepData?.name}
</h2>
<p className="text-gray-500 text-sm">
{currentStep + 1} von {steps.length}
</p>
</div>
</div>
<EducationCard content={EDUCATION_CONTENT[currentStepData?.id || '']} />
{isTestStep && currentStepData?.category && ARCHITECTURE_CONTEXTS[currentStepData.category] && (
<ArchitectureContext
context={ARCHITECTURE_CONTEXTS[currentStepData.category]}
currentStep={currentStepData.name}
/>
)}
{error && (
<div className="bg-red-50 border border-red-200 text-red-700 rounded-lg p-4 mb-6">
<strong>Fehler:</strong> {error}
</div>
)}
{isWelcome && (
<div className="text-center py-8">
<button
onClick={goToNext}
className="bg-blue-600 text-white px-8 py-3 rounded-lg font-medium hover:bg-blue-700 transition-colors"
>
Wizard starten
</button>
</div>
)}
{isTestStep && currentStepData?.category && (
<TestRunner
category={currentStepData.category}
categoryResult={categoryResults[currentStepData.category]}
isLoading={isLoading}
onRunTests={() => runCategoryTest(currentStepData.category!)}
/>
)}
{isSummary && (
<div>
{!fullResults ? (
<div className="text-center py-8">
<p className="text-gray-600 mb-4">
Fuehren Sie alle Tests aus um eine Zusammenfassung zu sehen.
</p>
<button
onClick={runAllTests}
disabled={isLoading}
className={`px-6 py-3 rounded-lg font-medium transition-colors ${
isLoading
? 'bg-gray-400 cursor-not-allowed'
: 'bg-blue-600 text-white hover:bg-blue-700'
}`}
>
{isLoading ? 'Alle Tests laufen...' : 'Alle Tests ausfuehren'}
</button>
</div>
) : (
<TestSummary results={fullResults} />
)}
</div>
)}
<WizardNavigation
currentStep={currentStep}
totalSteps={steps.length}
onPrev={goToPrev}
onNext={goToNext}
showNext={!isSummary}
isLoading={isLoading}
/>
</div>
<div className="text-center text-gray-500 text-sm mt-6">
Diese Tests pruefen die Matrix- und Jitsi-Integration.
Bei Fragen wenden Sie sich an das IT-Team.
</div>
</div>
)
}

View File

@@ -1,390 +0,0 @@
'use client'
/**
* GPU Infrastructure Admin Page
*
* vast.ai GPU Management for LLM Processing
*/
import { useEffect, useState, useCallback } from 'react'
import { PagePurpose } from '@/components/common/PagePurpose'
/** A value the status payload may carry as `null` when it is not available. */
type Nullable<T> = T | null

/**
 * Status payload the page stores from the `/api/admin/gpu` proxy response.
 *
 * Every numeric and textual detail is nullable; the page renders '-' for
 * missing values. When the request fails the page builds a placeholder
 * object with `status: 'error'` and all detail fields set to `null`.
 */
interface VastStatus {
  // --- Instance identity and lifecycle ---
  instance_id: Nullable<number>
  // Lifecycle label; the page styles 'running', 'stopped', 'exited', 'loading',
  // 'scheduling' and 'creating' specially and uses 'error' for its own fallback.
  status: string
  gpu_name: Nullable<string>
  // Rendered only while `status` is 'running'.
  endpoint_base_url: Nullable<string>
  // Parsed with `new Date(...)` for display.
  last_activity: Nullable<string>
  auto_shutdown_in_minutes: Nullable<number>

  // --- Cost figures (rendered with a `$` prefix) ---
  // Shown as the hourly rate ("Kosten/h").
  dph_total: Nullable<number>
  session_runtime_minutes: Nullable<number>
  session_cost_usd: Nullable<number>
  total_runtime_hours: Nullable<number>
  total_cost_usd: Nullable<number>
  // Shown as the remaining budget and colour-coded by amount.
  account_credit: Nullable<number>
  account_total_spend: Nullable<number>

  // --- Free-text feedback ---
  message: Nullable<string>
  error?: string
}
/**
 * Admin page for the GPU instance: shows the current status and cost figures,
 * offers start/stop/refresh buttons, and refreshes the status periodically.
 *
 * All requests go to the `/api/admin/gpu` proxy route: GET for the status,
 * POST with `{ action: 'on' | 'off' }` for power control.
 */
export default function GPUInfrastructurePage() {
const [status, setStatus] = useState<VastStatus | null>(null)
const [loading, setLoading] = useState(true)
// 'on' / 'off' while a power request is in flight, otherwise null.
const [actionLoading, setActionLoading] = useState<string | null>(null)
const [error, setError] = useState<string | null>(null)
const [message, setMessage] = useState<string | null>(null)
const API_PROXY = '/api/admin/gpu'
// Loads the current status. On any failure (network error, non-JSON body, or
// non-2xx response) the error text is stored and `status` is replaced by a
// placeholder with status 'error' and all detail fields null.
// NOTE(review): `loading` is set to true on every call, including the
// periodic poll below, so the status badge switches to "Laden..." during each
// poll - confirm this is intended.
const fetchStatus = useCallback(async () => {
setLoading(true)
setError(null)
try {
const response = await fetch(API_PROXY)
// The body is parsed before the ok-check so a JSON error payload can supply the message.
const data = await response.json()
if (!response.ok) {
throw new Error(data.error || `HTTP ${response.status}`)
}
setStatus(data)
} catch (err) {
setError(err instanceof Error ? err.message : 'Verbindungsfehler')
setStatus({
instance_id: null,
status: 'error',
gpu_name: null,
dph_total: null,
endpoint_base_url: null,
last_activity: null,
auto_shutdown_in_minutes: null,
total_runtime_hours: null,
total_cost_usd: null,
account_credit: null,
account_total_spend: null,
session_runtime_minutes: null,
session_cost_usd: null,
message: 'Verbindung fehlgeschlagen'
})
} finally {
setLoading(false)
}
}, [])
// Initial load on mount.
useEffect(() => {
fetchStatus()
}, [fetchStatus])
// Poll every 30 seconds; the interval is cleared on unmount.
useEffect(() => {
const interval = setInterval(fetchStatus, 30000)
return () => clearInterval(interval)
}, [fetchStatus])
// Requests instance start. On success a confirmation message is shown and the
// status is re-fetched after 3 s and again after 10 s; on failure the error is
// shown and the status is re-fetched immediately.
// NOTE(review): the two timeouts are not cancelled on unmount.
const powerOn = async () => {
setActionLoading('on')
setError(null)
setMessage(null)
try {
const response = await fetch(API_PROXY, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ action: 'on' }),
})
const data = await response.json()
if (!response.ok) {
throw new Error(data.error || data.detail || 'Aktion fehlgeschlagen')
}
setMessage('Start angefordert')
setTimeout(fetchStatus, 3000)
setTimeout(fetchStatus, 10000)
} catch (err) {
setError(err instanceof Error ? err.message : 'Fehler beim Starten')
fetchStatus()
} finally {
setActionLoading(null)
}
}
// Requests instance stop. Same flow as powerOn with action 'off'.
// NOTE(review): the two timeouts are not cancelled on unmount.
const powerOff = async () => {
setActionLoading('off')
setError(null)
setMessage(null)
try {
const response = await fetch(API_PROXY, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ action: 'off' }),
})
const data = await response.json()
if (!response.ok) {
throw new Error(data.error || data.detail || 'Aktion fehlgeschlagen')
}
setMessage('Stop angefordert')
setTimeout(fetchStatus, 3000)
setTimeout(fetchStatus, 10000)
} catch (err) {
setError(err instanceof Error ? err.message : 'Fehler beim Stoppen')
fetchStatus()
} finally {
setActionLoading(null)
}
}
// Maps a status label to badge classes: green for running, red for
// stopped/exited, yellow for transitional states (including the local
// 'starting...' / 'stopping...' labels), grey for anything else.
const getStatusBadge = (s: string) => {
const baseClasses = 'px-3 py-1 rounded-full text-sm font-semibold uppercase'
switch (s) {
case 'running':
return `${baseClasses} bg-green-100 text-green-800`
case 'stopped':
case 'exited':
return `${baseClasses} bg-red-100 text-red-800`
case 'loading':
case 'scheduling':
case 'creating':
case 'starting...':
case 'stopping...':
return `${baseClasses} bg-yellow-100 text-yellow-800`
default:
return `${baseClasses} bg-slate-100 text-slate-600`
}
}
// Text colour for the account credit: grey when unknown, red below 5,
// yellow below 15, green otherwise.
const getCreditColor = (credit: number | null) => {
if (credit === null) return 'text-slate-500'
if (credit < 5) return 'text-red-600'
if (credit < 15) return 'text-yellow-600'
return 'text-green-600'
}
return (
<div>
{/* Page Purpose */}
<PagePurpose
title="GPU Infrastruktur"
purpose="Verwalten Sie die vast.ai GPU-Instanzen fuer LLM-Verarbeitung und OCR. Starten/Stoppen Sie GPUs bei Bedarf und ueberwachen Sie Kosten in Echtzeit."
audience={['DevOps', 'Entwickler', 'System-Admins']}
architecture={{
services: ['vast.ai API', 'Ollama', 'VLLM'],
databases: ['PostgreSQL (Logs)'],
}}
relatedPages={[
{ name: 'Security', href: '/infrastructure/security', description: 'DevSecOps Dashboard' },
{ name: 'Builds', href: '/infrastructure/builds', description: 'CI/CD Pipeline' },
]}
collapsible={true}
defaultCollapsed={true}
/>
{/* Status Cards */}
<div className="bg-white rounded-xl border border-slate-200 p-6 mb-6">
<div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-6">
<div>
<div className="text-sm text-slate-500 mb-2">Status</div>
{loading ? (
<span className="px-3 py-1 rounded-full text-sm font-semibold bg-slate-100 text-slate-600">
Laden...
</span>
) : (
<span className={getStatusBadge(
actionLoading === 'on' ? 'starting...' :
actionLoading === 'off' ? 'stopping...' :
status?.status || 'unknown'
)}>
{actionLoading === 'on' ? 'starting...' :
actionLoading === 'off' ? 'stopping...' :
status?.status || 'unbekannt'}
</span>
)}
</div>
<div>
<div className="text-sm text-slate-500 mb-2">GPU</div>
<div className="font-semibold text-slate-900">
{status?.gpu_name || '-'}
</div>
</div>
<div>
<div className="text-sm text-slate-500 mb-2">Kosten/h</div>
<div className="font-semibold text-slate-900">
{status?.dph_total ? `$${status.dph_total.toFixed(3)}` : '-'}
</div>
</div>
<div>
<div className="text-sm text-slate-500 mb-2">Auto-Stop</div>
<div className="font-semibold text-slate-900">
{status && status.auto_shutdown_in_minutes !== null
? `${status.auto_shutdown_in_minutes} min`
: '-'}
</div>
</div>
<div>
<div className="text-sm text-slate-500 mb-2">Budget</div>
<div className={`font-bold text-lg ${getCreditColor(status?.account_credit ?? null)}`}>
{status && status.account_credit !== null
? `$${status.account_credit.toFixed(2)}`
: '-'}
</div>
</div>
<div>
<div className="text-sm text-slate-500 mb-2">Session</div>
<div className="font-semibold text-slate-900">
{status && status.session_runtime_minutes !== null && status.session_cost_usd !== null
? `${Math.round(status.session_runtime_minutes)} min / $${status.session_cost_usd.toFixed(3)}`
: '-'}
</div>
</div>
</div>
{/* Buttons */}
<div className="flex items-center gap-4 mt-6 pt-6 border-t border-slate-200">
<button
onClick={powerOn}
disabled={actionLoading !== null || status?.status === 'running'}
className="px-6 py-2 bg-orange-600 text-white rounded-lg font-medium hover:bg-orange-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
>
Starten
</button>
<button
onClick={powerOff}
disabled={actionLoading !== null || status?.status !== 'running'}
className="px-6 py-2 bg-red-600 text-white rounded-lg font-medium hover:bg-red-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
>
Stoppen
</button>
<button
onClick={fetchStatus}
disabled={loading}
className="px-4 py-2 border border-slate-300 text-slate-700 rounded-lg font-medium hover:bg-slate-50 disabled:opacity-50 transition-colors"
>
{loading ? 'Aktualisiere...' : 'Aktualisieren'}
</button>
{message && (
<span className="ml-4 text-sm text-green-600 font-medium">{message}</span>
)}
{error && (
<span className="ml-4 text-sm text-red-600 font-medium">{error}</span>
)}
</div>
</div>
{/* Extended Stats */}
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6 mb-6">
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h3 className="font-semibold text-slate-900 mb-4">Kosten-Uebersicht</h3>
<div className="space-y-4">
<div className="flex justify-between items-center">
<span className="text-slate-600">Session Laufzeit</span>
<span className="font-semibold">
{status && status.session_runtime_minutes !== null
? `${Math.round(status.session_runtime_minutes)} Minuten`
: '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">Session Kosten</span>
<span className="font-semibold">
{status && status.session_cost_usd !== null
? `$${status.session_cost_usd.toFixed(4)}`
: '-'}
</span>
</div>
<div className="flex justify-between items-center pt-4 border-t border-slate-100">
<span className="text-slate-600">Gesamtlaufzeit</span>
<span className="font-semibold">
{status && status.total_runtime_hours !== null
? `${status.total_runtime_hours.toFixed(1)} Stunden`
: '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">Gesamtkosten</span>
<span className="font-semibold">
{status && status.total_cost_usd !== null
? `$${status.total_cost_usd.toFixed(2)}`
: '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">vast.ai Ausgaben</span>
<span className="font-semibold">
{status && status.account_total_spend !== null
? `$${status.account_total_spend.toFixed(2)}`
: '-'}
</span>
</div>
</div>
</div>
<div className="bg-white rounded-xl border border-slate-200 p-6">
<h3 className="font-semibold text-slate-900 mb-4">Instanz-Details</h3>
<div className="space-y-4">
<div className="flex justify-between items-center">
<span className="text-slate-600">Instanz ID</span>
<span className="font-mono text-sm">
{status?.instance_id || '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">GPU</span>
<span className="font-semibold">
{status?.gpu_name || '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">Stundensatz</span>
<span className="font-semibold">
{status?.dph_total ? `$${status.dph_total.toFixed(4)}/h` : '-'}
</span>
</div>
<div className="flex justify-between items-center">
<span className="text-slate-600">Letzte Aktivitaet</span>
<span className="text-sm">
{status?.last_activity
? new Date(status.last_activity).toLocaleString('de-DE')
: '-'}
</span>
</div>
{status?.endpoint_base_url && status.status === 'running' && (
<div className="pt-4 border-t border-slate-100">
<div className="text-slate-600 text-sm mb-1">Endpoint</div>
<code className="text-xs bg-slate-100 px-2 py-1 rounded block overflow-x-auto">
{status.endpoint_base_url}
</code>
</div>
)}
</div>
</div>
</div>
{/* Info */}
<div className="bg-orange-50 border border-orange-200 rounded-xl p-4">
<div className="flex gap-3">
<svg className="w-5 h-5 text-orange-600 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
<div>
<h4 className="font-semibold text-orange-900">Auto-Shutdown</h4>
<p className="text-sm text-orange-800 mt-1">
Die GPU-Instanz wird automatisch gestoppt, wenn sie laengere Zeit inaktiv ist.
Der Status wird alle 30 Sekunden automatisch aktualisiert.
</p>
</div>
</div>
</div>
</div>
)
}

View File

@@ -0,0 +1,328 @@
/**
* Tests for usePixelWordPositions hook.
*
* The hook performs pixel-based word positioning using an offscreen canvas.
* Since Canvas/getImageData is not available in jsdom, we test the pure
* computation logic by extracting and testing the algorithms directly.
*/
import { describe, it, expect } from 'vitest'
// ---------------------------------------------------------------------------
// Extract pure computation functions from the hook for testing
// ---------------------------------------------------------------------------
/** Inclusive column range `[start, end]` covered by one run of dark pixels. */
interface Cluster {
  start: number
  end: number
}

/**
 * Groups the columns of a vertical projection into clusters of dark pixels.
 *
 * A column counts as dark when its projection value reaches
 * `max(1, 3% of ch)`. Light stretches of up to `max(5, round(2% of cw))`
 * columns are bridged; a longer light stretch closes the current cluster at
 * its last dark column.
 *
 * @param proj Per-column projection values; only indices `0..cw-1` are read.
 * @param ch Cell height, used to derive the darkness threshold.
 * @param cw Cell width, i.e. the number of columns scanned.
 * @returns Clusters in left-to-right order with inclusive bounds.
 */
function findClusters(proj: number[], ch: number, cw: number): Cluster[] {
  const darkThreshold = Math.max(1, ch * 0.03)
  const bridgeableGap = Math.max(5, Math.round(cw * 0.02))
  const found: Cluster[] = []
  // -1 means "no cluster open"; any other value is the open cluster's first column.
  let runStart = -1
  // Number of consecutive light columns seen since the last dark one.
  let lightRun = 0

  for (let col = 0; col < cw; col += 1) {
    if (proj[col] >= darkThreshold) {
      if (runStart < 0) runStart = col
      lightRun = 0
      continue
    }
    if (runStart < 0) continue

    lightRun += 1
    if (lightRun > bridgeableGap) {
      // Step back over the light columns so `end` is the last dark column.
      found.push({ start: runStart, end: col - lightRun })
      runStart = -1
      lightRun = 0
    }
  }

  // A cluster still open at the right edge ends at its last dark column.
  if (runStart >= 0) found.push({ start: runStart, end: cw - 1 - lightRun })
  return found
}
/**
 * Reflects clusters horizontally within a cell of width `cw`, as needed for a
 * 180° rotated image.
 *
 * Each column `c` maps to `cw - 1 - c`, which swaps a cluster's bounds; the
 * list is walked back to front so the result is ordered left-to-right again.
 *
 * @param clusters Clusters in left-to-right order (not modified).
 * @param cw Cell width in columns.
 * @returns A new array of mirrored clusters.
 */
function mirrorClusters(clusters: Cluster[], cw: number): Cluster[] {
  const lastCol = cw - 1
  return clusters.reduceRight<Cluster[]>((flipped, { start, end }) => {
    flipped.push({ start: lastCol - end, end: lastCol - start })
    return flipped
  }, [])
}
/**
 * Derives the font size, as a fraction of the cell height, at which a text
 * would be exactly as wide as its pixel cluster.
 *
 * The reference font size is scaled by `clusterW / measuredWidth`, divided by
 * the cell height, and capped at 1.0.
 *
 * @param clusterW Width of the detected pixel cluster.
 * @param measuredWidth Width of the text when measured at `refFontSize`.
 * @param refFontSize Font size used for the reference measurement.
 * @param ch Cell height.
 * @returns The font-size-to-cell-height ratio, at most 1.0.
 */
function computeFontRatio(
  clusterW: number,
  measuredWidth: number,
  refFontSize: number,
  ch: number,
): number {
  const widthScale = clusterW / measuredWidth
  const fittedFontPx = refFontSize * widthScale
  const ratio = fittedFontPx / ch
  // Upper clamp only; NaN fails the comparison and is returned unchanged.
  return ratio > 1.0 ? 1.0 : ratio
}
/**
 * Picks the most frequent font ratio after rounding each value to the nearest
 * multiple of 0.02.
 *
 * Ties go to the bucket that appeared first in `ratios`. The returned value is
 * the bucket value, not one of the raw inputs.
 *
 * @param ratios Font ratios to normalize.
 * @returns The dominant bucketed ratio, or 0 for an empty list.
 */
function normalizeFontRatios(ratios: number[]): number {
  if (ratios.length === 0) return 0

  // First pass: count occurrences per 0.02-wide bucket (insertion-ordered).
  const tally = new Map<number, number>()
  for (const value of ratios) {
    const bucket = Math.round(value * 50) / 50
    tally.set(bucket, (tally.get(bucket) || 0) + 1)
  }

  // Second pass: strict '>' keeps the earliest bucket on equal counts.
  let dominant = ratios[0]
  let dominantCount = 0
  tally.forEach((count, bucket) => {
    if (count > dominantCount) {
      dominant = bucket
      dominantCount = count
    }
  })
  return dominant
}
/**
 * Converts a cell's percentage-based box into the pixel position of its
 * top-left corner after a 180° rotation of the image.
 *
 * On each axis the rotated origin is `100 - offset - extent` percent, scaled
 * to the image size and rounded to a whole pixel.
 *
 * @param x Left edge of the cell in percent of the image width.
 * @param y Top edge of the cell in percent of the image height.
 * @param w Cell width in percent.
 * @param h Cell height in percent.
 * @param imgW Image width in pixels.
 * @param imgH Image height in pixels.
 * @returns Pixel coordinates `{ cx, cy }` of the rotated cell origin.
 */
function transformCellCoords180(
  x: number, y: number, w: number, h: number,
  imgW: number, imgH: number,
): { cx: number; cy: number } {
  // Same arithmetic order on both axes: subtract, divide by 100, then scale.
  const flipAxis = (offset: number, extent: number, size: number): number =>
    Math.round((100 - offset - extent) / 100 * size)

  const cx = flipAxis(x, w, imgW)
  const cy = flipAxis(y, h, imgH)
  return { cx, cy }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('findClusters', () => {
  /**
   * Builds an all-zero projection of `width` columns and sets every column of
   * each inclusive `[from, to]` run to 10 (well above the threshold used here).
   */
  const projectionWith = (width: number, ...runs: Array<[number, number]>): number[] => {
    const proj: number[] = new Array(width).fill(0)
    for (const [from, to] of runs) proj.fill(10, from, to + 1)
    return proj
  }

  it('should find a single cluster', () => {
    // Dark pixels from x=10 to x=50
    const found = findClusters(projectionWith(100, [10, 50]), 100, 100)
    expect(found.length).toBe(1)
    expect(found[0].start).toBe(10)
    expect(found[0].end).toBe(50)
  })

  it('should find multiple clusters separated by gaps', () => {
    // Two word groups with a wide gap between them
    const found = findClusters(projectionWith(200, [10, 40], [80, 120]), 100, 200)
    expect(found.length).toBe(2)
    expect(found[0].start).toBe(10)
    expect(found[1].start).toBe(80)
  })

  it('should merge clusters with small gaps', () => {
    // 3px gap (columns 31-33) is below minGap = max(5, 100*0.02) = 5,
    // so it must not split the cluster
    const found = findClusters(projectionWith(100, [10, 30], [34, 50]), 100, 100)
    expect(found.length).toBe(1) // merged into one cluster
  })

  it('should return empty for all-white projection', () => {
    const found = findClusters(projectionWith(100), 100, 100)
    expect(found.length).toBe(0)
  })
})
describe('mirrorClusters', () => {
  it('should mirror clusters for 180° rotation', () => {
    const width = 200
    const input: Cluster[] = [
      { start: 10, end: 50 },
      { start: 80, end: 120 },
    ]

    const result = mirrorClusters(input, width)

    // Each cluster maps to (width-1-end, width-1-start):
    //   (10,50)  -> (149, 189)
    //   (80,120) -> (79, 119)
    // and the list is reversed so it reads left-to-right again.
    expect(result.length).toBe(2)
    expect(result[0]).toEqual({ start: 79, end: 119 })
    expect(result[1]).toEqual({ start: 149, end: 189 })
  })

  it('should maintain left-to-right order after mirroring', () => {
    const input: Cluster[] = [
      { start: 5, end: 30 },
      { start: 50, end: 80 },
      { start: 100, end: 130 },
    ]

    const result = mirrorClusters(input, 200)

    // Every cluster must start to the right of its predecessor.
    result.slice(1).forEach((cluster, previousIndex) => {
      expect(cluster.start).toBeGreaterThan(result[previousIndex].start)
    })
  })

  it('should handle single cluster', () => {
    const input: Cluster[] = [{ start: 20, end: 80 }]

    const result = mirrorClusters(input, 200)

    // (20,80) -> (199-80, 199-20) = (119, 179)
    expect(result.length).toBe(1)
    expect(result[0]).toEqual({ start: 119, end: 179 })
  })
})
describe('computeFontRatio', () => {
  // Shared inputs: text measured at a 40px reference font, inside a 30px-high cell.
  const refFontPx = 40
  const cellHeightPx = 30

  it('should compute ratio based on cluster vs measured width', () => {
    // 100px cluster vs 200px measured width -> auto font 40 * 0.5 = 20px,
    // and 20px / 30px cell height = 0.667.
    const result = computeFontRatio(100, 200, refFontPx, cellHeightPx)

    expect(result).toBeCloseTo(0.667, 2)
  })

  it('should cap ratio at 1.0', () => {
    // Cluster far wider than the measured text: the raw ratio exceeds 1.
    const result = computeFontRatio(400, 100, refFontPx, cellHeightPx)

    expect(result).toBe(1.0)
  })

  it('should handle small cluster width', () => {
    // 10px cluster vs 200px measured -> 2px font / 30px = 0.067.
    const result = computeFontRatio(10, 200, refFontPx, cellHeightPx)

    expect(result).toBeCloseTo(0.067, 2)
  })
})
describe('normalizeFontRatios', () => {
  it('should return the most common ratio', () => {
    const result = normalizeFontRatios([0.5, 0.5, 0.5, 0.3, 0.3, 0.7])

    expect(result).toBe(0.5)
  })

  it('should bucket ratios to nearest 0.02', () => {
    // 0.49 and 0.50 share the 0.50 bucket (0.51 * 50 = 25.5 rounds up into
    // the 0.52 bucket), so 0.50 wins with two hits.
    const result = normalizeFontRatios([0.51, 0.49, 0.50, 0.30])

    expect(result).toBe(0.50)
  })

  it('should handle empty array', () => {
    const result = normalizeFontRatios([])

    expect(result).toBe(0)
  })

  it('should handle single ratio', () => {
    // 0.65 * 50 = 32.5 rounds up to 33 -> 0.66 (rounded to nearest 0.02).
    const result = normalizeFontRatios([0.65])

    expect(result).toBe(0.66)
  })
})
describe('transformCellCoords180', () => {
  it('should transform cell coordinates for 180° rotation', () => {
    // Cell at x=10%, y=20%, w=30%, h=5% on a 1000x2000 image.
    const pixel = transformCellCoords180(10, 20, 30, 5, 1000, 2000)

    // cx = (100 - 10 - 30) / 100 * 1000 = 600
    // cy = (100 - 20 - 5) / 100 * 2000 = 1500
    expect(pixel.cx).toBe(600)
    expect(pixel.cy).toBe(1500)
  })

  it('should handle cell at origin', () => {
    const pixel = transformCellCoords180(0, 0, 50, 50, 1000, 1000)

    // cx = cy = (100 - 0 - 50) / 100 * 1000 = 500
    expect(pixel.cx).toBe(500)
    expect(pixel.cy).toBe(500)
  })

  it('should handle cell at bottom-right', () => {
    const pixel = transformCellCoords180(80, 90, 20, 10, 1000, 2000)

    // The cell touches the right and bottom edges, so it lands on (0, 0):
    // cx = (100 - 80 - 20) / 100 * 1000 = 0
    // cy = (100 - 90 - 10) / 100 * 2000 = 0
    expect(pixel.cx).toBe(0)
    expect(pixel.cy).toBe(0)
  })
})
describe('sub-session coordinate conversion', () => {
  /**
   * Exercises the conversion from sub-session (box-relative) coordinates to
   * parent (page-absolute) coordinates. Per the original note this replicates
   * the logic in StepReconstruction loadSessionData.
   */

  /** Express `part` as a percentage of `whole`. */
  const toPercent = (part: number, whole: number): number => (part / whole) * 100

  it('should convert sub-session cell coords to parent space', () => {
    const page = { w: 1746, h: 2487 }
    // Box zone in pixels.
    const box = { x: 50, y: 1145, width: 1100, height: 270 }
    // The same box in percent of the page (x ≈ 2.86%, y ≈ 46.04%).
    const boxPct = {
      x: toPercent(box.x, page.w),
      y: toPercent(box.y, page.h),
      w: toPercent(box.width, page.w),
      h: toPercent(box.height, page.h),
    }
    // Sub-session cell at (10%, 20%, 80%, 15%) relative to the box.
    const subCell = { x: 10, y: 20, w: 80, h: 15 }

    const parent = {
      x: boxPct.x + (subCell.x / 100) * boxPct.w,
      y: boxPct.y + (subCell.y / 100) * boxPct.h,
      w: (subCell.w / 100) * boxPct.w,
      h: (subCell.h / 100) * boxPct.h,
    }

    expect(parent.x).toBeCloseTo(boxPct.x + 0.1 * boxPct.w, 2)
    expect(parent.y).toBeCloseTo(boxPct.y + 0.2 * boxPct.h, 2)
    expect(parent.w).toBeCloseTo(0.8 * boxPct.w, 2)
    expect(parent.h).toBeCloseTo(0.15 * boxPct.h, 2)

    // The converted cell must stay inside the page (0-100%).
    expect(parent.x).toBeGreaterThan(0)
    expect(parent.y).toBeGreaterThan(0)
    expect(parent.x + parent.w).toBeLessThan(100)
    expect(parent.y + parent.h).toBeLessThan(100)
  })

  it('should place sub-cell at box origin when sub coords are 0,0', () => {
    const page = { w: 1000, h: 2000 }
    const box = { x: 100, y: 500, width: 800, height: 200 }

    // A zero offset inside the box leaves just the box origin: 10% / 25%.
    const parentX = toPercent(box.x, page.w) + (0 / 100) * toPercent(box.width, page.w)
    const parentY = toPercent(box.y, page.h) + (0 / 100) * toPercent(box.height, page.h)

    expect(parentX).toBeCloseTo(10, 1)
    expect(parentY).toBeCloseTo(25, 1)
  })
})

View File

@@ -49,22 +49,6 @@ export const navigation: NavCategory[] = [
purpose: 'E-Mail-Konten verwalten und KI-Kategorisierung nutzen. IMAP/SMTP Konfiguration, Vorlagen und Audit-Log.',
audience: ['Support', 'Admins'],
},
{
id: 'video-chat',
name: 'Video & Chat',
href: '/communication/video-chat',
description: 'Matrix & Jitsi Monitoring',
purpose: 'Dashboard fuer Matrix Synapse und Jitsi Meet. Service-Status, aktive Meetings, Traffic-Analyse und Ressourcen-Empfehlungen.',
audience: ['Admins', 'DevOps'],
},
{
id: 'voice-service',
name: 'Voice Service',
href: '/communication/matrix',
description: 'PersonaPlex-7B & TaskOrchestrator',
purpose: 'Voice-First Interface Konfiguration und Architektur-Dokumentation. Live Demo, Task States, Intents und DSGVO-Informationen.',
audience: ['Entwickler', 'Admins'],
},
{
id: 'alerts',
name: 'Alerts Monitoring',
@@ -132,24 +116,6 @@ export const navigation: NavCategory[] = [
// -----------------------------------------------------------------------
// KI-Werkzeuge: Standalone-Tools fuer Entwicklung & QA
// -----------------------------------------------------------------------
{
id: 'ocr-compare',
name: 'OCR Vergleich',
href: '/ai/ocr-compare',
description: 'OCR-Methoden & Vokabel-Extraktion',
purpose: 'Vergleichen Sie verschiedene OCR-Methoden (lokales LLM, Vision LLM, PaddleOCR, Tesseract, Anthropic) fuer Vokabel-Extraktion. Grid-Overlay, Block-Review und LLM-Vergleich.',
audience: ['Entwickler', 'Data Scientists', 'Lehrer'],
subgroup: 'KI-Werkzeuge',
},
{
id: 'ocr-pipeline',
name: 'OCR Pipeline',
href: '/ai/ocr-pipeline',
description: 'Schrittweise Seitenrekonstruktion',
purpose: 'Schrittweise Seitenrekonstruktion: Scan begradigen, Spalten erkennen, Woerter lokalisieren und die Seite Wort fuer Wort nachbauen. 6-Schritt-Pipeline mit Ground Truth Validierung.',
audience: ['Entwickler', 'Data Scientists'],
subgroup: 'KI-Werkzeuge',
},
{
id: 'ocr-kombi',
name: 'OCR Kombi',
@@ -159,15 +125,6 @@ export const navigation: NavCategory[] = [
audience: ['Entwickler'],
subgroup: 'KI-Werkzeuge',
},
{
id: 'ocr-overlay',
name: 'OCR Overlay (Legacy)',
href: '/ai/ocr-overlay',
description: 'Ganzseitige Overlay-Rekonstruktion',
purpose: 'Arbeitsblatt ohne Spaltenerkennung direkt als Overlay rekonstruieren. Vereinfachte 7-Schritt-Pipeline.',
audience: ['Entwickler'],
subgroup: 'KI-Werkzeuge',
},
{
id: 'test-quality',
name: 'Test Quality (BQAS)',
@@ -178,16 +135,6 @@ export const navigation: NavCategory[] = [
oldAdminPath: '/admin/quality',
subgroup: 'KI-Werkzeuge',
},
{
id: 'gpu',
name: 'GPU Infrastruktur',
href: '/ai/gpu',
description: 'vast.ai GPU Management',
purpose: 'Verwalten Sie GPU-Instanzen auf vast.ai fuer ML-Training und Inferenz.',
audience: ['DevOps', 'Entwickler'],
oldAdminPath: '/admin/gpu',
subgroup: 'KI-Werkzeuge',
},
// -----------------------------------------------------------------------
// KI-Anwendungen: Endnutzer-orientierte KI-Module
// -----------------------------------------------------------------------
@@ -209,15 +156,6 @@ export const navigation: NavCategory[] = [
audience: ['Entwickler', 'QA'],
subgroup: 'KI-Werkzeuge',
},
{
id: 'model-management',
name: 'Model Management',
href: '/ai/model-management',
description: 'ONNX & PyTorch Modell-Verwaltung',
purpose: 'Verfuegbare ML-Modelle verwalten (PyTorch vs ONNX), Backend umschalten, Benchmark-Vergleiche ausfuehren und RAM/Performance-Metriken einsehen.',
audience: ['Entwickler', 'DevOps'],
subgroup: 'KI-Werkzeuge',
},
{
id: 'agents',
name: 'Agent Management',

View File

@@ -1,10 +1 @@
"""
Infrastructure management module.
Provides control plane for external GPU resources (vast.ai).
"""
from .vast_client import VastAIClient
from .vast_power import router as vast_router
__all__ = ["VastAIClient", "vast_router"]
# Infrastructure module (vast.ai GPU management removed — see git history)

View File

@@ -1,419 +0,0 @@
"""
Vast.ai REST API Client.
Verwendet die offizielle vast.ai API statt CLI fuer mehr Stabilitaet.
API Dokumentation: https://docs.vast.ai/api
"""
import asyncio
import logging
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Optional, Dict, Any, List
import httpx
logger = logging.getLogger(__name__)
class InstanceStatus(Enum):
    """Status of a vast.ai instance.

    Each member's value is the lowercase status string; it is what
    ``InstanceInfo.to_dict`` emits via ``status.value``.
    """
    RUNNING = "running"
    STOPPED = "stopped"
    EXITED = "exited"
    LOADING = "loading"
    SCHEDULING = "scheduling"
    CREATING = "creating"
    # Fallback that InstanceInfo.from_api_response uses for unmapped statuses.
    UNKNOWN = "unknown"
@dataclass
class AccountInfo:
    """Information about the vast.ai account."""

    credit: float  # Current credit in USD
    balance: float  # Balance (usually 0)
    total_spend: float  # Total amount spent (stored as a non-negative value)
    username: str
    email: str
    has_billing: bool

    @classmethod
    def from_api_response(cls, data: Dict[str, Any]) -> "AccountInfo":
        """Build an AccountInfo from the API response dictionary.

        Missing keys fall back to 0.0 / "" / False.

        Args:
            data: Decoded JSON object describing the account.

        Returns:
            The populated AccountInfo.
        """
        # The API reports total_spend as a negative number, hence abs().
        # "or 0.0" also covers a key that is present with a null value, which
        # would otherwise make abs() raise TypeError.
        total_spend = abs(data.get("total_spend") or 0.0)
        return cls(
            credit=data.get("credit", 0.0),
            balance=data.get("balance", 0.0),
            total_spend=total_spend,
            username=data.get("username", ""),
            email=data.get("email", ""),
            has_billing=data.get("has_billing", False),
        )

    def to_dict(self) -> Dict[str, Any]:
        """Serialize all fields to a plain dictionary."""
        return {
            "credit": self.credit,
            "balance": self.balance,
            "total_spend": self.total_spend,
            "username": self.username,
            "email": self.email,
            "has_billing": self.has_billing,
        }
@dataclass
class InstanceInfo:
    """Information about a single vast.ai instance."""

    id: int
    status: InstanceStatus
    machine_id: Optional[int] = None
    gpu_name: Optional[str] = None
    num_gpus: int = 1
    gpu_ram: Optional[float] = None  # GB
    cpu_ram: Optional[float] = None  # GB
    disk_space: Optional[float] = None  # GB
    dph_total: Optional[float] = None  # $/hour
    public_ipaddr: Optional[str] = None
    ports: Dict[str, Any] = field(default_factory=dict)
    label: Optional[str] = None
    image_uuid: Optional[str] = None
    started_at: Optional[datetime] = None

    @classmethod
    def from_api_response(cls, data: Dict[str, Any]) -> "InstanceInfo":
        """Build an InstanceInfo from one instance object of the API response.

        Args:
            data: Decoded JSON object describing the instance.

        Returns:
            The populated InstanceInfo. An unrecognised ``actual_status``
            becomes ``InstanceStatus.UNKNOWN``; an unparsable ``start_date``
            leaves ``started_at`` as None.
        """
        # Derive the lookup from the enum so every declared status (including
        # STOPPED, which the previous hand-written map omitted) is reachable.
        status_by_value = {member.value: member for member in InstanceStatus}
        actual_status = data.get("actual_status", "unknown")
        status = status_by_value.get(actual_status, InstanceStatus.UNKNOWN)

        # Port mapping; a missing or empty value becomes an empty dict.
        ports = data.get("ports") or {}

        # start_date is treated as a POSIX timestamp.
        started_at = None
        raw_start = data.get("start_date")
        if raw_start:
            try:
                started_at = datetime.fromtimestamp(raw_start, tz=timezone.utc)
            except (ValueError, TypeError, OverflowError, OSError):
                # fromtimestamp raises OverflowError/OSError for out-of-range
                # values; a bad timestamp must not break parsing the instance.
                started_at = None

        return cls(
            id=data.get("id", 0),
            status=status,
            machine_id=data.get("machine_id"),
            gpu_name=data.get("gpu_name"),
            num_gpus=data.get("num_gpus", 1),
            gpu_ram=data.get("gpu_ram"),
            cpu_ram=data.get("cpu_ram"),
            disk_space=data.get("disk_space"),
            dph_total=data.get("dph_total"),
            public_ipaddr=data.get("public_ipaddr"),
            ports=ports,
            label=data.get("label"),
            image_uuid=data.get("image_uuid"),
            started_at=started_at,
        )

    def get_endpoint_url(self, internal_port: int = 8001) -> Optional[str]:
        """Compute the external URL for an internal port.

        Args:
            internal_port: Port the service listens on inside the instance.

        Returns:
            ``http://<public_ip>:<port>``, or None when no public IP is known.
        """
        if not self.public_ipaddr:
            return None

        # vast.ai maps internal ports to external ports.
        # Format: {"8001/tcp": [{"HostIp": "0.0.0.0", "HostPort": "12345"}]}
        port_info = self.ports.get(f"{internal_port}/tcp")
        if isinstance(port_info, list) and port_info:
            host_port = port_info[0].get("HostPort")
            if host_port:
                return f"http://{self.public_ipaddr}:{host_port}"

        # Fallback: assume the internal port is exposed directly.
        return f"http://{self.public_ipaddr}:{internal_port}"

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dictionary.

        ``status`` is emitted as its string value and ``started_at`` as an
        ISO-8601 string (or None). ``image_uuid`` is not part of the output.
        """
        return {
            "id": self.id,
            "status": self.status.value,
            "machine_id": self.machine_id,
            "gpu_name": self.gpu_name,
            "num_gpus": self.num_gpus,
            "gpu_ram": self.gpu_ram,
            "cpu_ram": self.cpu_ram,
            "disk_space": self.disk_space,
            "dph_total": self.dph_total,
            "public_ipaddr": self.public_ipaddr,
            "ports": self.ports,
            "label": self.label,
            "started_at": self.started_at.isoformat() if self.started_at else None,
        }
class VastAIClient:
    """
    Async client for the vast.ai REST API.

    Uses the API at https://console.vast.ai/api/v0/. The API key is sent as an
    ``api_key`` query parameter, so it is part of every request URL. Because
    HTTP error messages contain that URL, everything this class logs about a
    failed request is passed through ``_redact`` first.
    """

    BASE_URL = "https://console.vast.ai/api/v0"

    def __init__(self, api_key: str, timeout: float = 30.0):
        """Store the credentials; the HTTP client is created on first use.

        Args:
            api_key: vast.ai API key appended to every request URL.
            timeout: Timeout handed to the httpx client.
        """
        self.api_key = api_key
        self.timeout = timeout
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, (re)creating it if missing or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=self.timeout,
                headers={
                    "Accept": "application/json",
                },
            )
        return self._client

    async def close(self) -> None:
        """Close the HTTP client if it is open."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()
        self._client = None

    def _build_url(self, endpoint: str) -> str:
        """Build the full request URL including the API key."""
        sep = "&" if "?" in endpoint else "?"
        return f"{self.BASE_URL}{endpoint}{sep}api_key={self.api_key}"

    def _redact(self, message: object) -> str:
        """Return ``str(message)`` with the API key masked, safe for logging."""
        text = str(message)
        if self.api_key:
            text = text.replace(self.api_key, "***")
        return text

    async def list_instances(self) -> List[InstanceInfo]:
        """List all instances.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
        """
        client = await self._get_client()
        url = self._build_url("/instances/")

        try:
            response = await client.get(url)
            response.raise_for_status()
            data = response.json()

            instances = []
            if "instances" in data:
                for inst_data in data["instances"]:
                    instances.append(InstanceInfo.from_api_response(inst_data))
            return instances

        except httpx.HTTPStatusError as e:
            logger.error(f"vast.ai API error listing instances: {self._redact(e)}")
            raise

    async def get_instance(self, instance_id: int) -> Optional[InstanceInfo]:
        """Fetch the details of one instance.

        Returns:
            The instance, or None when the API answers 404 or the response has
            no recognisable instance object.

        Raises:
            httpx.HTTPStatusError: For any error status other than 404.
        """
        client = await self._get_client()
        url = self._build_url(f"/instances/{instance_id}/")

        try:
            response = await client.get(url)
            response.raise_for_status()
            data = response.json()

            if "instances" in data:
                instances = data["instances"]
                # The API returns a dict for a single instance and a list for
                # a listing; both shapes are accepted.
                if isinstance(instances, list) and instances:
                    return InstanceInfo.from_api_response(instances[0])
                elif isinstance(instances, dict):
                    # Add the ID if the payload does not carry one.
                    if "id" not in instances:
                        instances["id"] = instance_id
                    return InstanceInfo.from_api_response(instances)
            elif isinstance(data, dict) and "id" in data:
                return InstanceInfo.from_api_response(data)
            return None

        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                return None
            logger.error(
                f"vast.ai API error getting instance {instance_id}: {self._redact(e)}"
            )
            raise

    async def start_instance(self, instance_id: int) -> bool:
        """Start a stopped instance. Returns False on an HTTP error status."""
        client = await self._get_client()
        url = self._build_url(f"/instances/{instance_id}/")

        try:
            response = await client.put(
                url,
                json={"state": "running"},
            )
            response.raise_for_status()
            logger.info(f"vast.ai instance {instance_id} start requested")
            return True

        except httpx.HTTPStatusError as e:
            logger.error(
                f"vast.ai API error starting instance {instance_id}: {self._redact(e)}"
            )
            return False

    async def stop_instance(self, instance_id: int) -> bool:
        """Stop a running instance (keeps the disk). Returns False on an HTTP error status."""
        client = await self._get_client()
        url = self._build_url(f"/instances/{instance_id}/")

        try:
            response = await client.put(
                url,
                json={"state": "stopped"},
            )
            response.raise_for_status()
            logger.info(f"vast.ai instance {instance_id} stop requested")
            return True

        except httpx.HTTPStatusError as e:
            logger.error(
                f"vast.ai API error stopping instance {instance_id}: {self._redact(e)}"
            )
            return False

    async def destroy_instance(self, instance_id: int) -> bool:
        """Delete an instance completely (the disk is gone!). Returns False on an HTTP error status."""
        client = await self._get_client()
        url = self._build_url(f"/instances/{instance_id}/")

        try:
            response = await client.delete(url)
            response.raise_for_status()
            logger.info(f"vast.ai instance {instance_id} destroyed")
            return True

        except httpx.HTTPStatusError as e:
            logger.error(
                f"vast.ai API error destroying instance {instance_id}: {self._redact(e)}"
            )
            return False

    async def set_label(self, instance_id: int, label: str) -> bool:
        """Set a label on an instance. Returns False on an HTTP error status."""
        client = await self._get_client()
        url = self._build_url(f"/instances/{instance_id}/")

        try:
            response = await client.put(
                url,
                json={"label": label},
            )
            response.raise_for_status()
            return True

        except httpx.HTTPStatusError as e:
            logger.error(
                f"vast.ai API error setting label on instance {instance_id}: {self._redact(e)}"
            )
            return False

    async def wait_for_status(
        self,
        instance_id: int,
        target_status: InstanceStatus,
        timeout_seconds: int = 300,
        poll_interval: float = 5.0,
    ) -> Optional[InstanceInfo]:
        """
        Poll until an instance reaches the given status.

        Returns:
            The InstanceInfo once the status is reached, None on timeout.
        """
        loop = asyncio.get_event_loop()
        deadline = loop.time() + timeout_seconds

        while loop.time() < deadline:
            instance = await self.get_instance(instance_id)

            if instance and instance.status == target_status:
                return instance

            if instance:
                logger.debug(
                    f"vast.ai instance {instance_id} status: {instance.status.value}, "
                    f"waiting for {target_status.value}"
                )

            await asyncio.sleep(poll_interval)

        logger.warning(
            f"Timeout waiting for instance {instance_id} to reach {target_status.value}"
        )
        return None

    async def wait_for_health(
        self,
        instance: InstanceInfo,
        health_path: str = "/health",
        internal_port: int = 8001,
        timeout_seconds: int = 600,
        poll_interval: float = 5.0,
    ) -> bool:
        """
        Poll the instance's health endpoint until it answers with a 2xx status.

        Returns:
            True once the health check passes, False on timeout or when the
            instance has no endpoint URL.
        """
        endpoint = instance.get_endpoint_url(internal_port)
        if not endpoint:
            logger.error("No endpoint URL available for health check")
            return False

        health_url = f"{endpoint.rstrip('/')}{health_path}"
        logger.info(f"Waiting for health at {health_url}")

        loop = asyncio.get_event_loop()
        deadline = loop.time() + timeout_seconds

        # A dedicated short-timeout client for the probes; the context manager
        # closes it on every exit path.
        async with httpx.AsyncClient(timeout=5.0) as health_client:
            while loop.time() < deadline:
                try:
                    response = await health_client.get(health_url)
                    if 200 <= response.status_code < 300:
                        logger.info(f"Health check passed: {health_url}")
                        return True
                except Exception as e:
                    # Connection errors are expected while the service boots.
                    logger.debug(f"Health check failed: {e}")

                await asyncio.sleep(poll_interval)

            logger.warning(f"Health check timeout: {health_url}")
            return False

    async def get_account_info(self) -> Optional[AccountInfo]:
        """
        Fetch account information including credit/budget.

        Returns:
            AccountInfo, or None on any error.
        """
        client = await self._get_client()
        url = self._build_url("/users/current/")

        try:
            response = await client.get(url)
            response.raise_for_status()
            data = response.json()
            return AccountInfo.from_api_response(data)

        except httpx.HTTPStatusError as e:
            logger.error(f"vast.ai API error getting account info: {self._redact(e)}")
            return None
        except Exception as e:
            logger.error(f"Error getting vast.ai account info: {self._redact(e)}")
            return None

View File

@@ -1,618 +0,0 @@
"""
Vast.ai Power Control API.
Stellt Endpoints bereit fuer:
- Start/Stop von vast.ai Instanzen
- Status-Abfrage
- Auto-Shutdown bei Inaktivitaet
- Kosten-Tracking
Sicherheit: Alle Endpoints erfordern CONTROL_API_KEY.
"""
import asyncio
import json
import logging
import os
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Dict, Any, List
from fastapi import APIRouter, Depends, HTTPException, Header, BackgroundTasks
from pydantic import BaseModel, Field
from .vast_client import VastAIClient, InstanceInfo, InstanceStatus, AccountInfo
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/infra/vast", tags=["Infrastructure"])
# -------------------------
# Configuration (ENV)
# -------------------------
# All values below are read from the environment once, at import time.
# The int(...) conversions raise ValueError during import if a variable is set
# to a non-numeric value.
VAST_API_KEY = os.getenv("VAST_API_KEY")
VAST_INSTANCE_ID = os.getenv("VAST_INSTANCE_ID") # Numeric instance ID (kept as a string here; converted with int() where used)
CONTROL_API_KEY = os.getenv("CONTROL_API_KEY") # Admin key for these endpoints
# Health check configuration
VAST_HEALTH_PORT = int(os.getenv("VAST_HEALTH_PORT", "8001"))
VAST_HEALTH_PATH = os.getenv("VAST_HEALTH_PATH", "/health")
VAST_WAIT_TIMEOUT_S = int(os.getenv("VAST_WAIT_TIMEOUT_S", "600")) # 10 min
# Auto-shutdown configuration
# Enabled only when VAST_AUTO_SHUTDOWN is exactly "true" (case-insensitive).
AUTO_SHUTDOWN_ENABLED = os.getenv("VAST_AUTO_SHUTDOWN", "true").lower() == "true"
AUTO_SHUTDOWN_MINUTES = int(os.getenv("VAST_AUTO_SHUTDOWN_MINUTES", "30"))
# State persistence (in /tmp for container compatibility)
STATE_PATH = Path(os.getenv("VAST_STATE_PATH", "/tmp/vast_state.json"))
AUDIT_PATH = Path(os.getenv("VAST_AUDIT_PATH", "/tmp/vast_audit.log"))
# -------------------------
# State Management
# -------------------------
class VastState:
    """
    Persistent state for the vast.ai control endpoints.

    The state is a JSON file that is rewritten on every mutation. It stores:
    - the current endpoint (because the IP can change),
    - the last activity (for auto-shutdown),
    - runtime and cost tracking.
    """

    def __init__(self, path: Path = STATE_PATH):
        self.path = path
        self._state: Dict[str, Any] = self._load()

    @staticmethod
    def _default_state() -> Dict[str, Any]:
        """Return a fresh state dictionary with nothing recorded yet."""
        return {
            "desired_state": None,
            "endpoint_base_url": None,
            "last_activity": None,
            "last_start": None,
            "last_stop": None,
            "total_runtime_seconds": 0,
            "total_cost_usd": 0.0,
        }

    def _load(self) -> Dict[str, Any]:
        """Load the state from disk.

        A missing, unreadable or malformed file yields the default state. The
        failure is logged instead of being swallowed silently, and content
        that is valid JSON but not an object is rejected so that ``_state`` is
        always a dict.
        """
        if not self.path.exists():
            return self._default_state()
        try:
            loaded = json.loads(self.path.read_text(encoding="utf-8"))
        except Exception as exc:
            logger.warning(f"Could not read vast.ai state from {self.path}: {exc}")
            return self._default_state()
        if not isinstance(loaded, dict):
            logger.warning(f"Ignoring vast.ai state in {self.path}: not a JSON object")
            return self._default_state()
        return loaded

    def _save(self) -> None:
        """Write the state to disk, creating the parent directory if needed."""
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.write_text(
            json.dumps(self._state, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def get(self, key: str, default: Any = None) -> Any:
        """Return the stored value for ``key`` or ``default``."""
        return self._state.get(key, default)

    def set(self, key: str, value: Any) -> None:
        """Store one value and persist the state."""
        self._state[key] = value
        self._save()

    def update(self, data: Dict[str, Any]) -> None:
        """Merge several values and persist the state."""
        self._state.update(data)
        self._save()

    def record_activity(self) -> None:
        """Record the current time as the last activity (for auto-shutdown)."""
        self._state["last_activity"] = datetime.now(timezone.utc).isoformat()
        self._save()

    def get_last_activity(self) -> Optional[datetime]:
        """Return the last activity as a datetime, or None if never recorded."""
        ts = self._state.get("last_activity")
        if ts:
            return datetime.fromisoformat(ts)
        return None

    def record_start(self) -> None:
        """Record the start time and mark the desired state as RUNNING."""
        self._state["last_start"] = datetime.now(timezone.utc).isoformat()
        self._state["desired_state"] = "RUNNING"
        self._save()

    def record_stop(self, dph_total: Optional[float] = None) -> None:
        """Record the stop time and add the session's runtime and cost.

        Args:
            dph_total: Price in $/hour; when falsy only the runtime is tracked.

        A session is counted once: if a stop was already recorded at or after
        ``last_start``, the runtime since ``last_start`` has been accounted
        for and is not added again.
        """
        now = datetime.now(timezone.utc)
        previous_stop = self._state.get("last_stop")
        last_start = self._state.get("last_start")

        self._state["last_stop"] = now.isoformat()
        self._state["desired_state"] = "STOPPED"

        # Compute runtime and cost for the session that just ended.
        if last_start:
            start_dt = datetime.fromisoformat(last_start)
            already_counted = (
                previous_stop is not None
                and datetime.fromisoformat(previous_stop) >= start_dt
            )
            if not already_counted:
                runtime_seconds = (now - start_dt).total_seconds()
                self._state["total_runtime_seconds"] = (
                    self._state.get("total_runtime_seconds", 0) + runtime_seconds
                )

                if dph_total:
                    hours = runtime_seconds / 3600
                    cost = hours * dph_total
                    self._state["total_cost_usd"] = (
                        self._state.get("total_cost_usd", 0.0) + cost
                    )
                    logger.info(
                        f"Session cost: ${cost:.3f} ({runtime_seconds/60:.1f} min @ ${dph_total}/h)"
                    )

        self._save()
# Global state instance
_state = VastState()
# -------------------------
# Audit Logging
# -------------------------
def audit_log(event: str, actor: str = "system", meta: Optional[Dict[str, Any]] = None) -> None:
    """Append one JSON line describing ``event`` to the audit log file.

    Args:
        event: Name of the audited event.
        actor: Who triggered it; defaults to "system".
        meta: Extra details; a missing or empty value is stored as ``{}``.
    """
    entry = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "event": event,
        "actor": actor,
        "meta": meta or {},
    }
    serialized = json.dumps(entry, ensure_ascii=False)

    # Make sure the target directory exists before appending.
    AUDIT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with AUDIT_PATH.open("a", encoding="utf-8") as audit_file:
        audit_file.write(serialized + "\n")

    logger.info(f"AUDIT: {event} by {actor}")
# -------------------------
# Request/Response Models
# -------------------------
class PowerOnRequest(BaseModel):
    # Request body of POST /infra/vast/power/on.
    # When true, power_on also waits for the health endpoint before answering.
    wait_for_health: bool = Field(default=True, description="Warten bis LLM bereit")
    # Health endpoint path and internal port; default to the VAST_HEALTH_PATH /
    # VAST_HEALTH_PORT settings.
    health_path: str = Field(default=VAST_HEALTH_PATH)
    health_port: int = Field(default=VAST_HEALTH_PORT)
class PowerOnResponse(BaseModel):
    # Response of POST /infra/vast/power/on.
    # power_on sets status to "starting", "running_unhealthy" or "running".
    status: str
    instance_id: Optional[int] = None
    endpoint_base_url: Optional[str] = None
    health_url: Optional[str] = None
    message: Optional[str] = None
class PowerOffRequest(BaseModel):
    # Presumably the request body of the power-off endpoint (not visible here).
    pass # No parameters needed
class PowerOffResponse(BaseModel):
    # Presumably the response of the power-off endpoint (not visible here).
    status: str
    # Runtime and cost of the session that was just ended, when known.
    session_runtime_minutes: Optional[float] = None
    session_cost_usd: Optional[float] = None
    message: Optional[str] = None
class VastStatusResponse(BaseModel):
    # Response of GET /infra/vast/status.
    instance_id: Optional[int] = None
    # Instance status string, or "unconfigured" / "not_found" from get_status.
    status: str
    gpu_name: Optional[str] = None
    dph_total: Optional[float] = None
    endpoint_base_url: Optional[str] = None
    last_activity: Optional[str] = None
    auto_shutdown_in_minutes: Optional[int] = None
    # Accumulated totals taken from the persisted state.
    total_runtime_hours: Optional[float] = None
    total_cost_usd: Optional[float] = None
    # Budget / credit information
    account_credit: Optional[float] = None # Remaining credit in USD
    account_total_spend: Optional[float] = None # Total spend on vast.ai
    # Session cost (since the last start)
    session_runtime_minutes: Optional[float] = None
    session_cost_usd: Optional[float] = None
    message: Optional[str] = None
class CostStatsResponse(BaseModel):
    # Aggregated cost statistics; the endpoint that returns this model is not
    # visible in this part of the file.
    total_runtime_hours: float
    total_cost_usd: float
    sessions_count: int
    avg_session_minutes: float
# -------------------------
# Security Dependency
# -------------------------
def require_control_key(x_api_key: Optional[str] = Header(default=None)) -> None:
    """
    Admin protection for the control endpoints.

    Header: X-API-Key: <CONTROL_API_KEY>

    Raises:
        HTTPException: 500 when CONTROL_API_KEY is not configured on the
            server, 401 when the header is missing or does not match.
    """
    # Local import so this function does not depend on the file's import block.
    import hmac

    if not CONTROL_API_KEY:
        raise HTTPException(
            status_code=500,
            detail="CONTROL_API_KEY not configured on server",
        )

    # Compare in constant time so response timing does not reveal how much of
    # a guessed key is correct. Encoding to bytes keeps compare_digest from
    # raising on non-ASCII input; a missing header compares as empty and fails.
    supplied = (x_api_key or "").encode("utf-8")
    expected = CONTROL_API_KEY.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Unauthorized")
# -------------------------
# Auto-Shutdown Background Task
# -------------------------
_shutdown_task: Optional[asyncio.Task] = None
async def auto_shutdown_monitor() -> None:
    """
    Background task that stops the instance after a period of inactivity.

    Checks every 60 s whether the recorded last activity is older than
    AUTO_SHUTDOWN_MINUTES. When it is, the instance is stopped, the stop is
    recorded, and the monitor exits; it is started again by
    ``start_auto_shutdown_monitor``. Returning after a successful stop matters:
    the last-activity timestamp does not change on shutdown, so a monitor that
    kept looping would stop the instance and record its cost again every
    minute.
    """
    if not VAST_API_KEY or not VAST_INSTANCE_ID:
        return

    client = VastAIClient(VAST_API_KEY)

    try:
        instance_id = int(VAST_INSTANCE_ID)

        while True:
            await asyncio.sleep(60)  # Check every minute

            if not AUTO_SHUTDOWN_ENABLED:
                continue

            last_activity = _state.get_last_activity()
            if not last_activity:
                continue

            # Minutes since the last recorded activity.
            now = datetime.now(timezone.utc)
            inactive_minutes = (now - last_activity).total_seconds() / 60
            if inactive_minutes < AUTO_SHUTDOWN_MINUTES:
                continue

            logger.info(
                f"Auto-shutdown triggered: {inactive_minutes:.1f} min inactive"
            )
            audit_log(
                "auto_shutdown",
                actor="system",
                meta={"inactive_minutes": inactive_minutes},
            )

            # Fetch the current price for cost tracking. A failed lookup must
            # not prevent the stop itself, so it only costs us the price.
            try:
                instance = await client.get_instance(instance_id)
            except Exception as e:
                logger.warning(f"Auto-shutdown: could not fetch instance info: {e}")
                instance = None
            dph = instance.dph_total if instance else None

            # Stop; only record it when the API accepted the request.
            if not await client.stop_instance(instance_id):
                logger.error("Auto-shutdown: stop request failed, retrying at next check")
                continue

            _state.record_stop(dph_total=dph)
            audit_log("auto_shutdown_complete", actor="system")
            return

    except asyncio.CancelledError:
        pass
    except Exception as e:
        logger.error(f"Auto-shutdown monitor error: {e}")
    finally:
        await client.close()
def start_auto_shutdown_monitor() -> None:
    """Launch the auto-shutdown monitor unless one is already running."""
    global _shutdown_task
    monitor_active = _shutdown_task is not None and not _shutdown_task.done()
    if monitor_active:
        return
    _shutdown_task = asyncio.create_task(auto_shutdown_monitor())
    logger.info("Auto-shutdown monitor started")
def stop_auto_shutdown_monitor() -> None:
    """Cancel the auto-shutdown monitor if it is still running."""
    global _shutdown_task
    task = _shutdown_task
    if not task or task.done():
        return
    task.cancel()
    logger.info("Auto-shutdown monitor stopped")
# -------------------------
# API Endpoints
# -------------------------
@router.get("/status", response_model=VastStatusResponse, dependencies=[Depends(require_control_key)])
async def get_status() -> VastStatusResponse:
    """
    Report the status of the vast.ai instance.

    The response includes:
    - current status (running/stopped/etc)
    - GPU info and price per hour
    - endpoint URL
    - auto-shutdown timer
    - accumulated totals
    - account credit (remaining budget)
    - session cost (since the last start)

    As a side effect the persisted state is refreshed: the endpoint URL is
    stored while the instance runs, and a missing ``last_start`` is filled in.
    """
    if not (VAST_API_KEY and VAST_INSTANCE_ID):
        return VastStatusResponse(
            status="unconfigured",
            message="VAST_API_KEY or VAST_INSTANCE_ID not set",
        )

    client = VastAIClient(VAST_API_KEY)
    try:
        instance = await client.get_instance(int(VAST_INSTANCE_ID))
        if not instance:
            return VastStatusResponse(
                instance_id=int(VAST_INSTANCE_ID),
                status="not_found",
                message=f"Instance {VAST_INSTANCE_ID} not found",
            )

        is_running = instance.status == InstanceStatus.RUNNING

        # Account info for budget/credit.
        account = await client.get_account_info()
        credit = account.credit if account else None
        total_spend = account.total_spend if account else None

        # Remember the endpoint while the instance is up.
        endpoint = instance.get_endpoint_url(VAST_HEALTH_PORT) if is_running else None
        if endpoint:
            _state.set("endpoint_base_url", endpoint)

        # Minutes left until auto-shutdown, floored at zero.
        shutdown_in_minutes = None
        if AUTO_SHUTDOWN_ENABLED and is_running:
            last_seen = _state.get_last_activity()
            if last_seen:
                idle_minutes = (datetime.now(timezone.utc) - last_seen).total_seconds() / 60
                shutdown_in_minutes = max(0, int(AUTO_SHUTDOWN_MINUTES - idle_minutes))

        # Cost of the current session (only while the instance runs).
        session_minutes = None
        session_cost = None
        session_start = _state.get("last_start")

        # Running but no recorded start (e.g. after a container restart): use
        # the API's start date when available, otherwise "now".
        if is_running and not session_start:
            if instance.started_at:
                session_start = instance.started_at.isoformat()
                _state.set("last_start", session_start)
            else:
                _state.record_start()
                session_start = _state.get("last_start")

        if session_start and is_running:
            started = datetime.fromisoformat(session_start)
            session_minutes = (datetime.now(timezone.utc) - started).total_seconds() / 60
            if instance.dph_total:
                session_cost = (session_minutes / 60) * instance.dph_total

        return VastStatusResponse(
            instance_id=instance.id,
            status=instance.status.value,
            gpu_name=instance.gpu_name,
            dph_total=instance.dph_total,
            endpoint_base_url=endpoint or _state.get("endpoint_base_url"),
            last_activity=_state.get("last_activity"),
            auto_shutdown_in_minutes=shutdown_in_minutes,
            total_runtime_hours=_state.get("total_runtime_seconds", 0) / 3600,
            total_cost_usd=_state.get("total_cost_usd", 0.0),
            account_credit=credit,
            account_total_spend=total_spend,
            session_runtime_minutes=session_minutes,
            session_cost_usd=session_cost,
        )
    finally:
        await client.close()
# POST /power/on: start the configured vast.ai instance and report how far startup got.
# The German handler docstring is left untouched (FastAPI-style frameworks surface
# handler docstrings in the generated API description). In English it reads:
#   Starts the vast.ai instance.
#   1. Starts the instance via the API
#   2. Waits for status RUNNING
#   3. Optional: waits for the health endpoint
#   4. Starts the auto-shutdown monitor
# Response statuses produced below: "starting", "running_unhealthy", "running".
# Raises HTTPException 500 when VAST_API_KEY / VAST_INSTANCE_ID are missing and
# 502 when the start call reports failure.
@router.post("/power/on", response_model=PowerOnResponse, dependencies=[Depends(require_control_key)])
async def power_on(
    payload: PowerOnRequest,
    background_tasks: BackgroundTasks,  # not referenced anywhere in this body
) -> PowerOnResponse:
    """
    Startet die vast.ai Instanz.
    1. Startet Instanz via API
    2. Wartet auf Status RUNNING
    3. Optional: Wartet auf Health-Endpoint
    4. Startet Auto-Shutdown Monitor
    """
    # Refuse to run without credentials and a target instance.
    if not VAST_API_KEY or not VAST_INSTANCE_ID:
        raise HTTPException(
            status_code=500,
            detail="VAST_API_KEY or VAST_INSTANCE_ID not configured",
        )
    instance_id = int(VAST_INSTANCE_ID)
    audit_log("power_on_requested", meta={"instance_id": instance_id})
    client = VastAIClient(VAST_API_KEY)
    try:
        # Start instance
        success = await client.start_instance(instance_id)
        if not success:
            raise HTTPException(status_code=502, detail="Failed to start instance")
        # Session start and activity are recorded as soon as the start call is
        # accepted, i.e. before the instance has actually reached RUNNING.
        _state.record_start()
        _state.record_activity()
        # Wait for running status
        instance = await client.wait_for_status(
            instance_id,
            InstanceStatus.RUNNING,
            timeout_seconds=300,
        )
        # A falsy result is reported as "still starting" rather than as an error.
        if not instance:
            return PowerOnResponse(
                status="starting",
                instance_id=instance_id,
                message="Instance start requested but not yet running. Check status.",
            )
        # Get endpoint
        endpoint = instance.get_endpoint_url(payload.health_port)
        if endpoint:
            _state.set("endpoint_base_url", endpoint)
        # Wait for health if requested
        if payload.wait_for_health:
            health_ok = await client.wait_for_health(
                instance,
                health_path=payload.health_path,
                internal_port=payload.health_port,
                timeout_seconds=VAST_WAIT_TIMEOUT_S,
            )
            if not health_ok:
                # NOTE(review): this early return happens before
                # start_auto_shutdown_monitor() is called — confirm that an
                # unhealthy-but-running instance is meant to go unmonitored.
                audit_log("power_on_health_timeout", meta={"instance_id": instance_id})
                return PowerOnResponse(
                    status="running_unhealthy",
                    instance_id=instance_id,
                    endpoint_base_url=endpoint,
                    message=f"Instance running but health check failed at {endpoint}{payload.health_path}",
                )
        # Start auto-shutdown monitor
        start_auto_shutdown_monitor()
        audit_log("power_on_complete", meta={
            "instance_id": instance_id,
            "endpoint": endpoint,
        })
        return PowerOnResponse(
            status="running",
            instance_id=instance_id,
            endpoint_base_url=endpoint,
            # No health URL can be built when the instance exposes no endpoint.
            health_url=f"{endpoint}{payload.health_path}" if endpoint else None,
            message="Instance running and healthy",
        )
    finally:
        # The client is closed on every exit path, including raised HTTPExceptions.
        await client.close()
# POST /power/off: stop the configured vast.ai instance and report the session figures.
# The German handler docstring is left untouched (FastAPI-style frameworks surface
# handler docstrings in the generated API description). In English it reads:
#   Stops the vast.ai instance (keeps the disk).
#   Computes session cost and session runtime.
# Raises HTTPException 500 when VAST_API_KEY / VAST_INSTANCE_ID are missing and
# 502 when the stop call reports failure.
@router.post("/power/off", response_model=PowerOffResponse, dependencies=[Depends(require_control_key)])
async def power_off(payload: PowerOffRequest) -> PowerOffResponse:
    """
    Stoppt die vast.ai Instanz (behaelt Disk).
    Berechnet Session-Kosten und -Laufzeit.
    """
    if not VAST_API_KEY or not VAST_INSTANCE_ID:
        raise HTTPException(
            status_code=500,
            detail="VAST_API_KEY or VAST_INSTANCE_ID not configured",
        )
    instance_id = int(VAST_INSTANCE_ID)
    audit_log("power_off_requested", meta={"instance_id": instance_id})
    # Stop auto-shutdown monitor
    # NOTE(review): the monitor is stopped before the stop call below; if that call
    # fails with 502 the monitor is not restarted here — confirm this is intended.
    stop_auto_shutdown_monitor()
    client = VastAIClient(VAST_API_KEY)
    try:
        # Get current info for cost calculation
        instance = await client.get_instance(instance_id)
        # Hourly price; None when the instance lookup returned nothing.
        dph = instance.dph_total if instance else None
        # Calculate session stats before updating state
        session_runtime = 0.0  # minutes
        session_cost = 0.0
        last_start = _state.get("last_start")
        if last_start:
            # NOTE(review): assumes last_start parses to a timezone-aware datetime;
            # subtracting a naive value from an aware "now" would raise — confirm.
            start_dt = datetime.fromisoformat(last_start)
            session_runtime = (datetime.now(timezone.utc) - start_dt).total_seconds() / 60
            if dph:
                # minutes -> hours, multiplied by the hourly price
                session_cost = (session_runtime / 60) * dph
        # Stop instance
        success = await client.stop_instance(instance_id)
        if not success:
            raise HTTPException(status_code=502, detail="Failed to stop instance")
        _state.record_stop(dph_total=dph)
        audit_log("power_off_complete", meta={
            "instance_id": instance_id,
            "session_minutes": session_runtime,
            "session_cost": session_cost,
        })
        return PowerOffResponse(
            status="stopped",
            session_runtime_minutes=session_runtime,
            session_cost_usd=session_cost,
            message=f"Instance stopped. Session: {session_runtime:.1f} min, ${session_cost:.3f}",
        )
    finally:
        # The client is closed on every exit path.
        await client.close()
# POST /activity: refresh the activity timestamp (postpones auto-shutdown).
# English reading of the docstring below: "Records activity (delays auto-shutdown).
# Should be called by the LLM gateway on every request."
@router.post("/activity", dependencies=[Depends(require_control_key)])
async def record_activity() -> Dict[str, str]:
    """
    Zeichnet Aktivitaet auf (verzoegert Auto-Shutdown).
    Sollte von LLM Gateway aufgerufen werden bei jedem Request.
    """
    _state.record_activity()
    # Echo back whatever timestamp the state store now holds.
    latest = _state.get("last_activity")
    return {"status": "recorded", "last_activity": latest}
# GET /costs: accumulated runtime and cost figures.
# English reading of the docstring below: "Returns cost statistics."
@router.get("/costs", response_model=CostStatsResponse, dependencies=[Depends(require_control_key)])
async def get_costs() -> CostStatsResponse:
    """
    Gibt Kosten-Statistiken zurueck.
    """
    runtime_seconds = _state.get("total_runtime_seconds", 0)
    cost_usd = _state.get("total_cost_usd", 0.0)
    # TODO: Sessions count from audit log
    # Until then any recorded runtime is treated as exactly one session.
    if runtime_seconds > 0:
        sessions = 1
        avg_minutes = runtime_seconds / 60 / sessions
    else:
        sessions = 0
        avg_minutes = 0
    return CostStatsResponse(
        total_runtime_hours=runtime_seconds / 3600,
        total_cost_usd=cost_usd,
        sessions_count=sessions,
        avg_session_minutes=avg_minutes,
    )
# GET /audit: the most recent audit-log entries, newest first.
# English reading of the docstring below: "Returns the latest audit log entries."
#
# Parameters:
#   limit: maximum number of trailing log lines to inspect (default 50).
# Returns:
#   Parsed JSON entries, newest first; lines that are not valid JSON are skipped.
#   An empty list when the log file does not exist or limit is not positive.
@router.get("/audit", dependencies=[Depends(require_control_key)])
async def get_audit_log(limit: int = 50) -> List[Dict[str, Any]]:
    """
    Gibt letzte Audit-Log Eintraege zurueck.
    """
    # A non-positive limit must yield nothing. Without this guard limit=0 slices
    # lines[-0:] (the entire file) and a negative limit slices lines[n:], which
    # drops the oldest entries instead of limiting the result.
    if limit <= 0:
        return []
    if not AUDIT_PATH.exists():
        return []
    lines = AUDIT_PATH.read_text(encoding="utf-8").strip().split("\n")
    entries = []
    for line in lines[-limit:]:
        try:
            entries.append(json.loads(line))
        except json.JSONDecodeError:
            # Corrupt or blank lines are ignored rather than failing the request.
            continue
    return list(reversed(entries))  # newest first

View File

@@ -1,199 +0,0 @@
"""
BreakPilot Jitsi API
Ermoeglicht das Versenden von Jitsi-Meeting-Einladungen per Email.
"""
import os
import uuid
from datetime import datetime
from typing import Optional, List
from pydantic import BaseModel, Field
from fastapi import APIRouter, HTTPException
router = APIRouter(prefix="/api/jitsi", tags=["Jitsi"])
# Standard Jitsi Server (kann konfiguriert werden)
JITSI_SERVER = os.getenv("JITSI_SERVER", "https://meet.jit.si")
# ==========================================
# PYDANTIC MODELS
# ==========================================
# Request body of POST /api/jitsi/invite: one recipient plus the meeting details.
# The docstring and Field descriptions stay in German on purpose: pydantic copies
# them into the generated schema, so they are runtime text, not comments.
class JitsiInvitation(BaseModel):
    """Model fuer Jitsi-Meeting-Einladung."""
    # Recipient e-mail address (required)
    to_email: str = Field(..., description="Email-Adresse des Teilnehmers")
    # Recipient display name (required)
    to_name: str = Field(..., description="Name des Teilnehmers")
    # Organizer name shown in the invitation
    organizer_name: str = Field(default="BreakPilot Lehrer", description="Name des Organisators")
    # Meeting title (required)
    meeting_title: str = Field(..., description="Titel des Meetings")
    # Free-text date and time; passed through to the mail service unparsed
    meeting_date: str = Field(..., description="Datum z.B. '20. Dezember 2024'")
    meeting_time: str = Field(..., description="Uhrzeit z.B. '14:00 Uhr'")
    # Optional room name; the handler generates one when this is empty
    room_name: Optional[str] = Field(None, description="Raumname (wird generiert wenn leer)")
    # Optional extra text for the invitation
    additional_info: Optional[str] = Field(None, description="Zusaetzliche Informationen")
# Response body of POST /api/jitsi/invite.
# The German docstring is kept as is (pydantic exposes it in the generated schema).
class JitsiInvitationResponse(BaseModel):
    """Antwort auf eine Jitsi-Einladung."""
    # The handler sets success to the same value as email_sent
    success: bool
    # Full meeting URL and the room name it was built from
    jitsi_url: str
    room_name: str
    # Whether the mail service reported success
    email_sent: bool
    # Error text from the mail service or from a caught exception, if any
    email_error: Optional[str] = None
# Request body of POST /api/jitsi/invite/bulk: many recipients, one shared meeting.
# The docstring and Field description stay in German (pydantic schema text).
class JitsiBulkInvitation(BaseModel):
    """Model fuer mehrere Jitsi-Einladungen."""
    # Untyped dicts; the handler reads the keys "email" (required per entry) and "name"
    recipients: List[dict] = Field(..., description="Liste von {email, name} Objekten")
    organizer_name: str = Field(default="BreakPilot Lehrer")
    meeting_title: str
    # Free-text date and time, forwarded unparsed
    meeting_date: str
    meeting_time: str
    # Optional shared room name; generated by the handler when missing
    room_name: Optional[str] = None
    additional_info: Optional[str] = None
# Response body of POST /api/jitsi/invite/bulk.
# The German docstring is kept as is (pydantic exposes it in the generated schema).
class JitsiBulkResponse(BaseModel):
    """Antwort auf Bulk-Einladungen."""
    # Shared meeting URL and room name used for every recipient
    jitsi_url: str
    room_name: str
    # Counters maintained by the handler
    sent: int
    failed: int
    # Error messages; the handler returns at most the first 20
    errors: List[str]
# ==========================================
# HELPER FUNCTIONS
# ==========================================
def generate_room_name() -> str:
    """Generate a hard-to-guess room name of the form ``BreakPilot-<12 hex chars>``."""
    # UUID4-based so the name cannot be predicted
    token = uuid.uuid4().hex
    return "BreakPilot-" + token[:12]
def build_jitsi_url(room_name: str) -> str:
    """Build the full Jitsi URL: the configured server followed by ``/<room_name>``."""
    server = JITSI_SERVER
    return f"{server}/{room_name}"
# ==========================================
# API ENDPOINTS
# ==========================================
# POST /api/jitsi/invite: e-mail a single meeting invitation.
# English reading of the docstring below: "Sends a Jitsi meeting invitation by e-mail.
# The recipient can join via the browser without installing Matrix or other software."
# Mail problems never raise; they are reported through email_sent / email_error.
@router.post("/invite", response_model=JitsiInvitationResponse)
async def send_jitsi_invitation(invitation: JitsiInvitation):
    """
    Sendet eine Jitsi-Meeting-Einladung per Email.
    Der Empfaenger kann dem Meeting ueber den Browser beitreten,
    ohne Matrix oder andere Software installieren zu muessen.
    """
    # Use the caller's room name when given, otherwise mint a fresh one
    if invitation.room_name:
        room_name = invitation.room_name
    else:
        room_name = generate_room_name()
    jitsi_url = build_jitsi_url(room_name)

    email_sent, email_error = False, None
    try:
        # Imported lazily so a missing mail backend becomes a reported error
        from email_service import email_service
        outcome = email_service.send_jitsi_invitation(
            to_email=invitation.to_email,
            to_name=invitation.to_name,
            organizer_name=invitation.organizer_name,
            meeting_title=invitation.meeting_title,
            meeting_date=invitation.meeting_date,
            meeting_time=invitation.meeting_time,
            jitsi_url=jitsi_url,
            additional_info=invitation.additional_info
        )
        email_sent = outcome.success
        if not outcome.success:
            email_error = outcome.error
    except Exception as exc:
        email_error = str(exc)

    response_fields = {
        "success": email_sent,
        "jitsi_url": jitsi_url,
        "room_name": room_name,
        "email_sent": email_sent,
        "email_error": email_error,
    }
    return JitsiInvitationResponse(**response_fields)
# POST /api/jitsi/invite/bulk: invite several recipients to one shared meeting.
# English reading of the docstring below: "Sends Jitsi invitations to several
# recipients. All recipients receive an invitation to the same meeting."
#
# Returns a JitsiBulkResponse with sent/failed counters and up to 20 error messages.
# Failures are isolated per recipient: an exception while mailing one address is
# counted and reported for that address and the loop moves on to the next one.
@router.post("/invite/bulk", response_model=JitsiBulkResponse)
async def send_bulk_jitsi_invitations(bulk: JitsiBulkInvitation):
    """
    Sendet Jitsi-Einladungen an mehrere Empfaenger.
    Alle Empfaenger erhalten eine Einladung zum selben Meeting.
    """
    # One shared room for everybody
    room_name = bulk.room_name or generate_room_name()
    jitsi_url = build_jitsi_url(room_name)
    sent = 0
    failed = 0
    errors = []
    try:
        from email_service import email_service
    except Exception as e:
        # Without a mail backend nobody can be invited: report every recipient
        # as failed instead of returning failed=0 next to a general error.
        errors.append(f"Allgemeiner Fehler: {str(e)}")
        failed = len(bulk.recipients)
    else:
        for recipient in bulk.recipients:
            if not recipient.get("email"):
                errors.append(f"Fehlende Email fuer {recipient.get('name', 'Unbekannt')}")
                failed += 1
                continue
            try:
                result = email_service.send_jitsi_invitation(
                    to_email=recipient["email"],
                    to_name=recipient.get("name", ""),
                    organizer_name=bulk.organizer_name,
                    meeting_title=bulk.meeting_title,
                    meeting_date=bulk.meeting_date,
                    meeting_time=bulk.meeting_time,
                    jitsi_url=jitsi_url,
                    additional_info=bulk.additional_info
                )
            except Exception as e:
                # Previously one raised exception aborted the whole loop, leaving
                # the remaining recipients neither mailed nor counted.
                failed += 1
                errors.append(f"{recipient.get('email')}: {str(e)}")
                continue
            if result.success:
                sent += 1
            else:
                failed += 1
                errors.append(f"{recipient.get('email')}: {result.error}")
    return JitsiBulkResponse(
        jitsi_url=jitsi_url,
        room_name=room_name,
        sent=sent,
        failed=failed,
        errors=errors[:20]  # return at most 20 errors
    )
# GET /api/jitsi/room: mint a room without sending any invitation.
# English reading of the docstring below: "Generates a new meeting room.
# Returns the URL without sending invitations."
@router.get("/room")
async def generate_meeting_room():
    """
    Generiert einen neuen Meeting-Raum.
    Gibt die URL zurueck ohne Einladungen zu senden.
    """
    room_name = generate_room_name()
    payload = {"room_name": room_name}
    payload["jitsi_url"] = build_jitsi_url(room_name)
    payload["server"] = JITSI_SERVER
    # Timestamp comes from datetime.utcnow(), so it carries no UTC offset suffix
    payload["created_at"] = datetime.utcnow().isoformat()
    return payload

View File

@@ -40,7 +40,6 @@ os.environ["DATABASE_URL"] = DATABASE_URL
# ---------------------------------------------------------------------------
LLM_GATEWAY_ENABLED = os.getenv("LLM_GATEWAY_ENABLED", "false").lower() == "true"
ALERTS_AGENT_ENABLED = os.getenv("ALERTS_AGENT_ENABLED", "false").lower() == "true"
VAST_API_KEY = os.getenv("VAST_API_KEY")
# ---------------------------------------------------------------------------
@@ -116,15 +115,10 @@ app.include_router(unit_router) # Already has /api/units prefix
from unit_analytics_api import router as unit_analytics_router
app.include_router(unit_analytics_router) # Already has /api/analytics prefix
# --- 5. Meetings / Jitsi ---
from meetings_api import router as meetings_api_router
app.include_router(meetings_api_router) # Already has /api/meetings prefix
from recording_api import router as recording_api_router
app.include_router(recording_api_router) # Already has /api/recordings prefix
from jitsi_api import router as jitsi_router
app.include_router(jitsi_router) # Already has /api/jitsi prefix
# --- 6. Messenger ---
from messenger_api import router as messenger_router
@@ -184,11 +178,6 @@ if ALERTS_AGENT_ENABLED:
from alerts_agent.api import router as alerts_router
app.include_router(alerts_router, prefix="/api", tags=["Alerts Agent"])
# --- 14. vast.ai GPU Infrastructure (optional) ---
if VAST_API_KEY:
from infra.vast_power import router as vast_router
app.include_router(vast_router, tags=["GPU Infrastructure"])
# ---------------------------------------------------------------------------
# Middleware (from shared middleware/ package)

View File

@@ -1,443 +0,0 @@
"""
Meetings API Module
Backend API endpoints for Jitsi Meet integration
"""
import os
import uuid
import httpx
from datetime import datetime, timedelta
from typing import Optional, List
from fastapi import APIRouter, HTTPException, Depends
from pydantic import BaseModel, EmailStr
router = APIRouter(prefix="/api/meetings", tags=["meetings"])
# ============================================
# Configuration
# ============================================
JITSI_BASE_URL = os.getenv("JITSI_PUBLIC_URL", "http://localhost:8443")
CONSENT_SERVICE_URL = os.getenv("CONSENT_SERVICE_URL", "http://localhost:8081")
# ============================================
# Models
# ============================================
# Per-meeting option flags.
# Within the visible part of this file, build_jitsi_url() only reads
# start_with_audio_muted, start_with_video_muted and require_display_name;
# the remaining flags are stored with the meeting but not turned into URL parameters.
class MeetingConfig(BaseModel):
    enable_lobby: bool = True
    enable_recording: bool = False
    start_with_audio_muted: bool = True
    start_with_video_muted: bool = False
    require_display_name: bool = True
    enable_breakout: bool = False
# Request body of POST /api/meetings/create.
class CreateMeetingRequest(BaseModel):
    # Selects the room-name prefix in create_meeting; unknown values fall back to "meeting"
    type: str = "quick"  # quick, scheduled, training, parent, class
    title: str = "Neues Meeting"
    duration: int = 60  # presumably minutes — TODO confirm
    # When set, create_meeting also stores the meeting in scheduled_meetings
    scheduled_at: Optional[str] = None
    # Defaults to MeetingConfig() in the handler when omitted
    config: Optional[MeetingConfig] = None
    # description and invites are not read by create_meeting in the visible code
    description: Optional[str] = None
    invites: Optional[List[str]] = None
# Request body of POST /api/meetings/schedule.
class ScheduleMeetingRequest(BaseModel):
    title: str
    # Stored as given; schedule_meeting does not parse it
    scheduled_at: str
    duration: int = 60  # presumably minutes — TODO confirm
    description: Optional[str] = None
    # Stored with the meeting; sending the invites is still a TODO in the handler
    invites: Optional[List[str]] = None
# Request body of POST /api/meetings/training.
class TrainingRequest(BaseModel):
    # Also used (lower-cased, spaces replaced, truncated) as part of the room name
    title: str
    description: Optional[str] = None
    scheduled_at: str
    duration: int = 120  # presumably minutes — TODO confirm
    # Stored with the training; not enforced anywhere in the visible code
    max_participants: int = 20
    trainer: str
    # When omitted, create_training uses lobby on, recording on, audio muted
    config: Optional[MeetingConfig] = None
# Request body of POST /api/meetings/parent-teacher.
class ParentTeacherRequest(BaseModel):
    # Becomes part of the room name and the meeting title
    student_name: str
    parent_name: str
    parent_email: Optional[str] = None
    # Parsed with datetime.fromisoformat() by the handler
    scheduled_at: str
    reason: Optional[str] = None
    # Not read by the handler in the visible code (e-mail invite is still a TODO there)
    send_invite: bool = True
    duration: int = 30  # presumably minutes — TODO confirm
# Common response body of the meeting-creation endpoints.
# The handlers visible in this file fill room_name and join_url, and the
# parent-teacher handler additionally fills password; the other fields keep their defaults.
class MeetingResponse(BaseModel):
    room_name: str
    join_url: str
    moderator_url: Optional[str] = None
    password: Optional[str] = None
    expires_at: Optional[str] = None
# Response body of GET /api/meetings/stats; every counter defaults to 0.
class MeetingStats(BaseModel):
    active: int = 0        # number of entries in active_meetings
    scheduled: int = 0     # number of entries in scheduled_meetings
    recordings: int = 0    # number of entries in recordings
    participants: int = 0  # sum of "participants" over active_meetings
# One item of the GET /api/meetings/active response.
class ActiveMeeting(BaseModel):
    room_name: str
    title: str
    participants: int
    # ISO-formatted string; the handler falls back to the current time when missing
    started_at: str
# ============================================
# In-Memory Storage (for demo purposes)
# In production, use database
# ============================================
# Process-local stores; their content is lost on restart.
# Future meetings: appended to by the create / schedule / training / parent-teacher handlers.
scheduled_meetings = []
# Meetings in progress: nothing in the visible part of this file writes to it.
active_meetings = []
# Training sessions: create_training appends each entry here and to scheduled_meetings.
trainings = []
# Recording metadata: only its length is read (get_meeting_stats) in the visible code.
recordings = []
# ============================================
# Helper Functions
# ============================================
def generate_room_name(prefix: str = "meeting") -> str:
    """Return ``<prefix>-<8 hex chars>`` where the hex part comes from a random UUID4."""
    suffix = uuid.uuid4().hex[:8]
    return "-".join((prefix, suffix))
def generate_password() -> str:
    """Return an 8-character lowercase hex string cut from a random UUID4."""
    random_id = uuid.uuid4()
    return random_id.hex[0:8]
def build_jitsi_url(room_name: str, config: Optional[MeetingConfig] = None) -> str:
    """
    Compose the join URL for a room.

    The result is ``<JITSI_BASE_URL>/<room_name>`` followed by ``#`` and an
    ``&``-joined list of settings: the per-meeting flags that are switched on in
    ``config`` (audio muted, video muted, display name required) and then a
    fixed set of common settings that is always appended.
    """
    # (flag value, URL parameter) pairs; only enabled flags are emitted
    toggles = []
    if config:
        toggles = [
            (config.start_with_audio_muted, "config.startWithAudioMuted=true"),
            (config.start_with_video_muted, "config.startWithVideoMuted=true"),
            (config.require_display_name, "config.requireDisplayName=true"),
        ]
    params = [param for enabled, param in toggles if enabled]
    # Common config
    params += [
        "config.prejoinPageEnabled=false",
        "config.disableDeepLinking=true",
        "config.defaultLanguage=de",
        "interfaceConfig.SHOW_JITSI_WATERMARK=false",
        "interfaceConfig.SHOW_BRAND_WATERMARK=false"
    ]
    url = f"{JITSI_BASE_URL}/{room_name}"
    if params:
        url = url + "#" + "&".join(params)
    return url
async def call_consent_service(endpoint: str, method: str = "GET", data: dict = None) -> dict:
    """
    Send one request to the consent service and return the decoded JSON body.

    Args:
        endpoint: path appended to CONSENT_SERVICE_URL.
        method: "GET" or "POST".
        data: JSON payload, used for POST only.

    Returns:
        The parsed JSON response, or None when the status code is 400 or above.

    Raises:
        ValueError: for any method other than GET or POST.
    """
    async with httpx.AsyncClient() as client:
        target = f"{CONSENT_SERVICE_URL}{endpoint}"
        if method not in ("GET", "POST"):
            raise ValueError(f"Unsupported method: {method}")
        if method == "GET":
            response = await client.get(target)
        else:
            response = await client.post(target, json=data)
        failed = response.status_code >= 400
        return None if failed else response.json()
# ============================================
# API Endpoints
# ============================================
# GET /api/meetings/stats: counters derived from the in-memory lists.
@router.get("/stats", response_model=MeetingStats)
async def get_meeting_stats():
    """Get meeting statistics"""
    # Entries without a "participants" key count as zero
    participant_total = 0
    for meeting in active_meetings:
        participant_total += meeting.get("participants", 0)
    return MeetingStats(
        active=len(active_meetings),
        scheduled=len(scheduled_meetings),
        recordings=len(recordings),
        participants=participant_total
    )
# GET /api/meetings/active: the in-memory active meetings as ActiveMeeting models.
@router.get("/active", response_model=List[ActiveMeeting])
async def get_active_meetings():
    """Get list of active meetings"""
    result = []
    for entry in active_meetings:
        # Missing participant counts default to 0, missing start times to "now"
        item = ActiveMeeting(
            room_name=entry["room_name"],
            title=entry["title"],
            participants=entry.get("participants", 0),
            started_at=entry.get("started_at", datetime.now().isoformat())
        )
        result.append(item)
    return result
# POST /api/meetings/create: create a room and, when a start time is given,
# remember it in scheduled_meetings.
@router.post("/create", response_model=MeetingResponse)
async def create_meeting(request: CreateMeetingRequest):
    """Create a new meeting"""
    config = request.config or MeetingConfig()

    # Meeting type -> room-name prefix; anything unknown uses "meeting"
    prefix_by_type = {
        "quick": "quick",
        "training": "schulung",
        "parent": "elterngespraech",
        "class": "klasse",
    }
    room_name = generate_room_name(prefix_by_type.get(request.type, "meeting"))
    join_url = build_jitsi_url(room_name, config)

    # Only scheduled meetings are stored
    if request.scheduled_at:
        entry = {
            "room_name": room_name,
            "title": request.title,
            "scheduled_at": request.scheduled_at,
            "duration": request.duration,
            "config": config.model_dump() if config else None
        }
        scheduled_meetings.append(entry)

    return MeetingResponse(room_name=room_name, join_url=join_url)
# POST /api/meetings/schedule: store a future meeting and return its join URL.
@router.post("/schedule", response_model=MeetingResponse)
async def schedule_meeting(request: ScheduleMeetingRequest):
    """Schedule a new meeting"""
    room_name = generate_room_name("meeting")
    invitees = request.invites or []
    scheduled_meetings.append({
        "room_name": room_name,
        "title": request.title,
        "scheduled_at": request.scheduled_at,
        "duration": request.duration,
        "description": request.description,
        "invites": invitees
    })
    # TODO: Send email invites if configured
    # No config is passed, so only the common URL settings are applied
    return MeetingResponse(
        room_name=room_name,
        join_url=build_jitsi_url(room_name)
    )
# POST /api/meetings/training: register a training session.
# The same dict is appended to both trainings and scheduled_meetings.
@router.post("/training", response_model=MeetingResponse)
async def create_training(request: TrainingRequest):
    """Create a training session"""
    # Room name: "schulung-" + slugged title (max 20 chars) + "-" + 4 random hex chars
    slug = "-".join(request.title.lower().split(" "))[:20]
    room_name = "schulung-" + slug + "-" + uuid.uuid4().hex[:4]

    if request.config:
        config = request.config
    else:
        # Default for trainings: lobby on, recording on, microphones muted
        config = MeetingConfig(
            enable_lobby=True,
            enable_recording=True,
            start_with_audio_muted=True
        )

    session = dict(
        room_name=room_name,
        title=request.title,
        description=request.description,
        scheduled_at=request.scheduled_at,
        duration=request.duration,
        max_participants=request.max_participants,
        trainer=request.trainer,
        config=config.model_dump(),
    )
    for store in (trainings, scheduled_meetings):
        store.append(session)

    return MeetingResponse(
        room_name=room_name,
        join_url=build_jitsi_url(room_name, config)
    )
# POST /api/meetings/parent-teacher: schedule a parent-teacher conversation.
#
# The room name is built from the student's name and the start time, a random
# password is generated, and the meeting is stored in scheduled_meetings.
# Returns a MeetingResponse with room_name, join_url and password.
# Raises HTTPException 422 when scheduled_at is not a parseable ISO timestamp.
@router.post("/parent-teacher", response_model=MeetingResponse)
async def create_parent_teacher_meeting(request: ParentTeacherRequest):
    """Create a parent-teacher meeting"""
    # Validate the timestamp first: datetime.fromisoformat raises ValueError on
    # malformed input, which would otherwise surface as an unhandled 500.
    try:
        scheduled = datetime.fromisoformat(request.scheduled_at)
    except ValueError:
        raise HTTPException(
            status_code=422,
            detail="scheduled_at must be an ISO 8601 timestamp, e.g. 2025-01-31T14:00:00",
        )
    # Generate room name with student name and date
    student_slug = request.student_name.lower().replace(" ", "-")[:15]
    date_str = scheduled.strftime("%Y%m%d-%H%M")
    room_name = f"elterngespraech-{student_slug}-{date_str}"
    # Generate password for security
    password = generate_password()
    config = MeetingConfig(
        enable_lobby=True,
        enable_recording=False,
        start_with_audio_muted=False
    )
    meeting = {
        "room_name": room_name,
        "title": f"Elterngespräch - {request.student_name}",
        "student_name": request.student_name,
        "parent_name": request.parent_name,
        "parent_email": request.parent_email,
        "scheduled_at": request.scheduled_at,
        "duration": request.duration,
        "reason": request.reason,
        "password": password,
        "config": config.model_dump()
    }
    scheduled_meetings.append(meeting)
    join_url = build_jitsi_url(room_name, config)
    # TODO: Send email invite to parents if configured
    return MeetingResponse(
        room_name=room_name,
        join_url=join_url,
        password=password
    )
# GET /api/meetings/scheduled: returns the in-memory scheduled_meetings list as is.
# Entries are the raw dicts stored by the creation handlers, which for
# parent-teacher meetings includes the generated "password" field.
@router.get("/scheduled")
async def get_scheduled_meetings():
    """Get all scheduled meetings"""
    return scheduled_meetings
# GET /api/meetings/trainings: returns the in-memory trainings list as is
# (the raw dicts appended by create_training).
@router.get("/trainings")
async def get_trainings():
    """Get all training sessions"""
    return trainings
# DELETE /api/meetings/{room_name}: drop the first scheduled meeting with that room name.
# The response is {"status": "deleted"} whether or not a match was found.
@router.delete("/{room_name}")
async def delete_meeting(room_name: str):
    """Delete a scheduled meeting"""
    # Locate the first match, then remove it in place
    position = next(
        (idx for idx, entry in enumerate(scheduled_meetings) if entry["room_name"] == room_name),
        None,
    )
    if position is not None:
        scheduled_meetings.pop(position)
    return {"status": "deleted"}
# ============================================
# Recording Endpoints
# ============================================
# GET /api/meetings/recordings: returns three hard-coded demo recordings.
# The module-level `recordings` list is not consulted here, so the count reported
# by get_meeting_stats is independent of this response.
@router.get("/recordings")
async def get_recordings():
    """Get list of recordings"""
    # Demo data
    return [
        {
            "id": "docker-basics",
            "title": "Docker Grundlagen Schulung",
            "date": "2025-12-10T10:00:00",
            "duration": "1:30:00",
            "size_mb": 156,
            "participants": 15
        },
        {
            "id": "team-kw49",
            "title": "Team-Meeting KW 49",
            "date": "2025-12-06T14:00:00",
            "duration": "1:00:00",
            "size_mb": 98,
            "participants": 8
        },
        {
            "id": "parent-mueller",
            "title": "Elterngespräch - Max Müller",
            "date": "2025-12-02T16:00:00",
            "duration": "0:28:00",
            "size_mb": 42,
            "participants": 2
        }
    ]
# GET /api/meetings/recordings/{recording_id}: placeholder details for any id.
# Everything except the id-derived fields is a fixed demo value.
@router.get("/recordings/{recording_id}")
async def get_recording(recording_id: str):
    """Get recording details"""
    details = {"id": recording_id, "title": "Recording " + recording_id}
    details.update({
        "date": "2025-12-10T10:00:00",
        "duration": "1:30:00",
        "size_mb": 156,
    })
    # NOTE(review): this path is under /api/recordings, not this router's
    # /api/meetings prefix — confirm it targets the intended endpoint.
    details["download_url"] = f"/api/recordings/{recording_id}/download"
    return details
# GET /api/meetings/recordings/{recording_id}/download: demo stub.
# Always raises HTTPException 404; recording_id is not used.
@router.get("/recordings/{recording_id}/download")
async def download_recording(recording_id: str):
    """Download a recording"""
    # In production, this would stream the actual file
    raise HTTPException(status_code=404, detail="Recording file not found (demo mode)")
# DELETE /api/meetings/recordings/{recording_id}: demo stub.
# Nothing is removed; the id is simply echoed back with status "deleted".
@router.delete("/recordings/{recording_id}")
async def delete_recording(recording_id: str):
    """Delete a recording"""
    return {"status": "deleted", "id": recording_id}
# ============================================
# Health Check
# ============================================
# GET /api/meetings/health: "healthy" when the Jitsi base URL answers with 200
# within 5 seconds, otherwise "degraded". Probe errors are swallowed on purpose.
@router.get("/health")
async def health_check():
    """Check meetings service health"""
    # Check Jitsi availability
    jitsi_healthy = False
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            probe = await client.get(JITSI_BASE_URL)
            jitsi_healthy = probe.status_code == 200
    except Exception:
        # Any failure simply leaves the service marked as unavailable
        pass

    if jitsi_healthy:
        overall = "healthy"
    else:
        overall = "degraded"

    return {
        "status": overall,
        "jitsi_url": JITSI_BASE_URL,
        "jitsi_available": jitsi_healthy,
        "scheduled_meetings": len(scheduled_meetings),
        "active_meetings": len(active_meetings)
    }

View File

@@ -1,320 +0,0 @@
#!/usr/bin/env python3
"""
vast.ai Profile Extractor Script
Dieses Skript läuft auf vast.ai und extrahiert Profildaten von Universitäts-Webseiten.
Verwendung auf vast.ai:
1. Lade dieses Skript auf deine vast.ai Instanz
2. Installiere Abhängigkeiten: pip install requests beautifulsoup4 openai
3. Setze Umgebungsvariablen:
- BREAKPILOT_API_URL=http://deine-ip:8086
- BREAKPILOT_API_KEY=dev-key
- OPENAI_API_KEY=sk-...
4. Starte: python vast_ai_extractor.py
"""
import os
import sys
import json
import time
import logging
import requests
from bs4 import BeautifulSoup
from typing import Optional, Dict, Any, List
# Logging Setup
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Configuration
API_URL = os.environ.get('BREAKPILOT_API_URL', 'http://localhost:8086')
API_KEY = os.environ.get('BREAKPILOT_API_KEY', 'dev-key')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')
BATCH_SIZE = 10
SLEEP_BETWEEN_REQUESTS = 1 # Sekunden zwischen Requests (respektiere rate limits)
def fetch_pending_profiles(limit: int = 50) -> List[Dict]:
    """
    Fetch the profiles that still have to be extracted.

    Returns the "tasks" list of the API response, or an empty list when the
    request fails for any reason (the error is logged).
    """
    try:
        endpoint = f"{API_URL}/api/v1/ai/extraction/pending"
        auth_header = {"Authorization": f"Bearer {API_KEY}"}
        response = requests.get(
            endpoint,
            params={"limit": limit},
            headers=auth_header,
            timeout=30
        )
        response.raise_for_status()
        return response.json().get("tasks", [])
    except Exception as e:
        logger.error(f"Fehler beim Abrufen der Profile: {e}")
        return []
def fetch_profile_page(url: str) -> Optional[str]:
    """
    Download the HTML of a profile page.

    Returns the response text, or None when the request fails (the error is logged).
    """
    # Identify as the BreakPilot crawler and prefer German content
    request_headers = dict([
        ('User-Agent', 'Mozilla/5.0 (compatible; BreakPilot-Crawler/1.0; +https://breakpilot.de)'),
        ('Accept', 'text/html,application/xhtml+xml'),
        ('Accept-Language', 'de-DE,de;q=0.9,en;q=0.8'),
    ])
    try:
        page = requests.get(url, headers=request_headers, timeout=30)
        page.raise_for_status()
        return page.text
    except Exception as e:
        logger.error(f"Fehler beim Laden von {url}: {e}")
        return None
def extract_with_beautifulsoup(html: str, url: str) -> Dict[str, Any]:
    """
    Extract basic contact information from a profile page without AI.

    Args:
        html: raw HTML of the profile page.
        url: address the page was loaded from; used to resolve site-relative links.

    Returns:
        A dict containing only the keys that were found: ``email``, ``phone``,
        ``orcid``, ``google_scholar_id``, ``researchgate_url``, ``linkedin_url``,
        ``department_url`` and ``department_name``. For each kind the first
        matching link on the page wins.
    """
    from urllib.parse import urljoin  # stdlib; local import keeps the block self-contained

    soup = BeautifulSoup(html, 'html.parser')
    data = {}
    # E-mail: first mailto: link, without the scheme and any "?subject=..." part
    email_links = soup.find_all('a', href=lambda x: x and x.startswith('mailto:'))
    if email_links:
        email = email_links[0]['href'].replace('mailto:', '').split('?')[0]
        data['email'] = email
    # Phone: first tel: link
    phone_links = soup.find_all('a', href=lambda x: x and x.startswith('tel:'))
    if phone_links:
        data['phone'] = phone_links[0]['href'].replace('tel:', '')
    # ORCID: last path segment of the first orcid.org link
    orcid_links = soup.find_all('a', href=lambda x: x and 'orcid.org' in x)
    if orcid_links:
        orcid = orcid_links[0]['href']
        if '/' in orcid:
            # Strip trailing slashes first; ".../0000-0002-1825-0097/" would
            # otherwise produce an empty id.
            data['orcid'] = orcid.rstrip('/').split('/')[-1]
    # Google Scholar: value of the user= query parameter
    scholar_links = soup.find_all('a', href=lambda x: x and 'scholar.google' in x)
    if scholar_links:
        href = scholar_links[0]['href']
        if 'user=' in href:
            data['google_scholar_id'] = href.split('user=')[1].split('&')[0]
    # ResearchGate
    rg_links = soup.find_all('a', href=lambda x: x and 'researchgate.net' in x)
    if rg_links:
        data['researchgate_url'] = rg_links[0]['href']
    # LinkedIn
    linkedin_links = soup.find_all('a', href=lambda x: x and 'linkedin.com' in x)
    if linkedin_links:
        data['linkedin_url'] = linkedin_links[0]['href']
    # Collect institute/department links (used for hierarchy detection)
    department_links = []
    for link in soup.find_all('a', href=True):
        href = link['href']
        text = link.get_text(strip=True)
        # Link texts that hint at an institute, faculty, department or chair
        if any(kw in text.lower() for kw in ['institut', 'fakultät', 'fachbereich', 'abteilung', 'lehrstuhl']):
            if href.startswith('/'):
                # urljoin resolves "/path" against the page's host and also
                # handles protocol-relative "//host/path" links, which plain
                # string concatenation turned into "https://site//host/path".
                href = urljoin(url, href)
            if href.startswith('http'):
                department_links.append({'url': href, 'name': text})
    if department_links:
        # Use the first department link found
        data['department_url'] = department_links[0]['url']
        data['department_name'] = department_links[0]['name']
    return data
def extract_with_ai(html: str, url: str, full_name: str) -> Dict[str, Any]:
    """
    Extract structured profile data with an OpenAI chat model.

    Falls back to extract_with_beautifulsoup() when no OPENAI_API_KEY is set or
    when anything in the AI path raises (the error is logged).

    Args:
        html: raw HTML of the profile page.
        url: address of the page (quoted in the prompt, used by the fallback).
        full_name: name of the person, quoted in the prompt.

    Returns:
        The parsed JSON answer of the model, with orcid / google_scholar_id /
        researchgate_url / linkedin_url overwritten by the link-based values
        when those were found on the page.
    """
    if not OPENAI_API_KEY:
        logger.warning("Kein OPENAI_API_KEY gesetzt - nutze nur BeautifulSoup")
        return extract_with_beautifulsoup(html, url)
    try:
        import openai
        client = openai.OpenAI(api_key=OPENAI_API_KEY)
        # Reduce the HTML to the relevant text
        soup = BeautifulSoup(html, 'html.parser')
        # Remove scripts, styles and page chrome
        for tag in soup(['script', 'style', 'nav', 'footer', 'header']):
            tag.decompose()
        # Extract text
        text = soup.get_text(separator='\n', strip=True)
        # Limit to 8000 characters for the API
        text = text[:8000]
        # The prompt is German runtime text and is left untouched
        prompt = f"""Analysiere diese Universitäts-Profilseite für {full_name} und extrahiere folgende Informationen im JSON-Format:
{{
"email": "email@uni.de oder null",
"phone": "Telefonnummer oder null",
"office": "Raum/Büro oder null",
"position": "Position/Titel (z.B. Wissenschaftlicher Mitarbeiter, Professorin) oder null",
"department_name": "Name des Instituts/der Abteilung oder null",
"research_interests": ["Liste", "der", "Forschungsthemen"] oder [],
"teaching_topics": ["Liste", "der", "Lehrveranstaltungen/Fächer"] oder [],
"supervisor_name": "Name des Vorgesetzten/Lehrstuhlinhabers falls erkennbar oder null"
}}
Profilseite von {url}:
{text}
Antworte NUR mit dem JSON-Objekt, keine Erklärungen."""
        response = client.chat.completions.create(
            model="gpt-4o-mini",  # inexpensive and fast
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
            max_tokens=500
        )
        result_text = response.choices[0].message.content.strip()
        # Parse JSON (strip a surrounding Markdown code fence, if any)
        if result_text.startswith('```'):
            result_text = result_text.split('```')[1]
            if result_text.startswith('json'):
                result_text = result_text[4:]
        ai_data = json.loads(result_text)
        # Combine with the BeautifulSoup results (for links such as ORCID)
        bs_data = extract_with_beautifulsoup(html, url)
        # AI data has priority, except for these link-derived fields
        for key in ['orcid', 'google_scholar_id', 'researchgate_url', 'linkedin_url']:
            if key in bs_data and bs_data[key]:
                ai_data[key] = bs_data[key]
        return ai_data
    except Exception as e:
        # Any failure (import, API call, JSON parsing) degrades to the non-AI path
        logger.error(f"AI-Extraktion fehlgeschlagen: {e}")
        return extract_with_beautifulsoup(html, url)
def submit_extracted_data(staff_id: str, data: Dict[str, Any]) -> bool:
    """
    Send the extracted data back to BreakPilot.

    Entries whose value is None are left out of the payload.
    Returns True when the API accepted the submission, False on any error
    (the error is logged).
    """
    try:
        merged = {"staff_id": staff_id, **data}
        # Drop None values
        payload = {}
        for field, value in merged.items():
            if value is not None:
                payload[field] = value
        request_headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json"
        }
        response = requests.post(
            f"{API_URL}/api/v1/ai/extraction/submit",
            json=payload,
            headers=request_headers,
            timeout=30
        )
        response.raise_for_status()
    except Exception as e:
        logger.error(f"Fehler beim Senden der Daten für {staff_id}: {e}")
        return False
    return True
def process_profiles():
    """
    Main loop: fetch pending profiles, extract their data, send the results back.

    Runs until interrupted. When no profiles are pending it sleeps 60 seconds and
    polls again. `processed` and `errors` are running totals across all batches.
    """
    logger.info(f"Starte Extraktion - API: {API_URL}")
    processed = 0
    errors = 0
    while True:
        # Fetch the next batch
        profiles = fetch_pending_profiles(limit=BATCH_SIZE)
        if not profiles:
            logger.info("Keine weiteren Profile zum Verarbeiten. Warte 60 Sekunden...")
            time.sleep(60)
            continue
        logger.info(f"Verarbeite {len(profiles)} Profile...")
        for profile in profiles:
            staff_id = profile['staff_id']
            url = profile['profile_url']
            full_name = profile.get('full_name', 'Unbekannt')
            logger.info(f"Verarbeite: {full_name} - {url}")
            # Load the profile page
            html = fetch_profile_page(url)
            if not html:
                # This path skips the rate-limit sleep at the end of the iteration
                errors += 1
                continue
            # Extract data
            extracted = extract_with_ai(html, url, full_name)
            if extracted:
                # Send the result back
                if submit_extracted_data(staff_id, extracted):
                    processed += 1
                    logger.info(f"Erfolgreich: {full_name} - Email: {extracted.get('email', 'N/A')}")
                else:
                    errors += 1
            else:
                # An empty extraction result counts as an error
                errors += 1
            # Rate limiting
            time.sleep(SLEEP_BETWEEN_REQUESTS)
        logger.info(f"Batch abgeschlossen. Gesamt: {processed} erfolgreich, {errors} Fehler")
def main():
    """
    Entry point: print the banner, check the configuration, probe the API once,
    then run the processing loop until interrupted.

    Exits with status 1 when the API key is missing, the API is unreachable, or
    the processing loop fails unexpectedly.
    """
    separator = "=" * 60
    logger.info(separator)
    logger.info("BreakPilot vast.ai Profile Extractor")
    logger.info(separator)

    # Check configuration
    if not API_KEY:
        logger.error("BREAKPILOT_API_KEY nicht gesetzt!")
        sys.exit(1)
    if not OPENAI_API_KEY:
        logger.warning("OPENAI_API_KEY nicht gesetzt - nutze nur BeautifulSoup-Extraktion")

    # Test the connection; any status code counts as "reachable"
    try:
        probe = requests.get(
            f"{API_URL}/v1/health",
            headers={"Authorization": f"Bearer {API_KEY}"},
            timeout=10
        )
        logger.info(f"API-Verbindung OK: {probe.status_code}")
    except Exception as e:
        for message in (
            f"Kann API nicht erreichen: {e}",
            f"Stelle sicher dass {API_URL} erreichbar ist!",
        ):
            logger.error(message)
        sys.exit(1)

    # Start processing
    try:
        process_profiles()
    except KeyboardInterrupt:
        logger.info("Beendet durch Benutzer")
    except Exception as e:
        logger.error(f"Unerwarteter Fehler: {e}")
        sys.exit(1)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,119 @@
"""
LightOnOCR-2-1B Service
End-to-end VLM OCR fuer gedruckten und gemischten Text.
1B Parameter, Apple MPS-faehig (M-Serie).
Modell: lightonai/LightOnOCR-2-1B
Lizenz: Apache 2.0
Quelle: https://huggingface.co/lightonai/LightOnOCR-2-1B
Unterstuetzte Dokumenttypen:
- Buchseiten, Vokabelseiten
- Arbeitsblaetter, Klausuren
- Gemischt gedruckt/handschriftlich
DATENSCHUTZ: Alle Verarbeitung erfolgt lokal.
"""
import io
import logging
import os
from typing import Optional, Tuple
# Module-level logger for the LightOnOCR service.
logger = logging.getLogger(__name__)
# Model id passed to from_pretrained(); overridable via the LIGHTON_OCR_MODEL environment variable.
LIGHTON_MODEL_ID = os.getenv("LIGHTON_OCR_MODEL", "lightonai/LightOnOCR-2-1B")
# Lazily populated by get_lighton_model(); both stay None until a load succeeds.
_lighton_model = None
_lighton_processor = None
# Cached result of _check_lighton_available(); None means "not probed yet".
_lighton_available: Optional[bool] = None
def _check_lighton_available() -> bool:
    """Report whether the LightOnOCR dependencies (transformers, torch) import cleanly.

    The import probe runs only while the module-level ``_lighton_available``
    flag is still ``None``; afterwards the cached flag is returned.
    """
    global _lighton_available
    if _lighton_available is None:
        try:
            from transformers import AutoModelForImageTextToText, AutoProcessor  # noqa: F401
            import torch  # noqa: F401
        except ImportError as e:
            logger.warning(f"LightOnOCR deps not available: {e}")
            _lighton_available = False
        else:
            _lighton_available = True
    return _lighton_available
def get_lighton_model() -> Tuple:
    """
    Return the cached ``(processor, model)`` pair, loading it on first use.

    Returns ``(None, None)`` when the dependencies are missing or loading fails.
    The device is chosen in this order: MPS, then CUDA, then CPU.
    """
    global _lighton_model, _lighton_processor

    # Already loaded: hand back the cached pair.
    if _lighton_model is not None:
        return _lighton_processor, _lighton_model

    if not _check_lighton_available():
        return None, None

    try:
        import torch
        from transformers import AutoModelForImageTextToText, AutoProcessor

        # The CUDA check only runs when MPS is unavailable.
        device = (
            "mps" if torch.backends.mps.is_available()
            else "cuda" if torch.cuda.is_available()
            else "cpu"
        )
        dtype = torch.bfloat16

        logger.info(f"Loading LightOnOCR-2-1B on {device} ({dtype}) from {LIGHTON_MODEL_ID} ...")
        _lighton_processor = AutoProcessor.from_pretrained(LIGHTON_MODEL_ID)
        _lighton_model = AutoModelForImageTextToText.from_pretrained(
            LIGHTON_MODEL_ID, torch_dtype=dtype
        ).to(device)
        _lighton_model.eval()
        logger.info("LightOnOCR-2-1B loaded successfully")
    except Exception as e:
        logger.error(f"Failed to load LightOnOCR-2-1B: {e}")
        # Reset both globals so a half-finished load is never returned.
        _lighton_model = None
        _lighton_processor = None

    return _lighton_processor, _lighton_model
def run_lighton_ocr_sync(image_bytes: bytes) -> Optional[str]:
    """
    Run LightOnOCR synchronously on encoded image bytes.

    Returns the stripped decoded text, or ``None`` when the model is not
    available, the decoded text is empty, or any step raises.
    """
    processor, model = get_lighton_model()
    if processor is None or model is None:
        return None

    try:
        import torch
        from PIL import Image as _PILImage

        pil_img = _PILImage.open(io.BytesIO(image_bytes)).convert("RGB")

        # Single user turn that carries only the image.
        conversation = [{"role": "user", "content": [{"type": "image"}]}]
        inputs = processor.apply_chat_template(
            conversation, images=[pil_img],
            add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)

        with torch.no_grad():
            output_ids = model.generate(**inputs, max_new_tokens=1024)

        # NOTE(review): the whole first output sequence is decoded; if generate()
        # echoes the prompt tokens they end up in the text — confirm against the model card.
        text = processor.decode(output_ids[0], skip_special_tokens=True)
        if not text:
            return None
        return text.strip()
    except Exception as e:
        logger.error(f"LightOnOCR inference failed: {e}")
        return None

View File

@@ -0,0 +1,100 @@
#!/usr/bin/env python3
"""Debug script: analyze text line slopes on deskewed image to determine true residual shear."""
import sys, math, asyncio
sys.path.insert(0, "/app/backend")
import cv2
import numpy as np
import pytesseract
from ocr_pipeline_session_store import get_session_db
SESSION_ID = "3dcb1897-09a6-4b80-91b5-7e4207980bf3"
async def main():
    """Analyse residual text-line slopes on the stored deskewed image of SESSION_ID.

    Steps:
      1. Load the session and decode its ``deskewed_png``.
      2. Group Tesseract words per line and fit a straight line through the
         word centres of every sufficiently wide line.
      3. Print median / mean / range of the slopes (in degrees).
      4. Print the results of the four shear detectors from cv_vocab_pipeline.
      5. Print the pixel shift that several shear angles would cause.

    Prints a message and returns early when the session, the image or any
    usable text line is missing.
    """
    import statistics

    session = await get_session_db(SESSION_ID)
    if not session:
        print("Session not found")
        return

    deskewed_png = session.get("deskewed_png")
    if not deskewed_png:
        print("No deskewed_png stored")
        return

    arr = np.frombuffer(deskewed_png, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    # imdecode signals undecodable data by returning None instead of raising.
    if img is None:
        print("Could not decode deskewed_png")
        return
    h, w = img.shape[:2]
    print(f"Deskewed image: {w}x{h}")

    # Detect text line slopes using Tesseract word positions
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    data = pytesseract.image_to_data(gray, output_type=pytesseract.Output.DICT, config="--psm 6")

    lines = {}
    for i in range(len(data["text"])):
        txt = (data["text"][i] or "").strip()
        # float() keeps the comparison valid even if conf arrives as a numeric string.
        if len(txt) < 2 or float(data["conf"][i]) < 30:
            continue
        key = (data["block_num"][i], data["par_num"][i], data["line_num"][i])
        cx = data["left"][i] + data["width"][i] / 2
        cy = data["top"][i] + data["height"][i] / 2
        lines.setdefault(key, []).append((cx, cy))

    slopes = []
    for pts in lines.values():
        if len(pts) < 3:
            continue
        pts.sort(key=lambda p: p[0])
        xs = np.array([p[0] for p in pts])
        ys = np.array([p[1] for p in pts])
        # Skip lines spanning less than 20% of the image width.
        if xs[-1] - xs[0] < w * 0.2:
            continue
        # Least-squares fit y = slope * x + intercept.
        A = np.vstack([xs, np.ones(len(xs))]).T
        result = np.linalg.lstsq(A, ys, rcond=None)
        slope = result[0][0]
        slopes.append(math.degrees(math.atan(slope)))

    if not slopes:
        print("No text lines detected")
        return

    # statistics.median averages the two middle values for even counts.
    median_slope = statistics.median(slopes)
    mean_slope = sum(slopes) / len(slopes)
    print(f"Text lines found: {len(slopes)}")
    print(f"Median slope: {median_slope:.4f} deg")
    print(f"Mean slope: {mean_slope:.4f} deg")
    print(f"Range: [{min(slopes):.4f}, {max(slopes):.4f}]")
    print()
    print("Individual line slopes:")
    for line_slope in sorted(slopes):
        print(f" {line_slope:+.4f}")

    # Also test the 4 dewarp methods directly
    print("\n--- Dewarp method results on deskewed image ---")
    from cv_vocab_pipeline import (
        _detect_shear_angle, _detect_shear_by_projection,
        _detect_shear_by_hough, _detect_shear_by_text_lines,
    )
    for name, fn in [
        ("vertical_edge", _detect_shear_angle),
        ("projection", _detect_shear_by_projection),
        ("hough_lines", _detect_shear_by_hough),
        ("text_lines", _detect_shear_by_text_lines),
    ]:
        r = fn(img)
        print(f" {name}: shear={r['shear_degrees']:.4f} conf={r['confidence']:.3f}")

    # The user says "right side needs to come down 3mm"
    # For a ~85mm wide image (1002px at ~300DPI), 3mm ~ 35px
    # shear angle = atan(35 / 1556) ~ 1.29 degrees
    # Let's check: what does the image look like if we apply 0.5, 1.0, 1.5 deg shear?
    print("\n--- Pixel shift at right edge for various shear angles ---")
    for deg in [0.5, 0.8, 1.0, 1.3, 1.5, 2.0]:
        shift_px = h * math.tan(math.radians(deg))
        shift_mm = shift_px / (w / 85.0)  # approximate mm
        print(f" {deg:.1f} deg -> {shift_px:.0f}px shift -> ~{shift_mm:.1f}mm")


asyncio.run(main())

View File

@@ -0,0 +1,256 @@
"""
Tests for box boundary row filtering logic (box_ranges_inner).
Verifies that rows at the border of box zones are NOT excluded during
row detection and word filtering. This prevents the last row above a
box from being clipped by the box's border pixels.
Related fix in ocr_pipeline_api.py: detect_rows() and detect_words()
use box_ranges_inner (shrunk by border_thickness, min 5px) instead of
full box_ranges for row exclusion.
"""
import pytest
import numpy as np
from dataclasses import dataclass
# ---------------------------------------------------------------------------
# Simulate the box_ranges_inner calculation from ocr_pipeline_api.py
# ---------------------------------------------------------------------------
def compute_box_ranges(zones: list[dict]) -> tuple[list, list]:
    """
    Derive the outer and the inner vertical range of every box zone.

    Only zones with ``zone_type == "box"`` and a truthy ``box`` entry count.
    The inner range is the outer range shrunk on both sides by the box's
    ``border_thickness``, but by at least 5px.

    Returns ``(box_ranges, box_ranges_inner)`` as lists of ``(y_start, y_end)``.
    """
    outer_ranges = []
    inner_ranges = []
    box_zones = (z for z in zones if z.get("zone_type") == "box" and z.get("box"))
    for zone in box_zones:
        geometry = zone["box"]
        margin = max(geometry.get("border_thickness", 0), 5)  # minimum 5px margin
        top = geometry["y"]
        bottom = top + geometry["height"]
        outer_ranges.append((top, bottom))
        inner_ranges.append((top + margin, bottom - margin))
    return outer_ranges, inner_ranges
def build_content_strips(box_ranges_inner: list, top_y: int, bottom_y: int) -> list:
    """
    Split the span ``top_y..bottom_y`` into content strips that skip the inner box ranges.

    Boxes are processed in order of their start; strips shorter than 20px are dropped.
    """
    strips = []
    cursor = top_y
    for box_top, box_bottom in sorted(box_ranges_inner, key=lambda rng: rng[0]):
        # Gap between the current position and the next box becomes a strip.
        if cursor < box_top:
            strips.append((cursor, box_top))
        # Never move the cursor backwards (overlapping / nested boxes).
        if box_bottom > cursor:
            cursor = box_bottom
    if cursor < bottom_y:
        strips.append((cursor, bottom_y))
    return [strip for strip in strips if strip[1] - strip[0] >= 20]
def row_in_box(row_y: int, row_height: int, box_ranges_inner: list) -> bool:
    """
    Tell whether the vertical centre of a row lies inside any inner box range.

    Each range is treated as half-open: start inclusive, end exclusive.
    """
    midpoint = row_y + row_height / 2
    for lower, upper in box_ranges_inner:
        if lower <= midpoint < upper:
            return True
    return False
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestBoxRangesInner:
    """Checks how compute_box_ranges derives the outer and inner box ranges."""

    @staticmethod
    def _box_zone(y, height, border_thickness):
        """Build a box zone dict with the fixed x/width used throughout these tests."""
        return {
            "zone_type": "box",
            "box": {
                "x": 50,
                "y": y,
                "width": 1100,
                "height": height,
                "border_thickness": border_thickness,
            },
        }

    def test_border_thickness_shrinks_inner_range(self):
        """The inner range is the outer range minus border_thickness on each side."""
        outer, inner = compute_box_ranges([self._box_zone(500, 200, 10)])
        assert outer == [(500, 700)]
        assert inner == [(510, 690)]  # shrunk by 10px on each side

    def test_minimum_5px_margin(self):
        """A border_thickness of 0 still yields a 5px margin."""
        inner = compute_box_ranges([self._box_zone(500, 200, 0)])[1]
        assert inner == [(505, 695)]  # minimum 5px applied

    def test_no_box_zones_returns_empty(self):
        """Zones that are not boxes produce no ranges at all."""
        content_only = [{"zone_type": "content", "y": 0, "height": 500}]
        outer, inner = compute_box_ranges(content_only)
        assert outer == []
        assert inner == []

    def test_multiple_boxes(self):
        """Every box gets its own inner range, each with its own margin."""
        zones = [self._box_zone(300, 150, 8), self._box_zone(700, 150, 3)]
        outer, inner = compute_box_ranges(zones)
        assert len(outer) == 2
        assert len(inner) == 2
        first, second = inner
        assert first == (308, 442)  # 300+8 to 450-8
        assert second == (705, 845)  # 700+5(min) to 850-5(min)
class TestContentStrips:
    """Checks the content strips that build_content_strips derives from inner box ranges."""

    def test_single_box_creates_two_strips(self):
        """One box in the middle of the page yields a strip above and a strip below."""
        strips = build_content_strips([(505, 695)], top_y=100, bottom_y=1700)
        assert len(strips) == 2
        above, below = strips
        assert above == (100, 505)  # above box
        assert below == (695, 1700)  # below box

    def test_content_strip_includes_box_border_area(self):
        """Strips reach into the border area instead of stopping at the outer box edge."""
        # Box at y=500, height=200, border=10 → inner=(510, 690)
        strips = build_content_strips([(510, 690)], top_y=100, bottom_y=1700)
        above, below = strips[0], strips[1]
        # Upper strip ends at 510 (not 500)
        assert above == (100, 510)
        # Lower strip starts at 690 (not 700)
        assert below == (690, 1700)

    def test_row_at_box_border_is_in_content_strip(self):
        """A row at y=495 (just above a box at y=500) belongs to the first strip."""
        # Box at y=500, height=200, border=10 → inner=(510, 690)
        strips = build_content_strips([(510, 690)], top_y=100, bottom_y=1700)
        first_start, first_end = strips[0]
        # Row at y=495, height=30 → center exactly on the strip's end (510);
        # the bounds are compared inclusively here.
        row_center = 495 + 15  # = 510
        in_first_strip = first_start <= row_center <= first_end
        assert in_first_strip

    def test_no_boxes_single_strip(self):
        """Without boxes the whole content span is a single strip."""
        strips = build_content_strips([], top_y=100, bottom_y=1700)
        assert len(strips) == 1
        assert strips[0] == (100, 1700)
class TestRowInBoxFilter:
    """Checks row_in_box against the inner range of a single box."""

    # Inner range of a box spanning y=500..700 with a 10px border.
    INNER = [(510, 690)]

    def test_row_inside_box_is_excluded(self):
        """A row whose centre is well inside the inner range is flagged."""
        # Row at y=550, height=30 → center at 565
        flagged = row_in_box(550, 30, self.INNER)
        assert flagged is True

    def test_row_above_box_not_excluded(self):
        """A row in the upper border area is not flagged."""
        # Row at y=490, height=30 → center at 505, before the inner start (510)
        flagged = row_in_box(490, 30, self.INNER)
        assert flagged is False

    def test_row_below_box_not_excluded(self):
        """A row in the lower border area is not flagged."""
        # Row at y=695, height=30 → center at 710, past the inner end (690)
        flagged = row_in_box(695, 30, self.INNER)
        assert flagged is False

    def test_row_at_box_border_not_excluded(self):
        """A row overlapping the box border is not flagged.

        This is the key fix: with the full box range (500-700) the row's
        centre (505) would fall inside the range and the row would be dropped.
        """
        # Row at y=490, height=30 → center at 505
        # Full range (500, 700): 500 <= 505 < 700 → excluded (the bug)
        # Inner range (510, 690): 510 <= 505 is False → kept (the fix)
        flagged = row_in_box(490, 30, self.INNER)
        assert flagged is False

    def test_row_at_bottom_border_not_excluded(self):
        """A row overlapping the bottom border is not flagged."""
        # Row at y=685, height=30 → center at 700
        # Full range (500, 700): 700 < 700 is False → not excluded (edge)
        # Inner range (510, 690): 700 >= 690 → not excluded
        flagged = row_in_box(685, 30, self.INNER)
        assert flagged is False

    def test_no_boxes_nothing_excluded(self):
        """With no box ranges nothing is ever flagged."""
        assert row_in_box(500, 30, []) is False
class TestBoxBoundaryIntegration:
    """Chains compute_box_ranges, build_content_strips and row_in_box for one boundary row."""

    # Page with one box at y=500..700 and a 10px border.
    ZONES = [{
        "zone_type": "box",
        "box": {"x": 50, "y": 500, "width": 1100, "height": 200, "border_thickness": 10},
    }]
    # Row at y=488..518 (center=503), sitting just above the box border.
    ROW_Y = 488
    ROW_H = 30

    def test_boundary_row_preserved_with_inner_ranges(self):
        """
        With inner ranges the boundary row stays inside a content strip and
        is not flagged by row_in_box.
        """
        # Step 1: inner ranges
        _, inner = compute_box_ranges(self.ZONES)
        assert inner == [(510, 690)]

        # Step 2: content strips; the first one reaches 510, covering the border area 500-510
        strips = build_content_strips(inner, top_y=20, bottom_y=2400)
        assert len(strips) == 2
        assert strips[0] == (20, 510)

        # Step 3: the boundary row is not inside the box
        assert row_in_box(self.ROW_Y, self.ROW_H, inner) is False

        # Step 4: the row centre lies within some content strip
        row_center = self.ROW_Y + self.ROW_H / 2  # 503
        in_any_strip = False
        for ys, ye in strips:
            if ys <= row_center < ye:
                in_any_strip = True
                break
        assert in_any_strip, f"Row center {row_center} should be in content strips {strips}"

    def test_boundary_row_would_be_lost_with_full_ranges(self):
        """
        Shows the bug: with the full box ranges the boundary row would be excluded.
        """
        box_ranges = compute_box_ranges(self.ZONES)[0]
        # The full range is (500, 700)
        row_center = 488 + 30 / 2  # 503
        # 500 <= 503 < 700 → the row would be excluded
        in_box_full = any(by_s <= row_center < by_e for by_s, by_e in box_ranges)
        assert in_box_full is True, "Full range SHOULD incorrectly exclude this row"

View File

@@ -0,0 +1,285 @@
"""Tests for dictionary/Wörterbuch page detection.
Tests the _score_dictionary_signals() function and _classify_dictionary_columns()
from cv_layout.py.
"""
import sys
import os
# Add backend to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from cv_vocab_types import ColumnGeometry
from cv_layout import _score_dictionary_signals, _classify_dictionary_columns, _score_language
def _make_words(texts, start_y=0, y_step=30, x=100, conf=80):
"""Create a list of word dicts from text strings."""
return [
{
"text": t,
"conf": conf,
"top": start_y + i * y_step,
"left": x,
"height": 20,
"width": len(t) * 10,
}
for i, t in enumerate(texts)
]
def _make_geom(index, words, x=0, width=200, width_ratio=0.15):
    """Build a ColumnGeometry for ``words`` with y=0, height=1000 and a matching word_count."""
    fields = dict(
        index=index,
        x=x,
        y=0,
        width=width,
        height=1000,
        word_count=len(words),
        words=words,
        width_ratio=width_ratio,
    )
    return ColumnGeometry(**fields)
class TestDictionarySignals:
    """Exercises _score_dictionary_signals on synthetic column data."""

    def test_alphabetical_column_detected(self):
        """An alphabetically ordered headword column should yield a dictionary verdict."""
        article_texts = [
            "die", "der", "das", "der", "der",
            "das", "die", "die", "das", "das",
            "der", "das", "das", "das", "die",
            "der", "die", "der", "der", "der",
        ]
        # Headword column: Z words
        headword_texts = [
            "Zahl", "Zahn", "zart", "Zauber", "Zaun",
            "Zeichen", "zeigen", "Zeit", "Zelt", "Zentrum",
            "zerbrechen", "Zeug", "Ziel", "Zimmer", "Zitrone",
            "Zoll", "Zone", "Zoo", "Zucker", "Zug",
        ]
        translation_texts = [
            "number", "tooth", "tender", "magic", "fence",
            "sign", "to show", "time", "tent", "centre",
            "to break", "stuff", "goal", "room", "lemon",
            "customs", "zone", "zoo", "sugar", "train",
        ]
        geoms = [
            _make_geom(0, _make_words(article_texts, x=0), x=0, width=60, width_ratio=0.05),
            _make_geom(1, _make_words(headword_texts), x=80, width=200, width_ratio=0.15),
            _make_geom(2, _make_words(translation_texts, x=400), x=400, width=200, width_ratio=0.15),
        ]

        result = _score_dictionary_signals(geoms)

        assert result["signals"]["alphabetical_score"] >= 0.80, (
            f"Expected alphabetical_score >= 0.80, got {result['signals']['alphabetical_score']}"
        )
        assert result["signals"]["article_density"] >= 0.80, (
            f"Expected article_density >= 0.80, got {result['signals']['article_density']}"
        )
        assert result["signals"]["first_letter_uniformity"] >= 0.60, (
            f"Expected first_letter_uniformity >= 0.60, got {result['signals']['first_letter_uniformity']}"
        )
        assert result["is_dictionary"] is True
        assert result["confidence"] >= 0.40

    def test_non_dictionary_vocab_table(self):
        """A topic-grouped vocab table without alphabetical order must not be detected."""
        english = [
            "school", "teacher", "homework", "pencil", "break",
            "lunch", "friend", "computer", "book", "bag",
        ]
        german = [
            "Schule", "Lehrer", "Hausaufgaben", "Bleistift", "Pause",
            "Mittagessen", "Freund", "Computer", "Buch", "Tasche",
        ]
        geoms = [
            _make_geom(0, _make_words(english), x=0, width=200, width_ratio=0.20),
            _make_geom(1, _make_words(german, x=300), x=300, width=200, width_ratio=0.20),
        ]

        result = _score_dictionary_signals(geoms)

        # The words are not in alphabetical order
        assert result["is_dictionary"] is False, (
            f"Normal vocab table should NOT be detected as dictionary, "
            f"confidence={result['confidence']}"
        )

    def test_article_column_detection(self):
        """A narrow column made up of articles should be reported as the article column."""
        article_texts = ["der", "die", "das", "der", "die", "das", "der", "die", "das", "der"]
        headword_texts = [
            "Apfel", "Birne", "Dose", "Eis", "Fisch",
            "Gabel", "Haus", "Igel", "Jacke", "Kuchen",
        ]
        translation_texts = [
            "apple", "pear", "can", "ice", "fish",
            "fork", "house", "hedgehog", "jacket", "cake",
        ]
        geoms = [
            _make_geom(0, _make_words(article_texts, x=0), x=0, width=50, width_ratio=0.04),
            _make_geom(1, _make_words(headword_texts), x=80, width=200, width_ratio=0.15),
            _make_geom(2, _make_words(translation_texts, x=400), x=400, width=200, width_ratio=0.15),
        ]

        signals = _score_dictionary_signals(geoms)["signals"]

        assert signals["article_density"] >= 0.80
        assert signals["article_col"] == 0

    def test_first_letter_uniformity(self):
        """Words sharing one initial letter should give a high uniformity signal."""
        z_texts = [
            "Zahl", "Zahn", "zart", "Zauber", "Zaun",
            "Zeichen", "zeigen", "Zeit", "Zelt", "Zentrum",
        ]
        other_texts = [
            "number", "tooth", "tender", "magic", "fence",
            "sign", "to show", "time", "tent", "centre",
        ]
        geoms = [
            _make_geom(0, _make_words(z_texts), x=0, width=200, width_ratio=0.15),
            _make_geom(1, _make_words(other_texts, x=300), x=300, width=200, width_ratio=0.15),
        ]

        signals = _score_dictionary_signals(geoms)["signals"]

        assert signals["first_letter_uniformity"] >= 0.80

    def test_letter_transition_detected(self):
        """A switch from one initial letter to the next (A→B) should be flagged."""
        a_then_b = [
            "Apfel", "Arm", "Auto", "Auge", "Abend",
            "Ball", "Baum", "Berg", "Blume", "Boot",
        ]
        other_texts = [
            "apple", "arm", "car", "eye", "evening",
            "ball", "tree", "mountain", "flower", "boat",
        ]
        geoms = [
            _make_geom(0, _make_words(a_then_b), x=0, width=200, width_ratio=0.15),
            _make_geom(1, _make_words(other_texts, x=300), x=300, width=200, width_ratio=0.15),
        ]

        signals = _score_dictionary_signals(geoms)["signals"]

        assert signals["has_letter_transition"] is True

    def test_category_boost(self):
        """Passing document_category='woerterbuch' should raise the confidence."""
        # Short columns that serve as the baseline for the comparison
        geoms = [
            _make_geom(
                0, _make_words(["cat", "dog", "fish", "hat", "map"], x=0),
                x=0, width=200, width_ratio=0.15,
            ),
            _make_geom(
                1, _make_words(["Katze", "Hund", "Fisch", "Hut", "Karte"], x=300),
                x=300, width=200, width_ratio=0.15,
            ),
        ]

        without_boost = _score_dictionary_signals(geoms)
        with_boost = _score_dictionary_signals(geoms, document_category="woerterbuch")

        assert with_boost["confidence"] > without_boost["confidence"]
        assert with_boost["confidence"] - without_boost["confidence"] >= 0.19  # ~0.20 boost

    def test_margin_strip_signal(self):
        """margin_strip_detected=True should raise confidence and show up in the signals."""
        geoms = [
            _make_geom(
                0, _make_words(["Apfel", "Arm", "Auto", "Auge", "Abend"], x=0),
                x=0, width=200, width_ratio=0.15,
            ),
            _make_geom(
                1, _make_words(["apple", "arm", "car", "eye", "evening"], x=300),
                x=300, width=200, width_ratio=0.15,
            ),
        ]

        without = _score_dictionary_signals(geoms, margin_strip_detected=False)
        with_strip = _score_dictionary_signals(geoms, margin_strip_detected=True)

        assert with_strip["confidence"] > without["confidence"]
        assert with_strip["signals"]["margin_strip_detected"] is True

    def test_too_few_columns(self):
        """A single column is never a dictionary."""
        only_column = _make_geom(0, _make_words(["Zahl", "Zahn", "zart", "Zauber", "Zaun"]))
        result = _score_dictionary_signals([only_column])
        assert result["is_dictionary"] is False

    def test_empty_words(self):
        """Columns without any words are never a dictionary."""
        empty_geoms = [_make_geom(0, [], x=0), _make_geom(1, [], x=300)]
        result = _score_dictionary_signals(empty_geoms)
        assert result["is_dictionary"] is False
class TestClassifyDictionaryColumns:
    """Exercises _classify_dictionary_columns with and without a dictionary verdict."""

    def test_assigns_article_and_headword(self):
        """On a detected dictionary, column_article and column_headword are assigned."""
        article_texts = ["der", "die", "das", "der", "die", "das", "der", "die", "das", "der"]
        headword_texts = [
            "Zahl", "Zahn", "zart", "Zauber", "Zaun",
            "Zeichen", "zeigen", "Zeit", "Zelt", "Zentrum",
        ]
        translation_texts = [
            "number", "tooth", "tender", "magic", "fence",
            "sign", "to show", "time", "tent", "centre",
        ]
        geoms = [
            _make_geom(0, _make_words(article_texts, x=0), x=0, width=50, width_ratio=0.04),
            _make_geom(1, _make_words(headword_texts), x=80, width=200, width_ratio=0.15),
            _make_geom(2, _make_words(translation_texts, x=400), x=400, width=200, width_ratio=0.15),
        ]

        dict_signals = _score_dictionary_signals(geoms)
        assert dict_signals["is_dictionary"] is True

        lang_scores = []
        for geom in geoms:
            lang_scores.append(_score_language(geom.words))
        regions = _classify_dictionary_columns(geoms, dict_signals, lang_scores, 1000)
        assert regions is not None

        types = [region.type for region in regions]
        assert "column_article" in types, f"Expected column_article in {types}"
        assert "column_headword" in types, f"Expected column_headword in {types}"

        # Every region must carry classification_method='dictionary'
        for region in regions:
            assert region.classification_method == "dictionary"

    def test_returns_none_when_not_dictionary(self):
        """A negative dictionary verdict makes the classifier return None."""
        geoms = [
            _make_geom(0, _make_words(["cat", "dog"]), x=0),
            _make_geom(1, _make_words(["Katze", "Hund"]), x=300),
        ]
        negative_verdict = {"is_dictionary": False, "confidence": 0.1}
        lang_scores = [_score_language(geom.words) for geom in geoms]

        outcome = _classify_dictionary_columns(geoms, negative_verdict, lang_scores, 1000)

        assert outcome is None

View File

@@ -1,394 +0,0 @@
'use client'
/**
* GPU Infrastructure Admin Page
*
* vast.ai GPU Management for LLM Processing
*
* SICHERHEIT: Der API-Key wird NIEMALS im Client-Code verwendet!
* Alle Anfragen gehen über den serverseitigen Proxy /api/admin/gpu
*/
import AdminLayout from '@/components/admin/AdminLayout'
import { useEffect, useState, useCallback } from 'react'
/**
 * Status payload returned by the `/api/admin/gpu` proxy and rendered by the
 * GPU infrastructure page. Every field except `status` may be `null`.
 */
interface VastStatus {
  // --- Instance ---------------------------------------------------------
  /** Instance identifier, shown as "Instanz ID". */
  instance_id: number | null
  /** Lifecycle state, e.g. 'running', 'stopped' or 'error'. */
  status: string
  /** GPU model name. */
  gpu_name: string | null
  /** Endpoint URL; only displayed while the instance is running. */
  endpoint_base_url: string | null
  /** Timestamp string of the last activity; parsed with `new Date(...)` for display. */
  last_activity: string | null
  /** Minutes until the automatic stop, shown as "Auto-Stop". */
  auto_shutdown_in_minutes: number | null

  // --- Costs ------------------------------------------------------------
  /** Hourly price, rendered with a `$` prefix under "Kosten/h". */
  dph_total: number | null
  /** Runtime of the current session in minutes. */
  session_runtime_minutes: number | null
  /** Cost of the current session, rendered with a `$` prefix. */
  session_cost_usd: number | null
  /** Accumulated runtime in hours, shown as "Gesamtlaufzeit". */
  total_runtime_hours: number | null
  /** Accumulated cost, shown as "Gesamtkosten". */
  total_cost_usd: number | null

  // --- Account ----------------------------------------------------------
  /** Remaining credit, shown as "Budget". */
  account_credit: number | null
  /** Total account spend, shown as "vast.ai Ausgaben". */
  account_total_spend: number | null

  // --- Diagnostics ------------------------------------------------------
  /** Free-text status message. */
  message: string | null
  /** Error text supplied by the proxy when a request fails. */
  error?: string
}
/**
 * Admin page showing the state of the vast.ai GPU instance and offering
 * start / stop controls.
 *
 * All requests go through the server-side proxy `/api/admin/gpu`; the status
 * is loaded on mount and refreshed every 30 seconds.
 */
export default function GPUInfrastructurePage() {
  const [status, setStatus] = useState<VastStatus | null>(null)
  const [loading, setLoading] = useState(true)
  const [actionLoading, setActionLoading] = useState<string | null>(null)
  const [error, setError] = useState<string | null>(null)
  const [message, setMessage] = useState<string | null>(null)

  // Secure proxy endpoint (the API key lives on the server)
  const API_PROXY = '/api/admin/gpu'

  // Fetch the status through the proxy
  const fetchStatus = useCallback(async () => {
    setLoading(true)
    setError(null)
    try {
      const response = await fetch(API_PROXY)
      // An error response may carry a non-JSON body; keep the HTTP status
      // as the error message instead of a JSON parse error.
      const data = await response.json().catch(() => null)
      if (!response.ok) {
        throw new Error(data?.error || `HTTP ${response.status}`)
      }
      if (data === null) {
        throw new Error('Ungültige Antwort vom Server')
      }
      setStatus(data)
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Verbindungsfehler')
      setStatus({
        instance_id: null,
        status: 'error',
        gpu_name: null,
        dph_total: null,
        endpoint_base_url: null,
        last_activity: null,
        auto_shutdown_in_minutes: null,
        total_runtime_hours: null,
        total_cost_usd: null,
        account_credit: null,
        account_total_spend: null,
        session_runtime_minutes: null,
        session_cost_usd: null,
        message: 'Verbindung fehlgeschlagen'
      })
    } finally {
      setLoading(false)
    }
  }, [])

  // Initial load
  useEffect(() => {
    fetchStatus()
  }, [fetchStatus])

  // Auto-refresh every 30 seconds
  useEffect(() => {
    const interval = setInterval(fetchStatus, 30000)
    return () => clearInterval(interval)
  }, [fetchStatus])

  // Shared implementation of the start / stop request.
  const requestPower = async (action: 'on' | 'off') => {
    setActionLoading(action)
    setError(null)
    setMessage(null)
    try {
      const response = await fetch(API_PROXY, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ action }),
      })
      // The body is only needed for error details; tolerate a non-JSON body.
      const data = await response.json().catch(() => null)
      if (!response.ok) {
        throw new Error(data?.error || data?.detail || 'Aktion fehlgeschlagen')
      }
      setMessage(action === 'on' ? 'Start angefordert' : 'Stop angefordert')
      // Re-poll twice, after 3s and after 10s.
      setTimeout(fetchStatus, 3000)
      setTimeout(fetchStatus, 10000)
    } catch (err) {
      const fallback = action === 'on' ? 'Fehler beim Starten' : 'Fehler beim Stoppen'
      setError(err instanceof Error ? err.message : fallback)
      void fetchStatus()
    } finally {
      setActionLoading(null)
    }
  }

  // Power on through the proxy
  const powerOn = () => requestPower('on')

  // Power off through the proxy
  const powerOff = () => requestPower('off')

  // Status badge styling
  const getStatusBadge = (s: string) => {
    const baseClasses = 'px-3 py-1 rounded-full text-sm font-semibold uppercase'
    switch (s) {
      case 'running':
        return `${baseClasses} bg-green-100 text-green-800`
      case 'stopped':
      case 'exited':
        return `${baseClasses} bg-red-100 text-red-800`
      case 'loading':
      case 'scheduling':
      case 'creating':
      case 'starting...':
      case 'stopping...':
        return `${baseClasses} bg-yellow-100 text-yellow-800`
      default:
        return `${baseClasses} bg-slate-100 text-slate-600`
    }
  }

  // Credit color based on amount
  const getCreditColor = (credit: number | null) => {
    if (credit === null) return 'text-slate-500'
    if (credit < 5) return 'text-red-600'
    if (credit < 15) return 'text-yellow-600'
    return 'text-green-600'
  }

  // While a power action is in flight the badge shows the transient state.
  const transientStatus =
    actionLoading === 'on' ? 'starting...' :
    actionLoading === 'off' ? 'stopping...' :
    null

  return (
    <AdminLayout title="GPU Infrastruktur" description="vast.ai GPU Management">
      {/* Status Cards */}
      <div className="bg-white rounded-xl border border-slate-200 p-6 mb-6">
        <div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-6">
          {/* Status */}
          <div>
            <div className="text-sm text-slate-500 mb-2">Status</div>
            {loading ? (
              <span className="px-3 py-1 rounded-full text-sm font-semibold bg-slate-100 text-slate-600">
                Laden...
              </span>
            ) : (
              <span className={getStatusBadge(transientStatus ?? (status?.status || 'unknown'))}>
                {transientStatus ?? (status?.status || 'unbekannt')}
              </span>
            )}
          </div>

          {/* GPU */}
          <div>
            <div className="text-sm text-slate-500 mb-2">GPU</div>
            <div className="font-semibold text-slate-900">
              {status?.gpu_name || '-'}
            </div>
          </div>

          {/* Cost per hour — explicit null check so a price of 0 is still shown */}
          <div>
            <div className="text-sm text-slate-500 mb-2">Kosten/h</div>
            <div className="font-semibold text-slate-900">
              {status && status.dph_total !== null
                ? `$${status.dph_total.toFixed(3)}`
                : '-'}
            </div>
          </div>

          {/* Auto-Stop */}
          <div>
            <div className="text-sm text-slate-500 mb-2">Auto-Stop</div>
            <div className="font-semibold text-slate-900">
              {status && status.auto_shutdown_in_minutes !== null
                ? `${status.auto_shutdown_in_minutes} min`
                : '-'}
            </div>
          </div>

          {/* Budget */}
          <div>
            <div className="text-sm text-slate-500 mb-2">Budget</div>
            <div className={`font-bold text-lg ${getCreditColor(status?.account_credit ?? null)}`}>
              {status && status.account_credit !== null
                ? `$${status.account_credit.toFixed(2)}`
                : '-'}
            </div>
          </div>

          {/* Session */}
          <div>
            <div className="text-sm text-slate-500 mb-2">Session</div>
            <div className="font-semibold text-slate-900">
              {status && status.session_runtime_minutes !== null && status.session_cost_usd !== null
                ? `${Math.round(status.session_runtime_minutes)} min / $${status.session_cost_usd.toFixed(3)}`
                : '-'}
            </div>
          </div>
        </div>

        {/* Buttons */}
        <div className="flex items-center gap-4 mt-6 pt-6 border-t border-slate-200">
          <button
            onClick={powerOn}
            disabled={actionLoading !== null || status?.status === 'running'}
            className="px-6 py-2 bg-primary-600 text-white rounded-lg font-medium hover:bg-primary-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
          >
            Starten
          </button>
          <button
            onClick={powerOff}
            disabled={actionLoading !== null || status?.status !== 'running'}
            className="px-6 py-2 bg-red-600 text-white rounded-lg font-medium hover:bg-red-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
          >
            Stoppen
          </button>
          <button
            onClick={fetchStatus}
            disabled={loading}
            className="px-4 py-2 border border-slate-300 text-slate-700 rounded-lg font-medium hover:bg-slate-50 disabled:opacity-50 transition-colors"
          >
            {loading ? 'Aktualisiere...' : 'Aktualisieren'}
          </button>

          {/* Message */}
          {message && (
            <span className="ml-4 text-sm text-green-600 font-medium">{message}</span>
          )}
          {error && (
            <span className="ml-4 text-sm text-red-600 font-medium">{error}</span>
          )}
        </div>
      </div>

      {/* Extended Stats */}
      <div className="grid grid-cols-1 lg:grid-cols-2 gap-6 mb-6">
        {/* Cost overview */}
        <div className="bg-white rounded-xl border border-slate-200 p-6">
          <h3 className="font-semibold text-slate-900 mb-4">Kosten-Übersicht</h3>
          <div className="space-y-4">
            <div className="flex justify-between items-center">
              <span className="text-slate-600">Session Laufzeit</span>
              <span className="font-semibold">
                {status && status.session_runtime_minutes !== null
                  ? `${Math.round(status.session_runtime_minutes)} Minuten`
                  : '-'}
              </span>
            </div>
            <div className="flex justify-between items-center">
              <span className="text-slate-600">Session Kosten</span>
              <span className="font-semibold">
                {status && status.session_cost_usd !== null
                  ? `$${status.session_cost_usd.toFixed(4)}`
                  : '-'}
              </span>
            </div>
            <div className="flex justify-between items-center pt-4 border-t border-slate-100">
              <span className="text-slate-600">Gesamtlaufzeit</span>
              <span className="font-semibold">
                {status && status.total_runtime_hours !== null
                  ? `${status.total_runtime_hours.toFixed(1)} Stunden`
                  : '-'}
              </span>
            </div>
            <div className="flex justify-between items-center">
              <span className="text-slate-600">Gesamtkosten</span>
              <span className="font-semibold">
                {status && status.total_cost_usd !== null
                  ? `$${status.total_cost_usd.toFixed(2)}`
                  : '-'}
              </span>
            </div>
            <div className="flex justify-between items-center">
              <span className="text-slate-600">vast.ai Ausgaben</span>
              <span className="font-semibold">
                {status && status.account_total_spend !== null
                  ? `$${status.account_total_spend.toFixed(2)}`
                  : '-'}
              </span>
            </div>
          </div>
        </div>

        {/* Instance details */}
        <div className="bg-white rounded-xl border border-slate-200 p-6">
          <h3 className="font-semibold text-slate-900 mb-4">Instanz-Details</h3>
          <div className="space-y-4">
            <div className="flex justify-between items-center">
              <span className="text-slate-600">Instanz ID</span>
              <span className="font-mono text-sm">
                {status?.instance_id ?? '-'}
              </span>
            </div>
            <div className="flex justify-between items-center">
              <span className="text-slate-600">GPU</span>
              <span className="font-semibold">
                {status?.gpu_name || '-'}
              </span>
            </div>
            <div className="flex justify-between items-center">
              <span className="text-slate-600">Stundensatz</span>
              <span className="font-semibold">
                {status && status.dph_total !== null
                  ? `$${status.dph_total.toFixed(4)}/h`
                  : '-'}
              </span>
            </div>
            <div className="flex justify-between items-center">
              <span className="text-slate-600">Letzte Aktivität</span>
              <span className="text-sm">
                {status?.last_activity
                  ? new Date(status.last_activity).toLocaleString('de-DE')
                  : '-'}
              </span>
            </div>
            {status?.endpoint_base_url && status.status === 'running' && (
              <div className="pt-4 border-t border-slate-100">
                <div className="text-slate-600 text-sm mb-1">Endpoint</div>
                <code className="text-xs bg-slate-100 px-2 py-1 rounded block overflow-x-auto">
                  {status.endpoint_base_url}
                </code>
              </div>
            )}
          </div>
        </div>
      </div>

      {/* Info */}
      <div className="bg-blue-50 border border-blue-200 rounded-xl p-4">
        <div className="flex gap-3">
          <svg className="w-5 h-5 text-blue-600 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
          </svg>
          <div>
            <h4 className="font-semibold text-blue-900">Auto-Shutdown</h4>
            <p className="text-sm text-blue-800 mt-1">
              Die GPU-Instanz wird automatisch gestoppt, wenn sie längere Zeit inaktiv ist.
              Der Auto-Shutdown Timer zeigt die verbleibende Zeit bis zum automatischen Stopp.
              Der Status wird alle 30 Sekunden automatisch aktualisiert.
            </p>
          </div>
        </div>
      </div>
    </AdminLayout>
  )
}

View File

@@ -1,365 +0,0 @@
'use client'
import { useState } from 'react'
import AdminLayout from '@/components/admin/AdminLayout'
import {
WizardStepper,
WizardNavigation,
EducationCard,
ArchitectureContext,
TestRunner,
TestSummary,
type WizardStep,
type TestCategoryResult,
type FullTestResults,
type EducationContent,
type ArchitectureContextType,
} from '@/components/wizard'
// ==============================================
// Constants
// ==============================================
/**
 * Base URL of the backend that serves the GPU test endpoints.
 * Taken from NEXT_PUBLIC_BACKEND_URL; falls back to a local backend on port 8000
 * when that variable is unset or empty.
 */
const BACKEND_URL = process.env.NEXT_PUBLIC_BACKEND_URL || 'http://localhost:8000'
/**
 * Initial, ordered list of wizard steps (all start as 'pending').
 * Steps that carry a `category` are test steps: the page treats a defined
 * `category` as "this step runs backend tests" and uses the value as the path
 * segment of the gpu-tests endpoint. 'welcome' and 'summary' have no category.
 */
const STEPS: WizardStep[] = [
{ id: 'welcome', name: 'Willkommen', icon: '👋', status: 'pending' },
{ id: 'api-health', name: 'API Status', icon: '💚', status: 'pending', category: 'api-health' },
{ id: 'detection', name: 'GPU Erkennung', icon: '🔍', status: 'pending', category: 'detection' },
{ id: 'cloud', name: 'Cloud GPU', icon: '☁️', status: 'pending', category: 'cloud' },
{ id: 'summary', name: 'Zusammenfassung', icon: '📊', status: 'pending' },
]
/**
 * Explanatory text shown for each wizard step, keyed by step `id`
 * (the page looks it up with the current step's id).
 * Each entry has a title and a list of display lines; an empty string is
 * used as a blank separator line between paragraphs/bullet groups.
 * All texts are user-facing German strings.
 */
const EDUCATION_CONTENT: Record<string, EducationContent> = {
'welcome': {
title: 'Willkommen zum GPU Wizard',
content: [
'GPUs beschleunigen KI-Workloads um das 10-100fache.',
'',
'BreakPilot unterstuetzt:',
'• NVIDIA GPUs (CUDA) - GeForce, Tesla, A100',
'• AMD GPUs (ROCm) - Radeon, Instinct',
'• Cloud GPUs (vast.ai) - On-Demand Miete',
'',
'Anwendungsfaelle:',
'• LLM Inference (lokale Modelle)',
'• Embedding-Generierung',
'• Fine-Tuning von Modellen',
'• OCR und Bildverarbeitung',
],
},
'api-health': {
title: 'GPU Admin API - Verwaltungsschnittstelle',
content: [
'Die GPU API verwaltet lokale und Cloud-GPUs.',
'',
'Endpunkte:',
'• /api/gpu/status - Aktueller GPU-Status',
'• /api/gpu/jobs - Laufende GPU-Jobs',
'• /api/gpu/metrics - Auslastung und Temperatur',
'',
'vast.ai Integration:',
'• /vast/instances - Gemietete Instanzen',
'• /vast/offers - Verfuegbare Angebote',
'• /vast/start - Instanz starten',
],
},
'detection': {
title: 'GPU Hardware-Erkennung',
content: [
'Automatische Erkennung verfuegbarer GPUs.',
'',
'NVIDIA (nvidia-smi):',
'• GPU-Modell und VRAM',
'• Treiber-Version',
'• Aktuelle Auslastung',
'• Temperatur und Stromverbrauch',
'',
'AMD (rocm-smi):',
'• ROCm-Version',
'• GPU-Modell',
'• Memory-Statistiken',
'',
'PyTorch CUDA Check:',
'• torch.cuda.is_available()',
'• torch.cuda.device_count()',
],
},
'cloud': {
title: 'Cloud GPUs mit vast.ai',
content: [
'vast.ai bietet guenstige Cloud-GPUs auf Abruf.',
'',
'Vorteile:',
'• 3-10x guenstiger als AWS/GCP',
'• Peer-to-Peer Marktplatz',
'• Minutengenaue Abrechnung',
'• Zugriff auf A100, H100, etc.',
'',
'Typische Preise:',
'• RTX 3090: ~$0.20/Stunde',
'• A100 40GB: ~$1.00/Stunde',
'• H100: ~$2.50/Stunde',
'',
'Konfiguration: VAST_API_KEY Umgebungsvariable',
],
},
'summary': {
title: 'Test-Zusammenfassung',
content: [
'Hier sehen Sie eine Uebersicht aller durchgefuehrten Tests:',
'• Lokale GPU-Erkennung',
'• Cloud-GPU Verfuegbarkeit',
'• API-Status',
],
},
}
/**
 * Architecture background shown alongside test steps, keyed by step `category`.
 * Only the three test categories have an entry; the page renders the
 * architecture panel only when a lookup by the current step's category succeeds.
 * Each entry names the layer, the involved services, their dependencies and
 * the data flow as an ordered list of hops.
 */
const ARCHITECTURE_CONTEXTS: Record<string, ArchitectureContextType> = {
'api-health': {
layer: 'api',
services: ['backend'],
dependencies: ['PostgreSQL', 'vast.ai API'],
dataFlow: ['Browser', 'FastAPI', 'GPU Manager', 'PostgreSQL'],
},
'detection': {
layer: 'service',
services: ['backend'],
dependencies: ['nvidia-smi', 'rocm-smi', 'PyTorch'],
dataFlow: ['FastAPI', 'Subprocess', 'GPU Driver', 'Hardware'],
},
'cloud': {
layer: 'service',
services: ['backend'],
dependencies: ['vast.ai API', 'SSH', 'Docker'],
dataFlow: ['FastAPI', 'vast.ai API', 'Cloud Instance', 'GPU'],
},
}
// ==============================================
// Main Component
// ==============================================
/**
 * GPU wizard page.
 *
 * Walks the user through the steps defined in `STEPS`, shows the education
 * card for the active step, lets the user run the backend GPU tests for a
 * single category (test steps) or for all categories at once (summary step),
 * and reflects the outcome in the stepper.
 */
export default function GPUWizardPage() {
  const [currentStep, setCurrentStep] = useState(0)
  const [steps, setSteps] = useState<WizardStep[]>(STEPS)
  const [categoryResults, setCategoryResults] = useState<Record<string, TestCategoryResult>>({})
  const [fullResults, setFullResults] = useState<FullTestResults | null>(null)
  const [isLoading, setIsLoading] = useState(false)
  const [error, setError] = useState<string | null>(null)

  // Values derived from the active step.
  const currentStepData = steps[currentStep]
  const stepId = currentStepData?.id
  const isTestStep = currentStepData?.category !== undefined
  const isWelcome = stepId === 'welcome'
  const isSummary = stepId === 'summary'

  // A category counts as passed only when it reports zero failed tests.
  function statusOf(result: TestCategoryResult): WizardStep['status'] {
    return result.failed === 0 ? 'completed' : 'failed'
  }

  // POSTs to a gpu-tests endpoint and resolves with the parsed JSON body;
  // rejects with an "HTTP <status>: <text>" error on a non-OK response.
  async function postTestRequest<T>(path: string): Promise<T> {
    const response = await fetch(`${BACKEND_URL}/api/admin/gpu-tests/${path}`, {
      method: 'POST',
    })
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`)
    }
    return response.json()
  }

  // Runs a task between "loading on / error cleared" and "loading off",
  // turning any thrown value into the displayed error message.
  async function withLoadingState(task: () => Promise<void>): Promise<void> {
    setIsLoading(true)
    setError(null)
    try {
      await task()
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Unbekannter Fehler')
    } finally {
      setIsLoading(false)
    }
  }

  // Runs the tests of one category and updates that category's result and step status.
  const runCategoryTest = (category: string) =>
    withLoadingState(async () => {
      const result = await postTestRequest<TestCategoryResult>(category)
      setCategoryResults((prev) => ({ ...prev, [category]: result }))
      const outcome = statusOf(result)
      setSteps((prev) =>
        prev.map((step) => (step.category === category ? { ...step, status: outcome } : step))
      )
    })

  // Runs every category, stores the full report, and replaces the per-category results.
  const runAllTests = () =>
    withLoadingState(async () => {
      const results = await postTestRequest<FullTestResults>('run-all')
      setFullResults(results)
      setSteps((prev) =>
        prev.map((step) => {
          const match = step.category
            ? results.categories.find((c) => c.category === step.category)
            : undefined
          return match ? { ...step, status: statusOf(match) } : step
        })
      )
      const byCategory: Record<string, TestCategoryResult> = {}
      for (const cat of results.categories) {
        byCategory[cat.category] = cat
      }
      setCategoryResults(byCategory)
    })

  // Advances one step; a step left as 'pending' is marked 'completed' on the way out.
  const goToNext = () => {
    if (currentStep >= steps.length - 1) return
    setSteps((prev) =>
      prev.map((step, idx) =>
        idx === currentStep && step.status === 'pending' ? { ...step, status: 'completed' } : step
      )
    )
    setCurrentStep((prev) => prev + 1)
  }

  const goToPrev = () => {
    if (currentStep <= 0) return
    setCurrentStep((prev) => prev - 1)
  }

  // Jumping is allowed backwards (or to the current step), or forwards to a
  // step whose predecessor is no longer 'pending'.
  const handleStepClick = (index: number) => {
    const alreadyReached = index <= currentStep
    const predecessorHandled = steps[index - 1]?.status !== 'pending'
    if (alreadyReached || predecessorHandled) {
      setCurrentStep(index)
    }
  }

  const runAllButtonTone = isLoading
    ? 'bg-gray-400 cursor-not-allowed'
    : 'bg-blue-600 text-white hover:bg-blue-700'

  return (
    <AdminLayout
      title="GPU Wizard"
      description="Interaktives Lernen und Testen der GPU-Infrastruktur"
    >
      {/* Header */}
      <div className="bg-white rounded-lg shadow p-4 mb-6 flex items-center justify-between">
        <div className="flex items-center">
          <span className="text-3xl mr-3">🎮</span>
          <div>
            <h2 className="text-lg font-bold text-gray-800">GPU Infrastructure Wizard</h2>
            <p className="text-sm text-gray-600">CUDA, ROCm & vast.ai</p>
          </div>
        </div>
        <a href="/admin/gpu" className="text-blue-600 hover:text-blue-800 text-sm">
          &larr; Zurueck zu GPU Management
        </a>
      </div>
      {/* Stepper */}
      <div className="bg-white rounded-lg shadow p-6 mb-6">
        <WizardStepper steps={steps} currentStep={currentStep} onStepClick={handleStepClick} />
      </div>
      {/* Content */}
      <div className="bg-white rounded-lg shadow p-6">
        <div className="flex items-center mb-6">
          <span className="text-3xl mr-3">{currentStepData?.icon}</span>
          <div>
            <h2 className="text-xl font-bold text-gray-800">
              Schritt {currentStep + 1}: {currentStepData?.name}
            </h2>
            <p className="text-gray-500 text-sm">
              {currentStep + 1} von {steps.length}
            </p>
          </div>
        </div>
        <EducationCard content={EDUCATION_CONTENT[stepId || '']} />
        {isTestStep && currentStepData?.category && ARCHITECTURE_CONTEXTS[currentStepData.category] && (
          <ArchitectureContext
            context={ARCHITECTURE_CONTEXTS[currentStepData.category]}
            currentStep={currentStepData.name}
          />
        )}
        {error && (
          <div className="bg-red-50 border border-red-200 text-red-700 rounded-lg p-4 mb-6">
            <strong>Fehler:</strong> {error}
          </div>
        )}
        {isWelcome && (
          <div className="text-center py-8">
            <button
              onClick={goToNext}
              className="bg-blue-600 text-white px-8 py-3 rounded-lg font-medium hover:bg-blue-700 transition-colors"
            >
              Wizard starten
            </button>
          </div>
        )}
        {isTestStep && currentStepData?.category && (
          <TestRunner
            category={currentStepData.category}
            categoryResult={categoryResults[currentStepData.category]}
            isLoading={isLoading}
            onRunTests={() => runCategoryTest(currentStepData.category!)}
          />
        )}
        {isSummary && (
          <div>
            {fullResults ? (
              <TestSummary results={fullResults} />
            ) : (
              <div className="text-center py-8">
                <p className="text-gray-600 mb-4">
                  Fuehren Sie alle Tests aus um eine Zusammenfassung zu sehen.
                </p>
                <button
                  onClick={runAllTests}
                  disabled={isLoading}
                  className={`px-6 py-3 rounded-lg font-medium transition-colors ${runAllButtonTone}`}
                >
                  {isLoading ? 'Alle Tests laufen...' : 'Alle Tests ausfuehren'}
                </button>
              </div>
            )}
          </div>
        )}
        <WizardNavigation
          currentStep={currentStep}
          totalSteps={steps.length}
          onPrev={goToPrev}
          onNext={goToNext}
          showNext={!isSummary}
          isLoading={isLoading}
        />
      </div>
      <div className="text-center text-gray-500 text-sm mt-6">
        Diese Tests pruefen die GPU-Infrastruktur.
        Bei Fragen wenden Sie sich an das DevOps-Team.
      </div>
    </AdminLayout>
  )
}

View File

@@ -1,82 +0,0 @@
/**
* GPU Admin API Route - Secure Proxy
*
* Der vast.ai API-Key wird serverseitig aus der Umgebungsvariable geladen
* und niemals an den Client gesendet.
*/
import { NextRequest, NextResponse } from 'next/server'
// API key used to authenticate against the backend (server-side only, never in the client!)
// CONTROL_API_KEY is the backend API key; VAST_API_KEY is the key for vast.ai itself.
// NOTE(review): when CONTROL_API_KEY is unset, the vast.ai key is what gets sent to the
// backend in the X-API-Key header below — confirm this fallback is intended.
const BACKEND_API_KEY = process.env.CONTROL_API_KEY || process.env.VAST_API_KEY
// Backend base URL; defaults to a local backend on port 8000 when BACKEND_URL is unset or empty.
const BACKEND_URL = process.env.BACKEND_URL || 'http://localhost:8000'
/**
 * Proxies the GPU status query to the backend.
 *
 * Responds with 500 when no backend API key is configured, with 503 when the
 * backend call (or parsing its JSON body) fails, and otherwise relays the
 * backend's JSON payload together with its HTTP status code.
 */
export async function GET(request: NextRequest) {
  if (!BACKEND_API_KEY) {
    const notConfigured = { error: 'CONTROL_API_KEY nicht konfiguriert' }
    return NextResponse.json(notConfigured, { status: 500 })
  }

  const statusUrl = `${BACKEND_URL}/infra/vast/status`
  // The key is attached here on the server; it is not part of the relayed response.
  const authHeaders = { 'X-API-Key': BACKEND_API_KEY }

  try {
    const upstream = await fetch(statusUrl, { headers: authHeaders })
    const payload = await upstream.json()
    return NextResponse.json(payload, { status: upstream.status })
  } catch (error) {
    console.error('GPU Status fetch error:', error)
    const unreachable = { error: 'Backend nicht erreichbar', status: 'error' }
    return NextResponse.json(unreachable, { status: 503 })
  }
}
/**
 * Proxies a GPU power action to the backend.
 *
 * Expects a JSON body of the form `{ "action": "on" | "off" }`.
 *
 * Responses:
 * - 500 when no backend API key is configured,
 * - 400 when the body is not valid JSON or `action` is not 'on'/'off',
 * - 503 when the backend call (or parsing its JSON body) fails,
 * - otherwise the backend's JSON payload with the backend's status code.
 */
export async function POST(request: NextRequest) {
  if (!BACKEND_API_KEY) {
    return NextResponse.json(
      { error: 'CONTROL_API_KEY nicht konfiguriert' },
      { status: 500 }
    )
  }

  // Parse the client payload outside the backend try/catch: a malformed or
  // non-object body is the caller's mistake (400), not a backend outage (503).
  let action: unknown
  try {
    const body: unknown = await request.json()
    action =
      typeof body === 'object' && body !== null
        ? (body as { action?: unknown }).action
        : undefined
  } catch {
    return NextResponse.json(
      { error: 'Ungültiger JSON-Body' },
      { status: 400 }
    )
  }

  if (action !== 'on' && action !== 'off') {
    return NextResponse.json(
      { error: 'Ungültige Aktion' },
      { status: 400 }
    )
  }

  try {
    const endpoint = action === 'on' ? '/infra/vast/power/on' : '/infra/vast/power/off'
    const response = await fetch(`${BACKEND_URL}${endpoint}`, {
      method: 'POST',
      headers: {
        'X-API-Key': BACKEND_API_KEY,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({}),
    })
    const data = await response.json()
    return NextResponse.json(data, { status: response.status })
  } catch (error) {
    console.error('GPU Power action error:', error)
    return NextResponse.json(
      { error: 'Backend nicht erreichbar' },
      { status: 503 }
    )
  }
}

View File

@@ -1,142 +0,0 @@
import type { SystemInfoConfig } from './types'
/**
 * System-info descriptor for the GPU infrastructure (vast.ai) area.
 *
 * Bundles the static content of the info panel: title/description/version,
 * privacy notes, architecture layers (title, components, colour), feature
 * list with status, roadmap phases, technical details, audit items grouped
 * by category, and a long-form documentation string.
 * All texts are user-facing German strings.
 */
export const gpuConfig: SystemInfoConfig = {
title: 'GPU Infrastruktur System-Info',
description: 'vast.ai GPU-Management fuer KI-Workloads und Training.',
version: '1.5',
privacyNotes: [
'GPU-Workloads werden isoliert in Docker-Containern ausgefuehrt',
'Keine persistente Speicherung von Trainingsdaten auf GPU-Instanzen',
'SSH-Keys werden regelmaessig rotiert',
'Audit-Log fuer alle GPU-Operationen',
],
architecture: {
layers: [
{ title: 'Admin UI', components: ['GPU Dashboard', 'Instance Manager', 'Cost Calculator'], color: '#3b82f6' },
{ title: 'vast.ai API', components: ['Instance API', 'Billing API', 'SSH Management'], color: '#8b5cf6' },
{ title: 'GPU Instances', components: ['RTX 4090', 'A100', 'H100'], color: '#10b981' },
{ title: 'Workloads', components: ['Model Training', 'Inference', 'Fine-Tuning'], color: '#f59e0b' },
],
},
features: [
{ name: 'Instance Management', status: 'active', description: 'Start/Stop/Destroy von GPU-Instanzen' },
{ name: 'SSH Key Management', status: 'active', description: 'Automatische SSH-Key Rotation' },
{ name: 'Cost Tracking', status: 'active', description: 'Echtzeit-Kostenüberwachung' },
{ name: 'Auto-Scaling', status: 'planned', description: 'Automatische Skalierung bei Last' },
{ name: 'Spot Instance Management', status: 'planned', description: 'Kostenoptimierung durch Spot-Instances' },
],
roadmap: [
{ phase: 'Phase 1: Automation (Q1)', priority: 'high', items: ['Auto-Start bei Bedarf', 'Auto-Stop bei Inaktivitaet', 'Scheduled Instances', 'Budget Alerts'] },
{ phase: 'Phase 2: Multi-Cloud (Q2)', priority: 'medium', items: ['Lambda Labs Integration', 'RunPod Integration', 'Cloud-uebergreifende Orchestrierung', 'Preisvergleich'] },
{ phase: 'Phase 3: ML Ops (Q3)', priority: 'medium', items: ['Model Registry', 'Experiment Tracking', 'A/B Testing', 'Model Versioning'] },
],
technicalDetails: [
{ component: 'API', technology: 'vast.ai REST API', version: 'v1', description: 'GPU-Marktplatz' },
{ component: 'SSH', technology: 'OpenSSH', version: '9.x', description: 'Sichere Verbindung' },
{ component: 'Container', technology: 'Docker', version: '24.x', description: 'Workload-Isolation' },
{ component: 'ML Framework', technology: 'PyTorch', version: '2.x', description: 'Model Training' },
],
auditInfo: [
{
category: 'Sicherheit & Zugriffskontrolle',
items: [
{ label: 'Authentifizierung', value: 'API-Key + SSH-Key', status: 'ok' },
{ label: 'Verschluesselung', value: 'TLS 1.3', status: 'ok' },
{ label: 'Key Rotation', value: 'Alle 30 Tage', status: 'ok' },
{ label: 'Audit-Log', value: 'Aktiviert', status: 'ok' },
],
},
{
category: 'Kosten & Ressourcen',
items: [
{ label: 'Budget-Limit', value: 'Konfigurierbar', status: 'ok' },
{ label: 'Auto-Shutdown', value: '30 Min Inaktivitaet', status: 'ok' },
{ label: 'Spot-Instances', value: 'Geplant', status: 'warning' },
{ label: 'Multi-Cloud', value: 'Geplant', status: 'warning' },
],
},
{
category: 'Compliance',
items: [
{ label: 'Daten-Residenz', value: 'EU/US waehlbar', status: 'ok' },
{ label: 'DSGVO-Konformitaet', value: 'Ja', status: 'ok' },
{ label: 'SOC 2 Type II', value: 'vast.ai zertifiziert', status: 'ok' },
],
},
],
// Long-form documentation as one HTML fragment in a template literal.
// Everything between the backticks is runtime text, including the
// whitespace-sensitive <pre> diagram — edit with care.
fullDocumentation: `
<h2>GPU Infrastructure Management</h2>
<h3>1. Uebersicht</h3>
<p>Das GPU-Infrastruktur-Modul ermoeglicht die Verwaltung von Cloud-GPU-Ressourcen ueber vast.ai fuer KI-Workloads wie Model Training, Fine-Tuning und Inference.</p>
<h3>2. Architektur</h3>
<pre>
┌─────────────────────────────────────────────────────────────┐
│ Admin Dashboard │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │
│ │ Start/Stop │ │ Status │ │ Cost Tracking │ │
│ └──────┬──────┘ └──────┬──────┘ └──────────┬──────────┘ │
└─────────┼───────────────┼─────────────────────┼─────────────┘
│ │ │
v v v
┌─────────────────────────────────────────────────────────────┐
│ Backend API │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ /infra/vast/* - vast.ai Proxy mit Auth │ │
│ └─────────────────────────────────────────────────────┘ │
└──────────────────────────┬──────────────────────────────────┘
v
┌─────────────────────────────────────────────────────────────┐
│ vast.ai Cloud │
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │ RTX 4090 │ │ A100 │ │ H100 │ │ RTX 3090 │ │
│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │
└─────────────────────────────────────────────────────────────┘
</pre>
<h3>3. API Endpoints</h3>
<table>
<tr><th>Endpoint</th><th>Methode</th><th>Beschreibung</th></tr>
<tr><td>/infra/vast/status</td><td>GET</td><td>Aktueller Status der GPU-Instanz</td></tr>
<tr><td>/infra/vast/power/on</td><td>POST</td><td>GPU-Instanz starten</td></tr>
<tr><td>/infra/vast/power/off</td><td>POST</td><td>GPU-Instanz stoppen</td></tr>
<tr><td>/infra/vast/audit</td><td>GET</td><td>Audit-Log der letzten Aktionen</td></tr>
</table>
<h3>4. Sicherheitskonzept</h3>
<ul>
<li><strong>Authentifizierung:</strong> API-Key basiert, keine Benutzer-Credentials auf GPU</li>
<li><strong>SSH-Zugriff:</strong> Key-basiert, automatische Rotation alle 30 Tage</li>
<li><strong>Netzwerk:</strong> Isolierte VPC, nur definierte Ports offen</li>
<li><strong>Datenpersistenz:</strong> Ephemeral Storage, keine persistenten Volumes</li>
</ul>
<h3>5. Kostenmanagement</h3>
<table>
<tr><th>GPU</th><th>Preis/Stunde</th><th>VRAM</th><th>Use Case</th></tr>
<tr><td>RTX 4090</td><td>~$0.40</td><td>24 GB</td><td>Training, Inference</td></tr>
<tr><td>A100</td><td>~$1.50</td><td>80 GB</td><td>Large Model Training</td></tr>
<tr><td>H100</td><td>~$3.00</td><td>80 GB</td><td>Enterprise Training</td></tr>
</table>
<h3>6. Auto-Shutdown Policy</h3>
<p>GPU-Instanzen werden automatisch nach 30 Minuten Inaktivitaet gestoppt, um Kosten zu sparen. Bei jedem LLM-Request wird die Aktivitaet aufgezeichnet.</p>
<h3>7. Audit-relevante Informationen</h3>
<ul>
<li>Alle Start/Stop-Aktionen werden mit Timestamp und User-ID geloggt</li>
<li>Kostenabrechnung erfolgt minutengenau</li>
<li>SSH-Key Rotation wird dokumentiert</li>
<li>Fehlgeschlagene Operationen werden mit Error-Details gespeichert</li>
</ul>
<h3>8. Notfallprozeduren</h3>
<ol>
<li><strong>Bei Kostenüberschreitung:</strong> Automatischer Stop aller Instanzen</li>
<li><strong>Bei API-Ausfall:</strong> Fallback auf manuelles vast.ai Dashboard</li>
<li><strong>Bei Sicherheitsvorfall:</strong> Sofortiges Destroy aller Instanzen</li>
</ol>
`,
}