From f39cbe92838a4c7a101bdf28fa68c038257c3b9a Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Thu, 23 Apr 2026 13:14:12 +0200 Subject: [PATCH] refactor: remove unused pages and backends (model-management, OCR legacy, GPU/vast.ai, video-chat, matrix) Deleted pages: - /ai/model-management (mock data only, no real backend) - /ai/ocr-compare (old /vocab/ backend, replaced by ocr-kombi) - /ai/ocr-pipeline (minimal session browser, redundant) - /ai/ocr-overlay (legacy monolith, redundant) - /ai/gpu (vast.ai GPU management, no longer used) - /infrastructure/gpu (same) - /communication/video-chat (moved to core) - /communication/matrix (moved to core) Deleted backends: - backend-lehrer/infra/vast_client.py + vast_power.py - backend-lehrer/meetings_api.py + jitsi_api.py - website/app/api/admin/gpu/ - edu-search-service/scripts/vast_ai_extractor.py Total: ~7,800 LOC removed. All code preserved in git history. Co-Authored-By: Claude Opus 4.6 (1M context) --- admin-lehrer/app/(admin)/ai/gpu/page.tsx | 395 ---- .../app/(admin)/ai/model-management/page.tsx | 549 ------ .../app/(admin)/ai/ocr-compare/page.tsx | 1637 ----------------- .../app/(admin)/ai/ocr-kombi/page.tsx | 1 - .../app/(admin)/ai/ocr-overlay/page.tsx | 751 -------- .../app/(admin)/ai/ocr-overlay/types.ts | 87 - .../app/(admin)/ai/ocr-pipeline/page.tsx | 443 ----- .../app/(admin)/ai/ocr-pipeline/types.ts | 430 ----- .../ai/ocr-pipeline/usePipelineNavigation.ts | 225 --- .../app/(admin)/communication/matrix/page.tsx | 593 ------ .../(admin)/communication/video-chat/page.tsx | 635 ------- .../communication/video-chat/wizard/page.tsx | 366 ---- .../app/(admin)/infrastructure/gpu/page.tsx | 390 ---- .../__tests__/usePixelWordPositions.test.ts | 328 ++++ admin-lehrer/lib/navigation.ts | 62 - backend-lehrer/infra/__init__.py | 11 +- backend-lehrer/infra/vast_client.py | 419 ----- backend-lehrer/infra/vast_power.py | 618 ------- backend-lehrer/jitsi_api.py | 199 -- backend-lehrer/main.py | 11 - backend-lehrer/meetings_api.py | 443 ----- .../scripts/vast_ai_extractor.py | 320 ---- .../backend/services/lighton_ocr_service.py | 119 ++ klausur-service/backend/tests/debug_shear.py | 100 + .../backend/tests/test_box_boundary_rows.py | 256 +++ .../tests/test_dictionary_detection.py | 285 +++ website/app/admin/gpu/page.tsx | 394 ---- website/app/admin/gpu/wizard/page.tsx | 365 ---- website/app/api/admin/gpu/route.ts | 82 - .../admin/system-info-configs/gpu-config.ts | 142 -- 30 files changed, 1089 insertions(+), 9567 deletions(-) delete mode 100644 admin-lehrer/app/(admin)/ai/gpu/page.tsx delete mode 100644 admin-lehrer/app/(admin)/ai/model-management/page.tsx delete mode 100644 admin-lehrer/app/(admin)/ai/ocr-compare/page.tsx delete mode 100644 admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx delete mode 100644 admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts delete mode 100644 admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx delete mode 100644 admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts delete mode 100644 admin-lehrer/app/(admin)/ai/ocr-pipeline/usePipelineNavigation.ts delete mode 100644 admin-lehrer/app/(admin)/communication/matrix/page.tsx delete mode 100644 admin-lehrer/app/(admin)/communication/video-chat/page.tsx delete mode 100644 admin-lehrer/app/(admin)/communication/video-chat/wizard/page.tsx delete mode 100644 admin-lehrer/app/(admin)/infrastructure/gpu/page.tsx create mode 100644 admin-lehrer/components/ocr-pipeline/__tests__/usePixelWordPositions.test.ts delete mode 100644 backend-lehrer/infra/vast_client.py delete mode 100644 backend-lehrer/infra/vast_power.py delete mode 100644 backend-lehrer/jitsi_api.py delete mode 100644 backend-lehrer/meetings_api.py delete mode 100644 edu-search-service/scripts/vast_ai_extractor.py create mode 100644 klausur-service/backend/services/lighton_ocr_service.py create mode 100644 klausur-service/backend/tests/debug_shear.py create mode 100644 klausur-service/backend/tests/test_box_boundary_rows.py create mode 100644 klausur-service/backend/tests/test_dictionary_detection.py delete mode 100644 website/app/admin/gpu/page.tsx delete mode 100644 website/app/admin/gpu/wizard/page.tsx delete mode 100644 website/app/api/admin/gpu/route.ts delete mode 100644 website/components/admin/system-info-configs/gpu-config.ts diff --git a/admin-lehrer/app/(admin)/ai/gpu/page.tsx b/admin-lehrer/app/(admin)/ai/gpu/page.tsx deleted file mode 100644 index f77a5a7..0000000 --- a/admin-lehrer/app/(admin)/ai/gpu/page.tsx +++ /dev/null @@ -1,395 +0,0 @@ -'use client' - -/** - * GPU Infrastructure Admin Page - * - * vast.ai GPU Management for LLM Processing - * Part of KI-Werkzeuge - */ - -import { useEffect, useState, useCallback } from 'react' -import { PagePurpose } from '@/components/common/PagePurpose' -import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar' - -interface VastStatus { - instance_id: number | null - status: string - gpu_name: string | null - dph_total: number | null - endpoint_base_url: string | null - last_activity: string | null - auto_shutdown_in_minutes: number | null - total_runtime_hours: number | null - total_cost_usd: number | null - account_credit: number | null - account_total_spend: number | null - session_runtime_minutes: number | null - session_cost_usd: number | null - message: string | null - error?: string -} - -export default function GPUInfrastructurePage() { - const [status, setStatus] = useState(null) - const [loading, setLoading] = useState(true) - const [actionLoading, setActionLoading] = useState(null) - const [error, setError] = useState(null) - const [message, setMessage] = useState(null) - - const API_PROXY = '/api/admin/gpu' - - const fetchStatus = useCallback(async () => { - setLoading(true) - setError(null) - - try { - const response = await fetch(API_PROXY) - const data = await response.json() - - if (!response.ok) { - throw new Error(data.error || `HTTP ${response.status}`) - } - - setStatus(data) - } catch (err) { - setError(err instanceof Error ? err.message : 'Verbindungsfehler') - setStatus({ - instance_id: null, - status: 'error', - gpu_name: null, - dph_total: null, - endpoint_base_url: null, - last_activity: null, - auto_shutdown_in_minutes: null, - total_runtime_hours: null, - total_cost_usd: null, - account_credit: null, - account_total_spend: null, - session_runtime_minutes: null, - session_cost_usd: null, - message: 'Verbindung fehlgeschlagen' - }) - } finally { - setLoading(false) - } - }, []) - - useEffect(() => { - fetchStatus() - }, [fetchStatus]) - - useEffect(() => { - const interval = setInterval(fetchStatus, 30000) - return () => clearInterval(interval) - }, [fetchStatus]) - - const powerOn = async () => { - setActionLoading('on') - setError(null) - setMessage(null) - - try { - const response = await fetch(API_PROXY, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ action: 'on' }), - }) - - const data = await response.json() - - if (!response.ok) { - throw new Error(data.error || data.detail || 'Aktion fehlgeschlagen') - } - - setMessage('Start angefordert') - setTimeout(fetchStatus, 3000) - setTimeout(fetchStatus, 10000) - } catch (err) { - setError(err instanceof Error ? err.message : 'Fehler beim Starten') - fetchStatus() - } finally { - setActionLoading(null) - } - } - - const powerOff = async () => { - setActionLoading('off') - setError(null) - setMessage(null) - - try { - const response = await fetch(API_PROXY, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ action: 'off' }), - }) - - const data = await response.json() - - if (!response.ok) { - throw new Error(data.error || data.detail || 'Aktion fehlgeschlagen') - } - - setMessage('Stop angefordert') - setTimeout(fetchStatus, 3000) - setTimeout(fetchStatus, 10000) - } catch (err) { - setError(err instanceof Error ? err.message : 'Fehler beim Stoppen') - fetchStatus() - } finally { - setActionLoading(null) - } - } - - const getStatusBadge = (s: string) => { - const baseClasses = 'px-3 py-1 rounded-full text-sm font-semibold uppercase' - switch (s) { - case 'running': - return `${baseClasses} bg-green-100 text-green-800` - case 'stopped': - case 'exited': - return `${baseClasses} bg-red-100 text-red-800` - case 'loading': - case 'scheduling': - case 'creating': - case 'starting...': - case 'stopping...': - return `${baseClasses} bg-yellow-100 text-yellow-800` - default: - return `${baseClasses} bg-slate-100 text-slate-600` - } - } - - const getCreditColor = (credit: number | null) => { - if (credit === null) return 'text-slate-500' - if (credit < 5) return 'text-red-600' - if (credit < 15) return 'text-yellow-600' - return 'text-green-600' - } - - return ( -
- {/* Page Purpose */} - - - {/* KI-Werkzeuge Sidebar */} - - - {/* Status Cards */} -
-
-
-
Status
- {loading ? ( - - Laden... - - ) : ( - - {actionLoading === 'on' ? 'starting...' : - actionLoading === 'off' ? 'stopping...' : - status?.status || 'unbekannt'} - - )} -
- -
-
GPU
-
- {status?.gpu_name || '-'} -
-
- -
-
Kosten/h
-
- {status?.dph_total ? `$${status.dph_total.toFixed(3)}` : '-'} -
-
- -
-
Auto-Stop
-
- {status && status.auto_shutdown_in_minutes !== null - ? `${status.auto_shutdown_in_minutes} min` - : '-'} -
-
- -
-
Budget
-
- {status && status.account_credit !== null - ? `$${status.account_credit.toFixed(2)}` - : '-'} -
-
- -
-
Session
-
- {status && status.session_runtime_minutes !== null && status.session_cost_usd !== null - ? `${Math.round(status.session_runtime_minutes)} min / $${status.session_cost_usd.toFixed(3)}` - : '-'} -
-
-
- - {/* Buttons */} -
- - - - - {message && ( - {message} - )} - {error && ( - {error} - )} -
-
- - {/* Extended Stats */} -
-
-

Kosten-Uebersicht

-
-
- Session Laufzeit - - {status && status.session_runtime_minutes !== null - ? `${Math.round(status.session_runtime_minutes)} Minuten` - : '-'} - -
-
- Session Kosten - - {status && status.session_cost_usd !== null - ? `$${status.session_cost_usd.toFixed(4)}` - : '-'} - -
-
- Gesamtlaufzeit - - {status && status.total_runtime_hours !== null - ? `${status.total_runtime_hours.toFixed(1)} Stunden` - : '-'} - -
-
- Gesamtkosten - - {status && status.total_cost_usd !== null - ? `$${status.total_cost_usd.toFixed(2)}` - : '-'} - -
-
- vast.ai Ausgaben - - {status && status.account_total_spend !== null - ? `$${status.account_total_spend.toFixed(2)}` - : '-'} - -
-
-
- -
-

Instanz-Details

-
-
- Instanz ID - - {status?.instance_id || '-'} - -
-
- GPU - - {status?.gpu_name || '-'} - -
-
- Stundensatz - - {status?.dph_total ? `$${status.dph_total.toFixed(4)}/h` : '-'} - -
-
- Letzte Aktivitaet - - {status?.last_activity - ? new Date(status.last_activity).toLocaleString('de-DE') - : '-'} - -
- {status?.endpoint_base_url && status.status === 'running' && ( -
-
Endpoint
- - {status.endpoint_base_url} - -
- )} -
-
-
- - {/* Info */} -
-
- - - -
-

Auto-Shutdown

-

- Die GPU-Instanz wird automatisch gestoppt, wenn sie laengere Zeit inaktiv ist. - Der Status wird alle 30 Sekunden automatisch aktualisiert. -

-
-
-
-
- ) -} diff --git a/admin-lehrer/app/(admin)/ai/model-management/page.tsx b/admin-lehrer/app/(admin)/ai/model-management/page.tsx deleted file mode 100644 index a14b817..0000000 --- a/admin-lehrer/app/(admin)/ai/model-management/page.tsx +++ /dev/null @@ -1,549 +0,0 @@ -'use client' - -/** - * Model Management Page - * - * Manage ML model backends (PyTorch vs ONNX), view status, - * run benchmarks, and configure inference settings. - */ - -import { useState, useEffect, useCallback } from 'react' -import { PagePurpose } from '@/components/common/PagePurpose' - -const KLAUSUR_API = '/klausur-api' - -// --------------------------------------------------------------------------- -// Types -// --------------------------------------------------------------------------- - -type BackendMode = 'auto' | 'pytorch' | 'onnx' -type ModelStatus = 'available' | 'not_found' | 'loading' | 'error' -type Tab = 'overview' | 'benchmarks' | 'configuration' - -interface ModelInfo { - name: string - key: string - pytorch: { status: ModelStatus; size_mb: number; ram_mb: number } - onnx: { status: ModelStatus; size_mb: number; ram_mb: number; quantized: boolean } -} - -interface BenchmarkRow { - model: string - backend: string - quantization: string - size_mb: number - ram_mb: number - inference_ms: number - load_time_s: number -} - -interface StatusInfo { - active_backend: BackendMode - loaded_models: string[] - cache_hits: number - cache_misses: number - uptime_s: number -} - -// --------------------------------------------------------------------------- -// Mock data (used when backend is not available) -// --------------------------------------------------------------------------- - -const MOCK_MODELS: ModelInfo[] = [ - { - name: 'TrOCR Printed', - key: 'trocr_printed', - pytorch: { status: 'available', size_mb: 892, ram_mb: 1800 }, - onnx: { status: 'available', size_mb: 234, ram_mb: 620, quantized: true }, - }, - { - name: 'TrOCR Handwritten', - key: 'trocr_handwritten', - pytorch: { status: 'available', size_mb: 892, ram_mb: 1800 }, - onnx: { status: 'not_found', size_mb: 0, ram_mb: 0, quantized: false }, - }, - { - name: 'PP-DocLayout', - key: 'pp_doclayout', - pytorch: { status: 'not_found', size_mb: 0, ram_mb: 0 }, - onnx: { status: 'available', size_mb: 48, ram_mb: 180, quantized: false }, - }, -] - -const MOCK_BENCHMARKS: BenchmarkRow[] = [ - { model: 'TrOCR Printed', backend: 'PyTorch', quantization: 'FP32', size_mb: 892, ram_mb: 1800, inference_ms: 142, load_time_s: 3.2 }, - { model: 'TrOCR Printed', backend: 'ONNX', quantization: 'INT8', size_mb: 234, ram_mb: 620, inference_ms: 38, load_time_s: 0.8 }, - { model: 'TrOCR Handwritten', backend: 'PyTorch', quantization: 'FP32', size_mb: 892, ram_mb: 1800, inference_ms: 156, load_time_s: 3.4 }, - { model: 'PP-DocLayout', backend: 'ONNX', quantization: 'FP32', size_mb: 48, ram_mb: 180, inference_ms: 22, load_time_s: 0.3 }, -] - -const MOCK_STATUS: StatusInfo = { - active_backend: 'auto', - loaded_models: ['trocr_printed (ONNX)', 'pp_doclayout (ONNX)'], - cache_hits: 1247, - cache_misses: 83, - uptime_s: 86400, -} - -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -function StatusBadge({ status }: { status: ModelStatus }) { - const cls = - status === 'available' - ? 'bg-emerald-100 text-emerald-800 border-emerald-200' - : status === 'loading' - ? 'bg-blue-100 text-blue-800 border-blue-200' - : status === 'not_found' - ? 'bg-slate-100 text-slate-500 border-slate-200' - : 'bg-red-100 text-red-800 border-red-200' - const label = - status === 'available' ? 'Verfuegbar' - : status === 'loading' ? 'Laden...' - : status === 'not_found' ? 'Nicht vorhanden' - : 'Fehler' - return ( - - {label} - - ) -} - -function formatBytes(mb: number) { - if (mb === 0) return '--' - if (mb >= 1000) return `${(mb / 1000).toFixed(1)} GB` - return `${mb} MB` -} - -function formatUptime(seconds: number) { - const h = Math.floor(seconds / 3600) - const m = Math.floor((seconds % 3600) / 60) - if (h > 0) return `${h}h ${m}m` - return `${m}m` -} - -// --------------------------------------------------------------------------- -// Component -// --------------------------------------------------------------------------- - -export default function ModelManagementPage() { - const [tab, setTab] = useState('overview') - const [models, setModels] = useState(MOCK_MODELS) - const [benchmarks, setBenchmarks] = useState(MOCK_BENCHMARKS) - const [status, setStatus] = useState(MOCK_STATUS) - const [backend, setBackend] = useState('auto') - const [saving, setSaving] = useState(false) - const [benchmarkRunning, setBenchmarkRunning] = useState(false) - const [usingMock, setUsingMock] = useState(false) - - // Load status - const loadStatus = useCallback(async () => { - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/models/status`) - if (res.ok) { - const data = await res.json() - setStatus(data) - setBackend(data.active_backend || 'auto') - setUsingMock(false) - } else { - setUsingMock(true) - } - } catch { - setUsingMock(true) - } - }, []) - - // Load models - const loadModels = useCallback(async () => { - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/models`) - if (res.ok) { - const data = await res.json() - if (data.models?.length) setModels(data.models) - } - } catch { - // Keep mock data - } - }, []) - - // Load benchmarks - const loadBenchmarks = useCallback(async () => { - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/models/benchmarks`) - if (res.ok) { - const data = await res.json() - if (data.benchmarks?.length) setBenchmarks(data.benchmarks) - } - } catch { - // Keep mock data - } - }, []) - - useEffect(() => { - loadStatus() - loadModels() - loadBenchmarks() - }, [loadStatus, loadModels, loadBenchmarks]) - - // Save backend preference - const saveBackend = async (mode: BackendMode) => { - setBackend(mode) - setSaving(true) - try { - await fetch(`${KLAUSUR_API}/api/v1/models/backend`, { - method: 'PUT', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ backend: mode }), - }) - await loadStatus() - } catch { - // Silently handle — mock mode - } finally { - setSaving(false) - } - } - - // Run benchmark - const runBenchmark = async () => { - setBenchmarkRunning(true) - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/models/benchmark`, { - method: 'POST', - }) - if (res.ok) { - const data = await res.json() - if (data.benchmarks?.length) setBenchmarks(data.benchmarks) - } - await loadBenchmarks() - } catch { - // Keep existing data - } finally { - setBenchmarkRunning(false) - } - } - - const tabs: { key: Tab; label: string }[] = [ - { key: 'overview', label: 'Uebersicht' }, - { key: 'benchmarks', label: 'Benchmarks' }, - { key: 'configuration', label: 'Konfiguration' }, - ] - - return ( -
-
- - - {/* Header */} -
-
-

Model Management

-

- {models.length} Modelle konfiguriert - {usingMock && ( - - Mock-Daten (Backend nicht erreichbar) - - )} -

-
-
- - {/* Status Cards */} -
-
-

Aktives Backend

-

{status.active_backend.toUpperCase()}

-
-
-

Geladene Modelle

-

{status.loaded_models.length}

-
-
-

Cache Hit-Rate

-

- {status.cache_hits + status.cache_misses > 0 - ? `${((status.cache_hits / (status.cache_hits + status.cache_misses)) * 100).toFixed(1)}%` - : '--'} -

-
-
-

Uptime

-

{formatUptime(status.uptime_s)}

-
-
- - {/* Tabs */} -
- -
- - {/* Overview Tab */} - {tab === 'overview' && ( -
-

Verfuegbare Modelle

-
- {models.map(m => ( -
-
-

{m.name}

-

{m.key}

-
-
- {/* PyTorch */} -
-
- PyTorch - -
- {m.pytorch.status === 'available' && ( - - {formatBytes(m.pytorch.size_mb)} / {formatBytes(m.pytorch.ram_mb)} RAM - - )} -
- {/* ONNX */} -
-
- ONNX - -
- {m.onnx.status === 'available' && ( - - {formatBytes(m.onnx.size_mb)} / {formatBytes(m.onnx.ram_mb)} RAM - {m.onnx.quantized && ( - INT8 - )} - - )} -
-
-
- ))} -
- - {/* Loaded Models List */} - {status.loaded_models.length > 0 && ( -
-

Aktuell geladen

-
- {status.loaded_models.map((m, i) => ( - - {m} - - ))} -
-
- )} -
- )} - - {/* Benchmarks Tab */} - {tab === 'benchmarks' && ( -
-
-

PyTorch vs ONNX Vergleich

- -
- -
-
- - - - - - - - - - - - - - {benchmarks.map((b, i) => ( - - - - - - - - - - ))} - -
ModellBackendQuantisierungGroesseRAMInferenzLadezeit
{b.model} - - {b.backend} - - {b.quantization}{formatBytes(b.size_mb)}{formatBytes(b.ram_mb)} - - {b.inference_ms} ms - - {b.load_time_s.toFixed(1)}s
-
-
- - {benchmarks.length === 0 && ( -
-

Keine Benchmark-Daten

-

Klicken Sie "Benchmark starten" um einen Vergleich durchzufuehren.

-
- )} -
- )} - - {/* Configuration Tab */} - {tab === 'configuration' && ( -
- {/* Backend Selector */} -
-

Inference Backend

-

- Waehlen Sie welches Backend fuer die Modell-Inferenz verwendet werden soll. -

-
- {([ - { - mode: 'auto' as const, - label: 'Auto', - desc: 'ONNX wenn verfuegbar, Fallback auf PyTorch.', - }, - { - mode: 'pytorch' as const, - label: 'PyTorch', - desc: 'Immer PyTorch verwenden. Hoeherer RAM-Verbrauch, volle Flexibilitaet.', - }, - { - mode: 'onnx' as const, - label: 'ONNX', - desc: 'Immer ONNX verwenden. Schneller und weniger RAM, Fehler wenn nicht vorhanden.', - }, - ] as const).map(opt => ( - - ))} -
- {saving && ( -

Speichere...

- )} -
- - {/* Model Details Table */} -
-

Modell-Details

-
- - - - - - - - - - - - - {models.map(m => { - const ptAvail = m.pytorch.status === 'available' - const oxAvail = m.onnx.status === 'available' - const savings = ptAvail && oxAvail && m.pytorch.size_mb > 0 - ? Math.round((1 - m.onnx.size_mb / m.pytorch.size_mb) * 100) - : null - return ( - - - - - - - - - ) - })} - -
ModellPyTorchGroesse (PT)ONNXGroesse (ONNX)Einsparung
{m.name}{ptAvail ? formatBytes(m.pytorch.size_mb) : '--'}{oxAvail ? formatBytes(m.onnx.size_mb) : '--'} - {savings !== null ? ( - -{savings}% - ) : ( - -- - )} -
-
-
-
- )} -
-
- ) -} diff --git a/admin-lehrer/app/(admin)/ai/ocr-compare/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-compare/page.tsx deleted file mode 100644 index 3828bd5..0000000 --- a/admin-lehrer/app/(admin)/ai/ocr-compare/page.tsx +++ /dev/null @@ -1,1637 +0,0 @@ -'use client' - -/** - * OCR Comparison Tool - * - * Zeigt Original-PDF neben den Extraktionsergebnissen von verschiedenen OCR-Methoden. - * Ermoeglicht direkten visuellen Vergleich mit voller Breite. - * Bietet Session-Historie fuer Verbesserungsvergleiche. - */ - -import { useState, useEffect, useCallback, useMemo } from 'react' -import { PagePurpose } from '@/components/common/PagePurpose' -import { AIToolsSidebarResponsive } from '@/components/ai/AIToolsSidebar' -import { QRCodeUpload, UploadedFile } from '@/components/QRCodeUpload' -import { GridOverlay, GridStats, GridLegend, CellCorrectionDialog, BlockReviewPanel, BlockReviewSummary, getCellBlockNumber, GroundTruthPanel } from '@/components/ocr' -import type { GridData, GridCell, BlockReviewData, BlockStatus } from '@/components/ocr' - -interface VocabEntry { - english: string - german: string - example?: string -} - -interface MethodResult { - name: string - model: string - duration_seconds: number - vocabulary_count: number - vocabulary: VocabEntry[] - confidence: number - error?: string - success: boolean -} - -interface ComparisonResult { - session_id: string - page_number: number - methods: Record - comparison: { - found_by_all_methods: Array<{ english: string; german: string; methods: string[] }> - found_by_some_methods: Array<{ english: string; german: string; methods: string[] }> - total_unique_vocabulary: number - agreement_rate: number - } - recommendation: { - best_method: string - reason: string - } -} - -interface SessionInfo { - id: string - name: string - created_at: string - page_count?: number -} - -// OCR-Methoden Konfiguration -const OCR_METHODS = { - local_llm: { - id: 'local_llm', - name: 'Loesung A: Lokales 32B LLM', - shortName: 'A: Local LLM', - model: 'qwen2.5:32b extern', - color: 'slate', - description: 'Externes 32B LLM', - enabled: true, - }, - vision_llm: { - id: 'vision_llm', - name: 'Loesung B: Vision LLM', - shortName: 'B: Vision LLM', - model: 'qwen2.5vl:32b', - color: 'blue', - description: 'Direkte Bild-zu-Text Extraktion', - enabled: true, - }, - paddleocr: { - id: 'paddleocr', - name: 'Loesung C: PaddleOCR', - shortName: 'C: PaddleOCR', - model: 'paddleocr (x86)', - color: 'red', - description: 'Aktuell deaktiviert (Rosetta)', - enabled: false, - }, - tesseract: { - id: 'tesseract', - name: 'Loesung D: Tesseract', - shortName: 'D: Tesseract', - model: 'tesseract + qwen2.5:14b', - color: 'purple', - description: 'ARM64-nativ, Standard', - enabled: true, - }, - cv_pipeline: { - id: 'cv_pipeline', - name: 'Loesung E: Document Reconstruction', - shortName: 'E: Doc Recon', - model: 'opencv + tesseract (multi-pass)', - color: 'green', - description: 'CV-Pipeline: Deskew, Dewarp, Binarisierung, Multi-Pass OCR', - enabled: true, - }, -} - -export default function OCRComparePage() { - // Session State - const [sessionId, setSessionId] = useState(null) - const [pageCount, setPageCount] = useState(0) - const [selectedPage, setSelectedPage] = useState(0) - const [thumbnails, setThumbnails] = useState([]) - const [loadingThumbnails, setLoadingThumbnails] = useState(false) - - // Session History - const [sessions, setSessions] = useState([]) - const [loadingSessions, setLoadingSessions] = useState(false) - const [showHistory, setShowHistory] = useState(false) - - // Comparison State - const [comparing, setComparing] = useState(false) - const [result, setResult] = useState(null) - const [error, setError] = useState(null) - const [uploading, setUploading] = useState(false) - - // Method Selection - const [selectedMethods, setSelectedMethods] = useState(['vision_llm', 'tesseract', 'cv_pipeline']) - - // QR Upload State - const [showQRModal, setShowQRModal] = useState(false) - const [qrUploadSessionId, setQrUploadSessionId] = useState('') - const [mobileUploadedFiles, setMobileUploadedFiles] = useState([]) - - // View Mode State - const [isFullscreen, setIsFullscreen] = useState(false) - const [expandedMethod, setExpandedMethod] = useState(null) // For single document view - const [visibleMethods, setVisibleMethods] = useState([]) // For custom multi-column view - - // Grid Detection State - const [gridData, setGridData] = useState(null) - const [analyzingGrid, setAnalyzingGrid] = useState(false) - const [showGridOverlay, setShowGridOverlay] = useState(true) - const [selectedCell, setSelectedCell] = useState(null) - const [showCellDialog, setShowCellDialog] = useState(false) - const [showMmGrid, setShowMmGrid] = useState(false) - const [showTextAtPosition, setShowTextAtPosition] = useState(false) - const [editableText, setEditableText] = useState(false) - - // Block Review State - const [blockReviewMode, setBlockReviewMode] = useState(false) - const [currentBlockNumber, setCurrentBlockNumber] = useState(1) - const [blockReviewData, setBlockReviewData] = useState>({}) - - // Export State - const [isExporting, setIsExporting] = useState(false) - const [exportSuccess, setExportSuccess] = useState(false) - - // Tab State (compare vs ground truth) - const [activeTab, setActiveTab] = useState<'compare' | 'groundtruth'>('compare') - - const KLAUSUR_API = '/klausur-api' - - // Load session history - const loadSessions = useCallback(async () => { - setLoadingSessions(true) - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/vocab/sessions`) - if (res.ok) { - const data = await res.json() - // Filter to only show OCR Vergleich sessions and sort by date - const ocrSessions = (data.sessions || data || []) - .filter((s: SessionInfo) => s.name?.includes('OCR Vergleich')) - .sort((a: SessionInfo, b: SessionInfo) => - new Date(b.created_at).getTime() - new Date(a.created_at).getTime() - ) - .slice(0, 20) // Limit to 20 most recent - setSessions(ocrSessions) - } - } catch (e) { - console.error('Failed to load sessions:', e) - } finally { - setLoadingSessions(false) - } - }, []) - - // Initialize and restore session - useEffect(() => { - loadSessions() - - let sid = localStorage.getItem('ocr-compare-upload-session') - if (!sid) { - sid = `ocr-compare-${Date.now()}-${Math.random().toString(36).substr(2, 9)}` - localStorage.setItem('ocr-compare-upload-session', sid) - } - setQrUploadSessionId(sid) - - // Restore last active session if available - const lastSessionId = localStorage.getItem('ocr-compare-active-session') - if (lastSessionId) { - // Load the session data - fetch(`${KLAUSUR_API}/api/v1/vocab/sessions/${lastSessionId}`) - .then(res => { - if (res.ok) return res.json() - throw new Error('Session not found') - }) - .then(data => { - setSessionId(lastSessionId) - setPageCount(data.page_count || 1) - setSelectedPage(0) - loadAllThumbnails(lastSessionId, data.page_count || 1) - }) - .catch(() => { - // Session no longer exists, clear localStorage - localStorage.removeItem('ocr-compare-active-session') - }) - } - }, [loadSessions]) - - // ESC key to exit fullscreen - useEffect(() => { - const handleKeyDown = (e: KeyboardEvent) => { - if (e.key === 'Escape') { - if (expandedMethod) { - setExpandedMethod(null) - } else if (isFullscreen) { - setIsFullscreen(false) - } - } - } - window.addEventListener('keydown', handleKeyDown) - return () => window.removeEventListener('keydown', handleKeyDown) - }, [isFullscreen, expandedMethod]) - - // Load a session from history - const loadSession = async (session: SessionInfo) => { - setSessionId(session.id) - localStorage.setItem('ocr-compare-active-session', session.id) - setResult(null) - setThumbnails([]) - - try { - // Get session details - const res = await fetch(`${KLAUSUR_API}/api/v1/vocab/sessions/${session.id}`) - if (res.ok) { - const data = await res.json() - setPageCount(data.page_count || 1) - setSelectedPage(0) - - // Load thumbnails - await loadAllThumbnails(session.id, data.page_count || 1) - } - } catch (e) { - setError('Session konnte nicht geladen werden') - } - } - - // Handle mobile file upload - const handleMobileFile = useCallback(async (file: UploadedFile) => { - if (!file.dataUrl) return - - setUploading(true) - setError(null) - setResult(null) - setThumbnails([]) - - try { - // Create session - const sessionRes = await fetch(`${KLAUSUR_API}/api/v1/vocab/sessions`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ name: `OCR Vergleich - ${file.name}` }) - }) - - if (!sessionRes.ok) throw new Error('Session konnte nicht erstellt werden') - const sessionData = await sessionRes.json() - setSessionId(sessionData.id) - localStorage.setItem('ocr-compare-active-session', sessionData.id) - - // Convert dataUrl to blob and upload - const response = await fetch(file.dataUrl) - const blob = await response.blob() - - const formData = new FormData() - formData.append('file', blob, file.name) - - const uploadRes = await fetch( - `${KLAUSUR_API}/api/v1/vocab/sessions/${sessionData.id}/upload-pdf-info`, - { method: 'POST', body: formData } - ) - - if (!uploadRes.ok) throw new Error('PDF Upload fehlgeschlagen') - const uploadData = await uploadRes.json() - setPageCount(uploadData.page_count || 1) - setSelectedPage(0) - - // Load thumbnails - await loadAllThumbnails(sessionData.id, uploadData.page_count || 1) - - // Refresh session list - loadSessions() - - } catch (err) { - setError(err instanceof Error ? err.message : 'Upload fehlgeschlagen') - } finally { - setUploading(false) - } - }, [loadSessions]) - - // Watch for new mobile files - useEffect(() => { - if (mobileUploadedFiles.length > 0) { - const latestFile = mobileUploadedFiles[mobileUploadedFiles.length - 1] - handleMobileFile(latestFile) - setShowQRModal(false) - } - }, [mobileUploadedFiles, handleMobileFile]) - - const handleFileUpload = async (e: React.ChangeEvent) => { - const file = e.target.files?.[0] - if (!file) return - - setUploading(true) - setError(null) - setResult(null) - setThumbnails([]) - - try { - // Create session - const sessionRes = await fetch(`${KLAUSUR_API}/api/v1/vocab/sessions`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ name: `OCR Vergleich - ${file.name}` }) - }) - - if (!sessionRes.ok) throw new Error('Session konnte nicht erstellt werden') - const sessionData = await sessionRes.json() - setSessionId(sessionData.id) - localStorage.setItem('ocr-compare-active-session', sessionData.id) - - // Upload PDF - const formData = new FormData() - formData.append('file', file) - - const uploadRes = await fetch( - `${KLAUSUR_API}/api/v1/vocab/sessions/${sessionData.id}/upload-pdf-info`, - { method: 'POST', body: formData } - ) - - if (!uploadRes.ok) throw new Error('PDF Upload fehlgeschlagen') - const uploadData = await uploadRes.json() - setPageCount(uploadData.page_count || 1) - setSelectedPage(0) - - // Load all thumbnails - await loadAllThumbnails(sessionData.id, uploadData.page_count || 1) - - // Refresh session list - loadSessions() - - } catch (err) { - setError(err instanceof Error ? err.message : 'Upload fehlgeschlagen') - } finally { - setUploading(false) - } - } - - const loadAllThumbnails = async (sid: string, count: number) => { - setLoadingThumbnails(true) - const thumbs: string[] = [] - - for (let i = 0; i < count; i++) { - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/vocab/sessions/${sid}/pdf-thumbnail/${i}?hires=true`) - if (res.ok) { - const blob = await res.blob() - thumbs.push(URL.createObjectURL(blob)) - } else { - thumbs.push('') - } - } catch { - thumbs.push('') - } - } - - setThumbnails(thumbs) - setLoadingThumbnails(false) - } - - const toggleMethod = (methodId: string) => { - setSelectedMethods(prev => - prev.includes(methodId) - ? prev.filter(m => m !== methodId) - : [...prev, methodId] - ) - } - - const runComparison = async () => { - if (!sessionId || selectedMethods.length === 0) return - - setComparing(true) - setError(null) - - try { - const res = await fetch( - `${KLAUSUR_API}/api/v1/vocab/sessions/${sessionId}/compare-ocr/${selectedPage}`, - { method: 'POST' } - ) - - if (!res.ok) throw new Error(`Vergleich fehlgeschlagen: ${res.status}`) - const data = await res.json() - setResult(data) - - } catch (err) { - setError(err instanceof Error ? err.message : 'Vergleich fehlgeschlagen') - } finally { - setComparing(false) - } - } - - // Grid Analysis - const analyzeGrid = async () => { - if (!sessionId) return - - setAnalyzingGrid(true) - setError(null) - - try { - const res = await fetch( - `${KLAUSUR_API}/api/v1/vocab/sessions/${sessionId}/analyze-grid/${selectedPage}`, - { method: 'POST' } - ) - - if (!res.ok) throw new Error(`Grid-Analyse fehlgeschlagen: ${res.status}`) - const data = await res.json() - - if (data.success && data.grid) { - setGridData(data.grid) - } else { - setError(data.error || 'Grid-Erkennung fehlgeschlagen') - } - } catch (err) { - setError(err instanceof Error ? err.message : 'Grid-Analyse fehlgeschlagen') - } finally { - setAnalyzingGrid(false) - } - } - - // Handle cell click for correction - const handleCellClick = useCallback((cell: GridCell) => { - setSelectedCell(cell) - setShowCellDialog(true) - }, []) - - // Handle cell save - const handleCellSave = useCallback((text: string) => { - if (!gridData || !selectedCell) return - - // Update local grid data - const updatedCells = gridData.cells.map(row => - row.map(cell => - cell.row === selectedCell.row && cell.col === selectedCell.col - ? { ...cell, text, status: 'manual' as const, confidence: 1.0 } - : cell - ) - ) - - // Recalculate stats - const recognized = updatedCells.flat().filter(c => c.status === 'recognized').length - const manual = updatedCells.flat().filter(c => c.status === 'manual').length - const problematic = updatedCells.flat().filter(c => c.status === 'problematic').length - const total = updatedCells.flat().length - - setGridData({ - ...gridData, - cells: updatedCells, - stats: { - ...gridData.stats, - recognized, - manual, - problematic, - empty: total - recognized - manual - problematic, - coverage: (recognized + manual) / total - } - }) - - setShowCellDialog(false) - setSelectedCell(null) - }, [gridData, selectedCell]) - - // Block Review Handlers - const handleBlockApprove = useCallback((blockNumber: number, methodId: string, text: string) => { - if (!gridData) return - - const cell = gridData.cells.flat().find(c => getCellBlockNumber(c, gridData) === blockNumber) - if (!cell) return - - setBlockReviewData(prev => ({ - ...prev, - [blockNumber]: { - blockNumber, - cell, - methodResults: [], - status: 'approved' as BlockStatus, - correctedText: text, - approvedMethodId: methodId, - } - })) - }, [gridData]) - - const handleBlockCorrect = useCallback((blockNumber: number, correctedText: string) => { - if (!gridData) return - - const cell = gridData.cells.flat().find(c => getCellBlockNumber(c, gridData) === blockNumber) - if (!cell) return - - setBlockReviewData(prev => ({ - ...prev, - [blockNumber]: { - blockNumber, - cell, - methodResults: [], - status: 'corrected' as BlockStatus, - correctedText, - } - })) - }, [gridData]) - - const handleBlockSkip = useCallback((blockNumber: number) => { - if (!gridData) return - - const cell = gridData.cells.flat().find(c => getCellBlockNumber(c, gridData) === blockNumber) - if (!cell) return - - setBlockReviewData(prev => ({ - ...prev, - [blockNumber]: { - blockNumber, - cell, - methodResults: [], - status: 'skipped' as BlockStatus, - } - })) - }, [gridData]) - - // Start block review mode - const startBlockReview = useCallback(() => { - if (!gridData) return - - // Find first non-empty block - const firstBlock = gridData.cells.flat().find(c => c.status !== 'empty') - if (firstBlock) { - setCurrentBlockNumber(getCellBlockNumber(firstBlock, gridData)) - setBlockReviewMode(true) - } - }, [gridData]) - - // Export to Worksheet Editor - const handleExportToEditor = useCallback(async () => { - if (!gridData || !sessionId) return - - setIsExporting(true) - setExportSuccess(false) - - try { - // Convert grid cells (percent coordinates) to mm for A4 - const A4_WIDTH_MM = 210 - const A4_HEIGHT_MM = 297 - - const words = gridData.cells.flat() - .filter(cell => cell.status !== 'empty' && cell.text) - .map(cell => ({ - text: cell.text, - x_mm: (cell.x / 100) * A4_WIDTH_MM, - y_mm: (cell.y / 100) * A4_HEIGHT_MM, - width_mm: (cell.width / 100) * A4_WIDTH_MM, - height_mm: (cell.height / 100) * A4_HEIGHT_MM, - column_type: cell.column_type || 'unknown', - logical_row: cell.row, - confidence: cell.confidence, - })) - - const detectedColumns = gridData.column_types.map((type, idx) => ({ - column_type: type, - x_start_mm: (gridData.column_boundaries[idx] / 100) * A4_WIDTH_MM, - x_end_mm: (gridData.column_boundaries[idx + 1] / 100) * A4_WIDTH_MM, - })) - - const exportData = { - version: '1.0', - source: 'ocr-compare', - exported_at: new Date().toISOString(), - session_id: sessionId, - page_number: selectedPage + 1, - page_dimensions: { - width_mm: A4_WIDTH_MM, - height_mm: A4_HEIGHT_MM, - format: 'A4', - }, - words, - detected_columns: detectedColumns, - } - - const res = await fetch( - `${KLAUSUR_API}/api/v1/vocab/sessions/${sessionId}/ocr-export/${selectedPage + 1}`, - { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(exportData), - } - ) - - if (res.ok) { - setExportSuccess(true) - setTimeout(() => setExportSuccess(false), 3000) - } - } catch (e) { - console.error('Export failed:', e) - } finally { - setIsExporting(false) - } - }, [gridData, sessionId, selectedPage, KLAUSUR_API]) - - // Count non-empty blocks - const nonEmptyBlockCount = useMemo(() => { - if (!gridData) return 0 - return gridData.cells.flat().filter(c => c.status !== 'empty').length - }, [gridData]) - - const VocabList = ({ vocab, highlight }: { vocab: VocabEntry[]; highlight?: Set }) => ( -
- {vocab.map((v, idx) => { - const key = `${v.english}|${v.german}` - const isUnique = highlight?.has(key) - return ( -
-
{v.english}
-
{v.german}
- {v.example && ( -
{v.example}
- )} -
- ) - })} -
- ) - - const getUniqueVocab = (methodKey: string): Set => { - if (!result?.comparison?.found_by_some_methods) return new Set() - const unique = new Set() - result.comparison.found_by_some_methods.forEach(v => { - if (v.methods.includes(methodKey) && v.methods.length === 1) { - unique.add(`${v.english}|${v.german}`) - } - }) - return unique - } - - const getMethodColor = (color: string, type: 'bg' | 'border' | 'text') => { - const colors: Record> = { - slate: { bg: 'bg-slate-50', border: 'border-slate-300', text: 'text-slate-700' }, - blue: { bg: 'bg-blue-50', border: 'border-blue-300', text: 'text-blue-700' }, - red: { bg: 'bg-red-50', border: 'border-red-300', text: 'text-red-700' }, - purple: { bg: 'bg-purple-50', border: 'border-purple-300', text: 'text-purple-700' }, - green: { bg: 'bg-green-50', border: 'border-green-300', text: 'text-green-700' }, - } - return colors[color]?.[type] || colors.slate[type] - } - - // Anzahl der ausgewaehlten Methoden + 1 fuer das Original - const columnCount = selectedMethods.length + 1 - - return ( -
- - - {/* KI-Werkzeuge Sidebar */} - - -
- {/* Left Sidebar: Upload & History */} -
- {/* Upload Section */} -
-

PDF hochladen

- - - - - - {uploading && ( -
- - - - - Wird hochgeladen... -
- )} - - {error && ( -
- {error} -
- )} -
- - {/* Session History Panel */} -
- - - {showHistory && ( -
- {loadingSessions ? ( -
- - - - - Lade Sessions... -
- ) : sessions.length === 0 ? ( -
- Keine Sessions vorhanden -
- ) : ( - sessions.map(session => ( - - )) - )} -
- )} -
- - {/* Method Selection */} - {sessionId && pageCount > 0 && ( -
-

OCR-Methoden

- -
- {Object.values(OCR_METHODS).map(method => ( - - ))} -
- -
- - - {/* Grid Analysis Button */} - -
- - {/* Grid Overlay Toggle */} - {gridData && ( -
- - - - - - - {showTextAtPosition && ( - - )} - - {/* Block Review Button */} - {result && nonEmptyBlockCount > 0 && ( - - )} - - {/* Export to Editor Button */} - -
- )} -
- )} - - {/* Grid Stats */} - {gridData && ( -
-

Grid-Erkennung

- -
- -
-
- )} - - {/* Block Review Summary */} - {blockReviewMode && gridData && Object.keys(blockReviewData).length > 0 && ( -
- setCurrentBlockNumber(blockNumber)} - /> -
- )} -
- - {/* Main Content Area */} -
- {/* Page Thumbnails Grid */} - {sessionId && pageCount > 0 && ( -
-

- Seite auswaehlen ({pageCount} Seiten) -

- - {loadingThumbnails ? ( -
- - - - - Lade Seitenvorschau... -
- ) : ( -
- {thumbnails.map((thumb, idx) => ( - - ))} -
- )} -
- )} - - {/* Tab Bar */} - {sessionId && pageCount > 0 && ( -
- - -
- )} - - {/* Ground Truth Panel */} - {activeTab === 'groundtruth' && sessionId && ( - - )} - - {/* Full-Width Comparison View */} - {activeTab === 'compare' && (thumbnails[selectedPage] || result) && sessionId && ( -
- {/* Header with Controls */} -
-

- Vergleich - Seite {selectedPage + 1} -

- -
- {/* Layout Selector - only show after comparison */} - {result && ( -
- Ansicht: - {[1, 2, 3, 4].map(cols => ( - - ))} - -
- )} - - {/* Fullscreen Toggle */} - -
-
- - {/* Single Method Expanded View */} - {expandedMethod && ( -
- - - {expandedMethod === 'original' ? ( -
-
-

Original - Seite {selectedPage + 1}

-
-
- {thumbnails[selectedPage] ? ( - gridData && showGridOverlay ? ( - { - if (!gridData) return - const newCells = gridData.cells.map(row => - row.map(c => c.row === cell.row && c.col === cell.col - ? { ...c, text: newText, status: 'manual' as const } - : c - ) - ) - setGridData({ ...gridData, cells: newCells }) - }} - highlightedBlockNumber={blockReviewMode ? currentBlockNumber : null} - className={`rounded-lg border border-slate-200 overflow-hidden ${isFullscreen ? 'max-h-[80vh] mx-auto' : 'w-full max-w-2xl mx-auto'}`} - /> - ) : ( - {`Seite - ) - ) : ( -
- Kein Bild verfuegbar -
- )} -
-
- ) : ( - (() => { - const method = OCR_METHODS[expandedMethod as keyof typeof OCR_METHODS] - const methodResult = result?.methods?.[expandedMethod] - const isBest = result?.recommendation?.best_method === expandedMethod - return ( -
-
-
-

{method.name}

-

{method.model}

-
- {isBest && ( - - Beste Methode - - )} -
-
- {methodResult && ( -
-
-
-
-
{methodResult.duration_seconds}s
-
Dauer
-
-
-
{methodResult.vocabulary_count}
-
Vokabeln
-
-
-
{(methodResult.confidence * 100).toFixed(0)}%
-
Konfidenz
-
-
-
- {methodResult.vocabulary?.length > 0 && ( -
- -
- )} -
- )} -
-
- ) - })() - )} -
- )} - - {/* Grid View (Normal or Custom Selection) */} - {!expandedMethod && ( -
0 ? visibleMethods.length : columnCount - }, minmax(0, 1fr))` - }} - > - {/* Original PDF Column */} - {(visibleMethods.length === 0 || visibleMethods.includes('original')) && ( -
setExpandedMethod('original')} - > -
-
-

Original

-

Seite {selectedPage + 1}

-
- - - -
-
- {thumbnails[selectedPage] ? ( -
- {/* Show Grid Overlay if available */} - {gridData && showGridOverlay ? ( - { - if (!gridData) return - const newCells = gridData.cells.map(row => - row.map(c => c.row === cell.row && c.col === cell.col - ? { ...c, text: newText, status: 'manual' as const } - : c - ) - ) - setGridData({ ...gridData, cells: newCells }) - }} - highlightedBlockNumber={blockReviewMode ? currentBlockNumber : null} - className="rounded-lg border border-slate-200 overflow-hidden" - /> - ) : ( - {`Seite - )} -
- ) : ( -
- Kein Bild verfuegbar -
- )} -
-
- )} - - {/* Method Result Columns */} - {selectedMethods - .filter(methodId => visibleMethods.length === 0 || visibleMethods.includes(methodId)) - .map(methodId => { - const method = OCR_METHODS[methodId as keyof typeof OCR_METHODS] - const methodResult = result?.methods?.[methodId] - const isBest = result?.recommendation?.best_method === methodId - - return ( -
setExpandedMethod(methodId)} - > -
-
-

{method.shortName}

-

{method.model}

-
-
- {isBest && ( - - Beste - - )} - - - -
-
-
- {comparing && !methodResult && ( -
- - - - - Extrahiere... -
- )} - {methodResult && ( -
-
-
- Dauer: - {methodResult.duration_seconds}s -
-
- Vokabeln: - {methodResult.vocabulary_count} -
- {methodResult.error && ( -
{methodResult.error}
- )} -
- {methodResult.vocabulary?.length > 0 && ( - - )} -
- )} - {!comparing && !methodResult && ( -
- Noch keine Ergebnisse -
- )} -
-
- ) - })} -
- )} - - {/* Method Selector Chips (for custom view) */} - {result && visibleMethods.length > 0 && visibleMethods.length < selectedMethods.length + 1 && ( -
-
- Methoden ein-/ausblenden: - - {selectedMethods.map(methodId => { - const method = OCR_METHODS[methodId as keyof typeof OCR_METHODS] - return ( - - ) - })} -
-
- )} - - {/* Block Review Panel */} - {blockReviewMode && gridData && result && ( -
-
-

- - - - Block-Review -

-

- Prüfen Sie jeden Block und wählen Sie die korrekte Erkennung oder korrigieren Sie manuell. -

-
- -
- )} -
- )} - - {/* Comparison Summary */} - {activeTab === 'compare' && result?.comparison && ( -
-

Vergleichszusammenfassung

- -
-
-
- {result.comparison.total_unique_vocabulary} -
-
Gesamt eindeutig
-
-
-
- {result.comparison.found_by_all_methods?.length || 0} -
-
Von allen erkannt
-
-
-
- {result.comparison.found_by_some_methods?.length || 0} -
-
Unterschiede
-
-
-
- {(result.comparison.agreement_rate * 100).toFixed(0)}% -
-
Uebereinstimmung
-
-
- - {result.recommendation && ( -
-
- Empfehlung: - - {OCR_METHODS[result.recommendation.best_method as keyof typeof OCR_METHODS]?.name || result.recommendation.best_method} - -
-

{result.recommendation.reason}

-
- )} - - {result.comparison.found_by_some_methods?.length > 0 && ( -
-

- Unterschiede (gelb markiert): -

-
- {result.comparison.found_by_some_methods.map((v, idx) => ( -
- {v.english} = {v.german} - - (nur: {v.methods.join(', ')}) - -
- ))} -
-
- )} -
- )} - - {/* Empty State */} - {!sessionId && ( -
- - - -

PDF hochladen

-

- Laden Sie ein PDF hoch oder waehlen Sie eine Session aus der Historie, um OCR-Methoden zu vergleichen. -

-
- )} -
-
- - {/* QR Code Upload Modal */} - {showQRModal && ( -
-
setShowQRModal(false)} /> -
- setShowQRModal(false)} - onFilesChanged={(files) => { - setMobileUploadedFiles(files) - }} - /> -
-
- )} - - {/* Cell Correction Dialog */} - {showCellDialog && selectedCell && sessionId && gridData && ( - { - setShowCellDialog(false) - setSelectedCell(null) - }} - /> - )} -
- ) -} diff --git a/admin-lehrer/app/(admin)/ai/ocr-kombi/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-kombi/page.tsx index 113e239..6c939ab 100644 --- a/admin-lehrer/app/(admin)/ai/ocr-kombi/page.tsx +++ b/admin-lehrer/app/(admin)/ai/ocr-kombi/page.tsx @@ -127,7 +127,6 @@ function OcrKombiContent() { databases: ['PostgreSQL Sessions'], }} relatedPages={[ - { name: 'OCR Overlay (Legacy)', href: '/ai/ocr-overlay', description: 'Alter 3-Modi-Monolith' }, { name: 'OCR Regression', href: '/ai/ocr-regression', description: 'Regressionstests' }, ]} defaultCollapsed diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx deleted file mode 100644 index 8db122e..0000000 --- a/admin-lehrer/app/(admin)/ai/ocr-overlay/page.tsx +++ /dev/null @@ -1,751 +0,0 @@ -'use client' - -import { useCallback, useEffect, useState, useRef } from 'react' -import { useSearchParams } from 'next/navigation' -import { PagePurpose } from '@/components/common/PagePurpose' -import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper' -import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation' -import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew' -import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp' -import { StepCrop } from '@/components/ocr-pipeline/StepCrop' -import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection' -import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection' -import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition' -import { OverlayReconstruction } from '@/components/ocr-overlay/OverlayReconstruction' -import { PaddleDirectStep } from '@/components/ocr-overlay/PaddleDirectStep' -import { GridEditor } from '@/components/grid-editor/GridEditor' -import { StepGridReview } from '@/components/ocr-pipeline/StepGridReview' -import { BoxSessionTabs } from '@/components/ocr-pipeline/BoxSessionTabs' -import { OVERLAY_PIPELINE_STEPS, PADDLE_DIRECT_STEPS, KOMBI_STEPS, DOCUMENT_CATEGORIES, dbStepToOverlayUi, type PipelineStep, type SessionListItem, type DocumentCategory } from './types' -import type { SubSession } from '../ocr-pipeline/types' - -const KLAUSUR_API = '/klausur-api' - -export default function OcrOverlayPage() { - const [mode, setMode] = useState<'pipeline' | 'paddle-direct' | 'kombi'>('pipeline') - const [currentStep, setCurrentStep] = useState(0) - const [sessionId, setSessionId] = useState(null) - const [sessionName, setSessionName] = useState('') - const [sessions, setSessions] = useState([]) - const [loadingSessions, setLoadingSessions] = useState(true) - const [editingName, setEditingName] = useState(null) - const [editNameValue, setEditNameValue] = useState('') - const [editingCategory, setEditingCategory] = useState(null) - const [activeCategory, setActiveCategory] = useState(undefined) - const [editingActiveCategory, setEditingActiveCategory] = useState(false) - const [subSessions, setSubSessions] = useState([]) - const [parentSessionId, setParentSessionId] = useState(null) - const [isGroundTruth, setIsGroundTruth] = useState(false) - const [gtSaving, setGtSaving] = useState(false) - const [gtMessage, setGtMessage] = useState('') - const [steps, setSteps] = useState( - OVERLAY_PIPELINE_STEPS.map((s, i) => ({ - ...s, - status: i === 0 ? 'active' : 'pending', - })), - ) - - const searchParams = useSearchParams() - const deepLinkHandled = useRef(false) - const gridSaveRef = useRef<(() => Promise) | null>(null) - - useEffect(() => { - loadSessions() - }, []) - - const loadSessions = async () => { - setLoadingSessions(true) - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`) - if (res.ok) { - const data = await res.json() - // Filter to only show top-level sessions (no sub-sessions) - setSessions((data.sessions || []).filter((s: SessionListItem) => !s.parent_session_id)) - } - } catch (e) { - console.error('Failed to load sessions:', e) - } finally { - setLoadingSessions(false) - } - } - - const openSession = useCallback(async (sid: string, keepSubSessions?: boolean) => { - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`) - if (!res.ok) return - const data = await res.json() - - setSessionId(sid) - setSessionName(data.name || data.filename || '') - setActiveCategory(data.document_category || undefined) - setIsGroundTruth(!!data.ground_truth?.build_grid_reference) - setGtMessage('') - - // Sub-session handling - if (data.sub_sessions && data.sub_sessions.length > 0) { - setSubSessions(data.sub_sessions) - setParentSessionId(sid) - } else if (data.parent_session_id) { - setParentSessionId(data.parent_session_id) - } else if (!keepSubSessions) { - setSubSessions([]) - setParentSessionId(null) - } - - const isSubSession = !!data.parent_session_id - - // Mode detection for root sessions with word_result - const ocrEngine = data.word_result?.ocr_engine - const isPaddleDirect = ocrEngine === 'paddle_direct' - const isKombi = ocrEngine === 'kombi' || ocrEngine === 'rapid_kombi' - - let activeMode = mode // keep current mode for sub-sessions - if (!isSubSession && (isPaddleDirect || isKombi)) { - activeMode = isKombi ? 'kombi' : 'paddle-direct' - setMode(activeMode) - } else if (!isSubSession && !ocrEngine) { - // Unprocessed root session: keep the user's selected mode - activeMode = mode - } - - const baseSteps = activeMode === 'kombi' ? KOMBI_STEPS - : activeMode === 'paddle-direct' ? PADDLE_DIRECT_STEPS - : OVERLAY_PIPELINE_STEPS - - // Determine UI step - let uiStep: number - const skipIds: string[] = [] - - if (!isSubSession && (isPaddleDirect || isKombi)) { - const hasGrid = isKombi && data.grid_editor_result - const hasStructure = isKombi && data.structure_result - uiStep = hasGrid ? 6 : hasStructure ? 6 : data.word_result ? 5 : 4 - if (isPaddleDirect) uiStep = data.word_result ? 4 : 4 - } else { - const dbStep = data.current_step || 1 - if (dbStep <= 2) uiStep = 0 - else if (dbStep === 3) uiStep = 1 - else if (dbStep === 4) uiStep = 2 - else if (dbStep === 5) uiStep = 3 - else uiStep = 4 - - // Sub-session skip logic - if (isSubSession) { - if (dbStep >= 5) { - skipIds.push('orientation', 'deskew', 'dewarp', 'crop') - if (uiStep < 4) uiStep = 4 - } else if (dbStep >= 2) { - skipIds.push('orientation') - if (uiStep < 1) uiStep = 1 // advance past skipped orientation to deskew - } - } - } - - setSteps( - baseSteps.map((s, i) => ({ - ...s, - status: skipIds.includes(s.id) - ? 'skipped' - : i < uiStep ? 'completed' : i === uiStep ? 'active' : 'pending', - })), - ) - setCurrentStep(uiStep) - } catch (e) { - console.error('Failed to open session:', e) - } - }, [mode]) - - // Handle deep-link: ?session=xxx&mode=kombi (from GT Queue page) - useEffect(() => { - if (deepLinkHandled.current) return - const urlSession = searchParams.get('session') - const urlMode = searchParams.get('mode') - if (urlSession) { - deepLinkHandled.current = true - if (urlMode === 'kombi' || urlMode === 'paddle-direct') { - setMode(urlMode) - const baseSteps = urlMode === 'kombi' ? KOMBI_STEPS : PADDLE_DIRECT_STEPS - setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) - } - openSession(urlSession) - } - }, [searchParams, openSession]) - - const deleteSession = useCallback(async (sid: string) => { - try { - await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' }) - setSessions((prev) => prev.filter((s) => s.id !== sid)) - if (sessionId === sid) { - setSessionId(null) - setCurrentStep(0) - setSubSessions([]) - setParentSessionId(null) - const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS - setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) - } - } catch (e) { - console.error('Failed to delete session:', e) - } - }, [sessionId, mode]) - - const renameSession = useCallback(async (sid: string, newName: string) => { - try { - await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { - method: 'PUT', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ name: newName }), - }) - setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, name: newName } : s))) - if (sessionId === sid) setSessionName(newName) - } catch (e) { - console.error('Failed to rename session:', e) - } - setEditingName(null) - }, [sessionId]) - - const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => { - try { - await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { - method: 'PUT', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ document_category: category }), - }) - setSessions((prev) => prev.map((s) => (s.id === sid ? { ...s, document_category: category } : s))) - if (sessionId === sid) setActiveCategory(category) - } catch (e) { - console.error('Failed to update category:', e) - } - setEditingCategory(null) - }, [sessionId]) - - const handleStepClick = (index: number) => { - if (index <= currentStep || steps[index].status === 'completed') { - setCurrentStep(index) - } - } - - const goToStep = (step: number) => { - setCurrentStep(step) - setSteps((prev) => - prev.map((s, i) => ({ - ...s, - status: i < step ? 'completed' : i === step ? 'active' : 'pending', - })), - ) - } - - const handleNext = () => { - if (currentStep >= steps.length - 1) { - // Sub-session completed — switch back to parent - if (parentSessionId && sessionId !== parentSessionId) { - setSubSessions((prev) => - prev.map((s) => s.id === sessionId ? { ...s, status: 'completed', current_step: 10 } : s) - ) - handleSessionChange(parentSessionId) - return - } - // Last step completed — return to session list - const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS - setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) - setCurrentStep(0) - setSessionId(null) - setSubSessions([]) - setParentSessionId(null) - loadSessions() - return - } - - const nextStep = currentStep + 1 - setSteps((prev) => - prev.map((s, i) => { - if (i === currentStep) return { ...s, status: 'completed' } - if (i === nextStep) return { ...s, status: 'active' } - return s - }), - ) - setCurrentStep(nextStep) - } - - const handleOrientationComplete = async (sid: string) => { - setSessionId(sid) - loadSessions() - - // Check for page-split sub-sessions directly from API - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`) - if (res.ok) { - const data = await res.json() - if (data.sub_sessions?.length > 0) { - const subs: SubSession[] = data.sub_sessions.map((s: SubSession) => ({ - id: s.id, - name: s.name, - box_index: s.box_index, - current_step: s.current_step, - })) - setSubSessions(subs) - setParentSessionId(sid) - openSession(subs[0].id, true) - return - } - } - } catch (e) { - console.error('Failed to check for sub-sessions:', e) - } - - handleNext() - } - - const handleBoxSessionsCreated = useCallback((subs: SubSession[]) => { - setSubSessions(subs) - if (sessionId) setParentSessionId(sessionId) - }, [sessionId]) - - const handleSessionChange = useCallback((newSessionId: string) => { - openSession(newSessionId, true) - }, [openSession]) - - const handleNewSession = () => { - setSessionId(null) - setSessionName('') - setCurrentStep(0) - setSubSessions([]) - setParentSessionId(null) - const baseSteps = mode === 'kombi' ? KOMBI_STEPS : mode === 'paddle-direct' ? PADDLE_DIRECT_STEPS : OVERLAY_PIPELINE_STEPS - setSteps(baseSteps.map((s, i) => ({ ...s, status: i === 0 ? 'active' : 'pending' }))) - } - - const stepNames: Record = { - 1: 'Orientierung', - 2: 'Begradigung', - 3: 'Entzerrung', - 4: 'Zuschneiden', - 5: 'Zeilen', - 6: 'Woerter', - 7: 'Overlay', - } - - const reprocessFromStep = useCallback(async (uiStep: number) => { - if (!sessionId) return - // Map overlay UI step to DB step - const dbStepMap: Record = { 0: 2, 1: 3, 2: 4, 3: 5, 4: 7, 5: 8, 6: 9 } - const dbStep = dbStepMap[uiStep] || uiStep + 1 - if (!confirm(`Ab Schritt ${uiStep + 1} (${stepNames[uiStep + 1] || '?'}) neu verarbeiten?`)) return - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ from_step: dbStep }), - }) - if (!res.ok) { - const data = await res.json().catch(() => ({})) - console.error('Reprocess failed:', data.detail || res.status) - return - } - goToStep(uiStep) - } catch (e) { - console.error('Reprocess error:', e) - } - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [sessionId, goToStep]) - - const handleMarkGroundTruth = async () => { - if (!sessionId) return - setGtSaving(true) - setGtMessage('') - try { - // Auto-save grid editor before marking GT (so DB has latest edits) - if (gridSaveRef.current) { - await gridSaveRef.current() - } - const resp = await fetch( - `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/mark-ground-truth?pipeline=${mode}`, - { method: 'POST' } - ) - if (!resp.ok) { - const body = await resp.text().catch(() => '') - throw new Error(`Ground Truth fehlgeschlagen (${resp.status}): ${body}`) - } - const data = await resp.json() - setIsGroundTruth(true) - setGtMessage(`Ground Truth gespeichert (${data.cells_saved} Zellen)`) - setTimeout(() => setGtMessage(''), 5000) - } catch (e) { - setGtMessage(e instanceof Error ? e.message : String(e)) - } finally { - setGtSaving(false) - } - } - - const isLastStep = currentStep === steps.length - 1 - const showGtButton = isLastStep && sessionId != null - - const renderStep = () => { - if (mode === 'paddle-direct' || mode === 'kombi') { - switch (currentStep) { - case 0: - return { loadSessions(); setSessionId(null) }} /> - case 1: - return - case 2: - return - case 3: - return - case 4: - if (mode === 'kombi') { - return ( - - ) - } - return - case 5: - return mode === 'kombi' ? ( - - ) : null - case 6: - return mode === 'kombi' ? ( - - ) : null - default: - return null - } - } - switch (currentStep) { - case 0: - return { loadSessions(); setSessionId(null) }} /> - case 1: - return - case 2: - return - case 3: - return - case 4: - return - case 5: - return - case 6: - return - default: - return null - } - } - - return ( -
- - - {/* Session List */} -
-
-

- Sessions ({sessions.length}) -

- -
- - {loadingSessions ? ( -
Lade Sessions...
- ) : sessions.length === 0 ? ( -
Noch keine Sessions vorhanden.
- ) : ( -
- {sessions.map((s) => { - const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category) - return ( -
- {/* Thumbnail */} -
openSession(s.id)} - > - {/* eslint-disable-next-line @next/next/no-img-element */} - { (e.target as HTMLImageElement).style.display = 'none' }} - /> -
- - {/* Info */} -
openSession(s.id)}> - {editingName === s.id ? ( - setEditNameValue(e.target.value)} - onBlur={() => renameSession(s.id, editNameValue)} - onKeyDown={(e) => { - if (e.key === 'Enter') renameSession(s.id, editNameValue) - if (e.key === 'Escape') setEditingName(null) - }} - onClick={(e) => e.stopPropagation()} - className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600" - /> - ) : ( -
- {s.name || s.filename} -
- )} - -
- {new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })} -
-
- - {/* Category Badge */} -
e.stopPropagation()}> - -
- - {/* Actions */} -
- - -
- - {/* Category dropdown */} - {editingCategory === s.id && ( -
e.stopPropagation()} - > - {DOCUMENT_CATEGORIES.map((cat) => ( - - ))} -
- )} -
- ) - })} -
- )} -
- - {/* Active session info + category picker */} - {sessionId && sessionName && ( -
- Aktive Session: {sessionName} - - {isGroundTruth && ( - - GT - - )} - {editingActiveCategory && ( -
- {DOCUMENT_CATEGORIES.map((cat) => ( - - ))} -
- )} -
- )} - - {/* Mode Toggle */} -
- - - -
- - - - {subSessions.length > 0 && parentSessionId && sessionId && ( - - )} - -
{renderStep()}
- - {/* Ground Truth button bar — visible on last step */} - {showGtButton && ( -
-
- {gtMessage && ( - - {gtMessage} - - )} -
- -
- )} -
- ) -} diff --git a/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts b/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts deleted file mode 100644 index 45492ce..0000000 --- a/admin-lehrer/app/(admin)/ai/ocr-overlay/types.ts +++ /dev/null @@ -1,87 +0,0 @@ -import type { PipelineStep } from '../ocr-pipeline/types' - -// Re-export types used by overlay components -export type { - PipelineStep, - PipelineStepStatus, - SessionListItem, - SessionInfo, - DocumentCategory, - DocumentTypeResult, - OrientationResult, - CropResult, - DeskewResult, - DewarpResult, - RowResult, - RowItem, - GridResult, - GridCell, - OcrWordBox, - WordBbox, - ColumnMeta, -} from '../ocr-pipeline/types' - -export { DOCUMENT_CATEGORIES } from '../ocr-pipeline/types' - -/** - * 7-step pipeline for full-page overlay reconstruction. - * Skips: Spalten (columns), LLM-Review (Korrektur), Ground-Truth (Validierung) - */ -export const OVERLAY_PIPELINE_STEPS: PipelineStep[] = [ - { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' }, - { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' }, - { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' }, - { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' }, - { id: 'rows', name: 'Zeilen', icon: '📏', status: 'pending' }, - { id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' }, - { id: 'reconstruction', name: 'Overlay', icon: '🏗️', status: 'pending' }, -] - -/** Map from overlay UI step index to DB step number (1-indexed) */ -export const OVERLAY_UI_TO_DB: Record = { - 0: 2, // orientation - 1: 3, // deskew - 2: 4, // dewarp - 3: 5, // crop - 4: 6, // rows (skip columns=6 in DB, rows=7 — but we reuse DB step numbering) - 5: 7, // words - 6: 9, // reconstruction -} - -/** - * 5-step pipeline for Paddle Direct mode. - * Same preprocessing (orient/deskew/dewarp/crop), then PaddleOCR replaces rows+words+overlay. - */ -export const PADDLE_DIRECT_STEPS: PipelineStep[] = [ - { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' }, - { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' }, - { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' }, - { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' }, - { id: 'paddle-direct', name: 'PP-OCRv5 + Overlay', icon: '⚡', status: 'pending' }, -] - -/** - * 5-step pipeline for Kombi mode (PP-OCRv5 + Tesseract). - * Same preprocessing, then both engines run and results are merged. - */ -export const KOMBI_STEPS: PipelineStep[] = [ - { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' }, - { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' }, - { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' }, - { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' }, - { id: 'kombi', name: 'PP-OCRv5 + Tesseract', icon: '🔀', status: 'pending' }, - { id: 'structure', name: 'Struktur', icon: '🔍', status: 'pending' }, - { id: 'grid-editor', name: 'Review & GT', icon: '📊', status: 'pending' }, -] - -/** Map from DB step to overlay UI step index */ -export function dbStepToOverlayUi(dbStep: number): number { - // DB: 1=start, 2=orient, 3=deskew, 4=dewarp, 5=crop, 6=columns, 7=rows, 8=words, 9=recon, 10=gt - if (dbStep <= 2) return 0 // orientation - if (dbStep === 3) return 1 // deskew - if (dbStep === 4) return 2 // dewarp - if (dbStep === 5) return 3 // crop - if (dbStep <= 7) return 4 // rows (skip columns) - if (dbStep === 8) return 5 // words - return 6 // reconstruction -} diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx b/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx deleted file mode 100644 index 9214a09..0000000 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/page.tsx +++ /dev/null @@ -1,443 +0,0 @@ -'use client' - -import { Suspense, useCallback, useEffect, useState } from 'react' -import { PagePurpose } from '@/components/common/PagePurpose' -import { PipelineStepper } from '@/components/ocr-pipeline/PipelineStepper' -import { StepOrientation } from '@/components/ocr-pipeline/StepOrientation' -import { StepCrop } from '@/components/ocr-pipeline/StepCrop' -import { StepDeskew } from '@/components/ocr-pipeline/StepDeskew' -import { StepDewarp } from '@/components/ocr-pipeline/StepDewarp' -import { StepStructureDetection } from '@/components/ocr-pipeline/StepStructureDetection' -import { StepColumnDetection } from '@/components/ocr-pipeline/StepColumnDetection' -import { StepRowDetection } from '@/components/ocr-pipeline/StepRowDetection' -import { StepWordRecognition } from '@/components/ocr-pipeline/StepWordRecognition' -import { StepLlmReview } from '@/components/ocr-pipeline/StepLlmReview' -import { StepReconstruction } from '@/components/ocr-pipeline/StepReconstruction' -import { StepGroundTruth } from '@/components/ocr-pipeline/StepGroundTruth' -import { DOCUMENT_CATEGORIES, type SessionListItem, type DocumentTypeResult, type DocumentCategory, type SubSession } from './types' -import { usePipelineNavigation } from './usePipelineNavigation' - -const KLAUSUR_API = '/klausur-api' - -const STEP_NAMES: Record = { - 1: 'Orientierung', 2: 'Begradigung', 3: 'Entzerrung', 4: 'Zuschneiden', - 5: 'Spalten', 6: 'Zeilen', 7: 'Woerter', 8: 'Struktur', - 9: 'Korrektur', 10: 'Rekonstruktion', 11: 'Validierung', -} - -function OcrPipelineContent() { - const nav = usePipelineNavigation() - const [sessions, setSessions] = useState([]) - const [loadingSessions, setLoadingSessions] = useState(true) - const [editingName, setEditingName] = useState(null) - const [editNameValue, setEditNameValue] = useState('') - const [editingCategory, setEditingCategory] = useState(null) - const [sessionName, setSessionName] = useState('') - const [activeCategory, setActiveCategory] = useState(undefined) - - const loadSessions = useCallback(async () => { - setLoadingSessions(true) - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`) - if (res.ok) { - const data = await res.json() - setSessions(data.sessions || []) - } - } catch (e) { - console.error('Failed to load sessions:', e) - } finally { - setLoadingSessions(false) - } - }, []) - - useEffect(() => { loadSessions() }, [loadSessions]) - - // Sync session name when nav.sessionId changes - useEffect(() => { - if (!nav.sessionId) { - setSessionName('') - setActiveCategory(undefined) - return - } - const load = async () => { - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${nav.sessionId}`) - if (!res.ok) return - const data = await res.json() - setSessionName(data.name || data.filename || '') - setActiveCategory(data.document_category || undefined) - } catch { /* ignore */ } - } - load() - }, [nav.sessionId]) - - const openSession = useCallback((sid: string) => { - nav.goToSession(sid) - }, [nav]) - - const deleteSession = useCallback(async (sid: string) => { - try { - await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { method: 'DELETE' }) - setSessions(prev => prev.filter(s => s.id !== sid)) - if (nav.sessionId === sid) nav.goToSessionList() - } catch (e) { - console.error('Failed to delete session:', e) - } - }, [nav]) - - const renameSession = useCallback(async (sid: string, newName: string) => { - try { - await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { - method: 'PUT', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ name: newName }), - }) - setSessions(prev => prev.map(s => (s.id === sid ? { ...s, name: newName } : s))) - if (nav.sessionId === sid) setSessionName(newName) - } catch (e) { - console.error('Failed to rename session:', e) - } - setEditingName(null) - }, [nav.sessionId]) - - const updateCategory = useCallback(async (sid: string, category: DocumentCategory) => { - try { - await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sid}`, { - method: 'PUT', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ document_category: category }), - }) - setSessions(prev => prev.map(s => (s.id === sid ? { ...s, document_category: category } : s))) - if (nav.sessionId === sid) setActiveCategory(category) - } catch (e) { - console.error('Failed to update category:', e) - } - setEditingCategory(null) - }, [nav.sessionId]) - - const deleteAllSessions = useCallback(async () => { - if (!confirm('Alle Sessions loeschen? Dies kann nicht rueckgaengig gemacht werden.')) return - try { - await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions`, { method: 'DELETE' }) - setSessions([]) - nav.goToSessionList() - } catch (e) { - console.error('Failed to delete all sessions:', e) - } - }, [nav]) - - const handleStepClick = (index: number) => { - if (index <= nav.currentStepIndex || nav.steps[index].status === 'completed') { - nav.goToStep(index) - } - } - - // Orientation: after upload, navigate to session at deskew step - const handleOrientationComplete = useCallback(async (sid: string) => { - loadSessions() - // Navigate directly to deskew step (index 1) for this session - nav.goToSession(sid) - }, [nav, loadSessions]) - - // Crop: detect doc type then advance - const handleCropNext = useCallback(async () => { - if (nav.sessionId) { - try { - const res = await fetch( - `${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${nav.sessionId}/detect-type`, - { method: 'POST' }, - ) - if (res.ok) { - const data: DocumentTypeResult = await res.json() - nav.setDocType(data) - } - } catch (e) { - console.error('Doc type detection failed:', e) - } - } - nav.goToNextStep() - }, [nav]) - - const handleDocTypeChange = (newDocType: DocumentTypeResult['doc_type']) => { - if (!nav.docTypeResult) return - let skipSteps: string[] = [] - if (newDocType === 'full_text') skipSteps = ['columns', 'rows'] - - nav.setDocType({ - ...nav.docTypeResult, - doc_type: newDocType, - skip_steps: skipSteps, - pipeline: newDocType === 'full_text' ? 'full_page' : 'cell_first', - }) - } - - // Box sub-sessions (column detection) — still supported - const handleBoxSessionsCreated = useCallback((_subs: SubSession[]) => { - // Box sub-sessions are tracked by the backend; no client-side state needed anymore - }, []) - - const renderStep = () => { - const sid = nav.sessionId - switch (nav.currentStepIndex) { - case 0: - return ( - { loadSessions(); nav.goToSessionList() }} - /> - ) - case 1: - return - case 2: - return - case 3: - return - case 4: - return - case 5: - return - case 6: - return - case 7: - return - case 8: - return - case 9: - return - case 10: - return - default: - return null - } - } - - return ( -
- - - {/* Session List */} -
-
-

- Sessions ({sessions.length}) -

-
- {sessions.length > 0 && ( - - )} - -
-
- - {loadingSessions ? ( -
Lade Sessions...
- ) : sessions.length === 0 ? ( -
Noch keine Sessions vorhanden.
- ) : ( -
- {sessions.map((s) => { - const catInfo = DOCUMENT_CATEGORIES.find(c => c.value === s.document_category) - return ( -
- {/* Thumbnail */} -
openSession(s.id)} - > - {/* eslint-disable-next-line @next/next/no-img-element */} - { (e.target as HTMLImageElement).style.display = 'none' }} - /> -
- - {/* Info */} -
openSession(s.id)}> - {editingName === s.id ? ( - setEditNameValue(e.target.value)} - onBlur={() => renameSession(s.id, editNameValue)} - onKeyDown={(e) => { - if (e.key === 'Enter') renameSession(s.id, editNameValue) - if (e.key === 'Escape') setEditingName(null) - }} - onClick={(e) => e.stopPropagation()} - className="w-full px-1 py-0.5 text-sm border rounded dark:bg-gray-700 dark:border-gray-600" - /> - ) : ( -
- {s.name || s.filename} -
- )} - {/* ID row */} - -
- {new Date(s.created_at).toLocaleDateString('de-DE', { day: '2-digit', month: '2-digit', year: '2-digit', hour: '2-digit', minute: '2-digit' })} - Schritt {s.current_step}: {STEP_NAMES[s.current_step] || '?'} -
-
- - {/* Badges */} -
e.stopPropagation()}> - - {s.doc_type && ( - - {s.doc_type} - - )} -
- - {/* Action buttons */} -
- - -
- - {/* Category dropdown */} - {editingCategory === s.id && ( -
e.stopPropagation()} - > - {DOCUMENT_CATEGORIES.map((cat) => ( - - ))} -
- )} -
- ) - })} -
- )} -
- - {/* Active session info */} - {nav.sessionId && sessionName && ( -
- Aktive Session: {sessionName} - {activeCategory && (() => { - const cat = DOCUMENT_CATEGORIES.find(c => c.value === activeCategory) - return cat ? {cat.icon} {cat.label} : null - })()} - {nav.docTypeResult && ( - - {nav.docTypeResult.doc_type} - - )} -
- )} - - - -
{renderStep()}
-
- ) -} - -export default function OcrPipelinePage() { - return ( - Lade Pipeline...
}> - - - ) -} diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts b/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts deleted file mode 100644 index 3da6e4c..0000000 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/types.ts +++ /dev/null @@ -1,430 +0,0 @@ -export type PipelineStepStatus = 'pending' | 'active' | 'completed' | 'failed' | 'skipped' - -export interface PipelineStep { - id: string - name: string - icon: string - status: PipelineStepStatus -} - -export type DocumentCategory = - | 'vokabelseite' | 'woerterbuch' | 'buchseite' | 'arbeitsblatt' | 'klausurseite' - | 'mathearbeit' | 'statistik' | 'zeitung' | 'formular' | 'handschrift' | 'sonstiges' - -export const DOCUMENT_CATEGORIES: { value: DocumentCategory; label: string; icon: string }[] = [ - { value: 'vokabelseite', label: 'Vokabelseite', icon: '📖' }, - { value: 'woerterbuch', label: 'Woerterbuch', icon: '📕' }, - { value: 'buchseite', label: 'Buchseite', icon: '📚' }, - { value: 'arbeitsblatt', label: 'Arbeitsblatt', icon: '📝' }, - { value: 'klausurseite', label: 'Klausurseite', icon: '📄' }, - { value: 'mathearbeit', label: 'Mathearbeit', icon: '🔢' }, - { value: 'statistik', label: 'Statistik', icon: '📊' }, - { value: 'zeitung', label: 'Zeitung', icon: '📰' }, - { value: 'formular', label: 'Formular', icon: '📋' }, - { value: 'handschrift', label: 'Handschrift', icon: '✍️' }, - { value: 'sonstiges', label: 'Sonstiges', icon: '📎' }, -] - -export interface SessionListItem { - id: string - name: string - filename: string - status: string - current_step: number - document_category?: DocumentCategory - doc_type?: string - parent_session_id?: string - document_group_id?: string - page_number?: number - is_ground_truth?: boolean - created_at: string - updated_at?: string -} - -/** Box sub-session (from column detection zone_type='box') */ -export interface SubSession { - id: string - name: string - box_index: number - current_step?: number - status?: string -} - -export interface PipelineLogEntry { - step: string - completed_at: string - success: boolean - duration_ms?: number - metrics: Record -} - -export interface PipelineLog { - steps: PipelineLogEntry[] -} - -export interface DocumentTypeResult { - doc_type: 'vocab_table' | 'full_text' | 'generic_table' - confidence: number - pipeline: 'cell_first' | 'full_page' - skip_steps: string[] - features?: Record - duration_seconds?: number -} - -export interface OrientationResult { - orientation_degrees: number - corrected: boolean - duration_seconds: number -} - -export interface CropResult { - crop_applied: boolean - crop_rect?: { x: number; y: number; width: number; height: number } - crop_rect_pct?: { x: number; y: number; width: number; height: number } - original_size: { width: number; height: number } - cropped_size: { width: number; height: number } - detected_format?: string - format_confidence?: number - aspect_ratio?: number - border_fractions?: { top: number; bottom: number; left: number; right: number } - skipped?: boolean - duration_seconds?: number -} - -export interface SessionInfo { - session_id: string - filename: string - name?: string - image_width: number - image_height: number - original_image_url: string - current_step?: number - document_category?: DocumentCategory - doc_type?: string - orientation_result?: OrientationResult - crop_result?: CropResult - deskew_result?: DeskewResult - dewarp_result?: DewarpResult - column_result?: ColumnResult - row_result?: RowResult - word_result?: GridResult - doc_type_result?: DocumentTypeResult - sub_sessions?: SubSession[] - parent_session_id?: string - box_index?: number - document_group_id?: string - page_number?: number -} - -export interface DeskewResult { - session_id: string - angle_hough: number - angle_word_alignment: number - angle_iterative?: number - angle_residual?: number - angle_textline?: number - angle_applied: number - method_used: 'hough' | 'word_alignment' | 'manual' | 'iterative' | 'two_pass' | 'three_pass' | 'manual_combined' - confidence: number - duration_seconds: number - deskewed_image_url: string - binarized_image_url: string -} - -export interface DeskewGroundTruth { - is_correct: boolean - corrected_angle?: number - notes?: string -} - -export interface DewarpDetection { - method: string - shear_degrees: number - confidence: number -} - -export interface DewarpResult { - session_id: string - method_used: string - shear_degrees: number - confidence: number - duration_seconds: number - dewarped_image_url: string - detections?: DewarpDetection[] -} - -export interface DewarpGroundTruth { - is_correct: boolean - corrected_shear?: number - notes?: string -} - -export interface PageRegion { - type: 'column_en' | 'column_de' | 'column_example' | 'page_ref' - | 'column_marker' | 'column_text' | 'column_ignore' | 'header' | 'footer' - x: number - y: number - width: number - height: number - classification_confidence?: number - classification_method?: string -} - -export interface PageZone { - zone_type: 'content' | 'box' - y_start: number - y_end: number - box?: { x: number; y: number; width: number; height: number } -} - -export interface ColumnResult { - columns: PageRegion[] - duration_seconds: number - zones?: PageZone[] -} - -export interface ColumnGroundTruth { - is_correct: boolean - corrected_columns?: PageRegion[] - notes?: string -} - -export interface ManualColumnDivider { - xPercent: number // Position in % of image width (0-100) -} - -export type ColumnTypeKey = PageRegion['type'] - -export interface RowResult { - rows: RowItem[] - summary: Record - total_rows: number - duration_seconds: number -} - -export interface RowItem { - index: number - x: number - y: number - width: number - height: number - word_count: number - row_type: 'content' | 'header' | 'footer' - gap_before: number -} - -export interface RowGroundTruth { - is_correct: boolean - corrected_rows?: RowItem[] - notes?: string -} - -export interface StructureGraphic { - x: number - y: number - w: number - h: number - area: number - shape: string // image, illustration - color_name: string - color_hex: string - confidence: number -} - -export interface ExcludeRegion { - x: number - y: number - w: number - h: number - label?: string -} - -export interface DocLayoutRegion { - x: number - y: number - w: number - h: number - class_name: string - confidence: number -} - -export interface StructureResult { - image_width: number - image_height: number - content_bounds: { x: number; y: number; w: number; h: number } - boxes: StructureBox[] - zones: StructureZone[] - graphics: StructureGraphic[] - exclude_regions?: ExcludeRegion[] - color_pixel_counts: Record - has_words: boolean - word_count: number - border_ghosts_removed?: number - duration_seconds: number - /** PP-DocLayout regions (only present when method=ppdoclayout) */ - layout_regions?: DocLayoutRegion[] - detection_method?: 'opencv' | 'ppdoclayout' -} - -export interface StructureBox { - x: number - y: number - w: number - h: number - confidence: number - border_thickness: number - bg_color_name?: string - bg_color_hex?: string -} - -export interface StructureZone { - index: number - zone_type: 'content' | 'box' - x: number - y: number - w: number - h: number -} - -export interface WordBbox { - x: number - y: number - w: number - h: number -} - -export interface OcrWordBox { - text: string - left: number // absolute image x in px - top: number // absolute image y in px - width: number // px - height: number // px - conf: number - color?: string // hex color of detected text, e.g. '#dc2626' - color_name?: string // 'black' | 'red' | 'blue' | 'green' | 'orange' | 'purple' | 'yellow' - recovered?: boolean // true if this word was recovered via color detection -} - -export interface GridCell { - cell_id: string // "R03_C1" - row_index: number - col_index: number - col_type: string - text: string - confidence: number - bbox_px: WordBbox - bbox_pct: WordBbox - ocr_engine?: string - is_bold?: boolean - status?: 'pending' | 'confirmed' | 'edited' | 'skipped' - word_boxes?: OcrWordBox[] // per-word bounding boxes from OCR engine -} - -export interface ColumnMeta { - index: number - type: string - x: number - width: number -} - -export interface GridResult { - cells: GridCell[] - grid_shape: { rows: number; cols: number; total_cells: number } - columns_used: ColumnMeta[] - layout: 'vocab' | 'generic' - image_width: number - image_height: number - duration_seconds: number - ocr_engine?: string - vocab_entries?: WordEntry[] // Only when layout='vocab' - entries?: WordEntry[] // Backwards compat alias for vocab_entries - entry_count?: number - summary: { - total_cells: number - non_empty_cells: number - low_confidence: number - // Only when layout='vocab': - total_entries?: number - with_english?: number - with_german?: number - } - llm_review?: { - changes: { row_index: number; field: string; old: string; new: string }[] - model_used: string - duration_ms: number - entries_corrected: number - applied_count?: number - applied_at?: string - } -} - -export interface WordEntry { - row_index: number - english: string - german: string - example: string - source_page?: string - marker?: string - confidence: number - bbox: WordBbox - bbox_en: WordBbox | null - bbox_de: WordBbox | null - bbox_ex: WordBbox | null - bbox_ref?: WordBbox | null - bbox_marker?: WordBbox | null - status?: 'pending' | 'confirmed' | 'edited' | 'skipped' -} - -/** @deprecated Use GridResult instead */ -export interface WordResult { - entries: WordEntry[] - entry_count: number - image_width: number - image_height: number - duration_seconds: number - ocr_engine?: string - summary: { - total_entries: number - with_english: number - with_german: number - low_confidence: number - } -} - -export interface WordGroundTruth { - is_correct: boolean - corrected_entries?: WordEntry[] - notes?: string -} - -export interface ImageRegion { - bbox_pct: { x: number; y: number; w: number; h: number } - prompt: string - description: string - image_b64: string | null - style: 'educational' | 'cartoon' | 'sketch' | 'clipart' | 'realistic' -} - -export type ImageStyle = ImageRegion['style'] - -export const IMAGE_STYLES: { value: ImageStyle; label: string }[] = [ - { value: 'educational', label: 'Lehrbuch' }, - { value: 'cartoon', label: 'Cartoon' }, - { value: 'sketch', label: 'Skizze' }, - { value: 'clipart', label: 'Clipart' }, - { value: 'realistic', label: 'Realistisch' }, -] - -export const PIPELINE_STEPS: PipelineStep[] = [ - { id: 'orientation', name: 'Orientierung', icon: '🔄', status: 'pending' }, - { id: 'deskew', name: 'Begradigung', icon: '📐', status: 'pending' }, - { id: 'dewarp', name: 'Entzerrung', icon: '🔧', status: 'pending' }, - { id: 'crop', name: 'Zuschneiden', icon: '✂️', status: 'pending' }, - { id: 'columns', name: 'Spalten', icon: '📊', status: 'pending' }, - { id: 'rows', name: 'Zeilen', icon: '📏', status: 'pending' }, - { id: 'words', name: 'Woerter', icon: '🔤', status: 'pending' }, - { id: 'structure', name: 'Struktur', icon: '🔍', status: 'pending' }, - { id: 'llm-review', name: 'Korrektur', icon: '✏️', status: 'pending' }, - { id: 'reconstruction', name: 'Rekonstruktion', icon: '🏗️', status: 'pending' }, - { id: 'ground-truth', name: 'Validierung', icon: '✅', status: 'pending' }, -] diff --git a/admin-lehrer/app/(admin)/ai/ocr-pipeline/usePipelineNavigation.ts b/admin-lehrer/app/(admin)/ai/ocr-pipeline/usePipelineNavigation.ts deleted file mode 100644 index f1f6e13..0000000 --- a/admin-lehrer/app/(admin)/ai/ocr-pipeline/usePipelineNavigation.ts +++ /dev/null @@ -1,225 +0,0 @@ -'use client' - -import { useCallback, useEffect, useState } from 'react' -import { useRouter, useSearchParams } from 'next/navigation' -import { PIPELINE_STEPS, type PipelineStep, type PipelineStepStatus, type DocumentTypeResult } from './types' - -const KLAUSUR_API = '/klausur-api' - -export interface PipelineNav { - sessionId: string | null - currentStepIndex: number - currentStepId: string - steps: PipelineStep[] - docTypeResult: DocumentTypeResult | null - - goToNextStep: () => void - goToStep: (index: number) => void - goToSession: (sessionId: string) => void - goToSessionList: () => void - setDocType: (result: DocumentTypeResult) => void - reprocessFromStep: (uiStep: number) => Promise -} - -const STEP_NAMES: Record = { - 1: 'Orientierung', 2: 'Begradigung', 3: 'Entzerrung', 4: 'Zuschneiden', - 5: 'Spalten', 6: 'Zeilen', 7: 'Woerter', 8: 'Struktur', - 9: 'Korrektur', 10: 'Rekonstruktion', 11: 'Validierung', -} - -function buildSteps(uiStep: number, skipSteps: string[]): PipelineStep[] { - return PIPELINE_STEPS.map((s, i) => ({ - ...s, - status: ( - skipSteps.includes(s.id) ? 'skipped' - : i < uiStep ? 'completed' - : i === uiStep ? 'active' - : 'pending' - ) as PipelineStepStatus, - })) -} - -export function usePipelineNavigation(): PipelineNav { - const router = useRouter() - const searchParams = useSearchParams() - - const paramSession = searchParams.get('session') - const paramStep = searchParams.get('step') - - const [sessionId, setSessionId] = useState(paramSession) - const [currentStepIndex, setCurrentStepIndex] = useState(0) - const [docTypeResult, setDocTypeResult] = useState(null) - const [steps, setSteps] = useState(buildSteps(0, [])) - const [loaded, setLoaded] = useState(false) - - // Load session info when session param changes - useEffect(() => { - if (!paramSession) { - setSessionId(null) - setCurrentStepIndex(0) - setDocTypeResult(null) - setSteps(buildSteps(0, [])) - setLoaded(true) - return - } - - const load = async () => { - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${paramSession}`) - if (!res.ok) return - const data = await res.json() - - setSessionId(paramSession) - - const savedDocType: DocumentTypeResult | null = data.doc_type_result || null - setDocTypeResult(savedDocType) - - const dbStep = data.current_step || 1 - let uiStep = Math.max(0, dbStep - 1) - const skipSteps = [...(savedDocType?.skip_steps || [])] - - // Box sub-sessions (from column detection) skip pre-processing - const isBoxSubSession = !!data.parent_session_id - if (isBoxSubSession && dbStep >= 5) { - const SUB_SESSION_SKIP = ['orientation', 'deskew', 'dewarp', 'crop'] - for (const s of SUB_SESSION_SKIP) { - if (!skipSteps.includes(s)) skipSteps.push(s) - } - if (uiStep < 4) uiStep = 4 - } - - // If URL has a step param, use that instead - if (paramStep) { - const stepIdx = PIPELINE_STEPS.findIndex(s => s.id === paramStep) - if (stepIdx >= 0) uiStep = stepIdx - } - - setCurrentStepIndex(uiStep) - setSteps(buildSteps(uiStep, skipSteps)) - } catch (e) { - console.error('Failed to load session:', e) - } finally { - setLoaded(true) - } - } - - load() - }, [paramSession, paramStep]) - - const updateUrl = useCallback((sid: string | null, stepIdx?: number) => { - if (!sid) { - router.push('/ai/ocr-pipeline') - return - } - const stepId = stepIdx !== undefined ? PIPELINE_STEPS[stepIdx]?.id : undefined - const params = new URLSearchParams() - params.set('session', sid) - if (stepId) params.set('step', stepId) - router.push(`/ai/ocr-pipeline?${params.toString()}`) - }, [router]) - - const goToNextStep = useCallback(() => { - if (currentStepIndex >= steps.length - 1) { - // Last step — return to session list - setSessionId(null) - setCurrentStepIndex(0) - setDocTypeResult(null) - setSteps(buildSteps(0, [])) - router.push('/ai/ocr-pipeline') - return - } - - const skipSteps = docTypeResult?.skip_steps || [] - let nextStep = currentStepIndex + 1 - while (nextStep < steps.length && skipSteps.includes(PIPELINE_STEPS[nextStep]?.id)) { - nextStep++ - } - if (nextStep >= steps.length) nextStep = steps.length - 1 - - setSteps(prev => - prev.map((s, i) => { - if (i === currentStepIndex) return { ...s, status: 'completed' as PipelineStepStatus } - if (i === nextStep) return { ...s, status: 'active' as PipelineStepStatus } - if (i > currentStepIndex && i < nextStep && skipSteps.includes(PIPELINE_STEPS[i]?.id)) { - return { ...s, status: 'skipped' as PipelineStepStatus } - } - return s - }), - ) - setCurrentStepIndex(nextStep) - if (sessionId) updateUrl(sessionId, nextStep) - }, [currentStepIndex, steps.length, docTypeResult, sessionId, updateUrl, router]) - - const goToStep = useCallback((index: number) => { - setCurrentStepIndex(index) - setSteps(prev => - prev.map((s, i) => ({ - ...s, - status: s.status === 'skipped' ? 'skipped' - : i < index ? 'completed' - : i === index ? 'active' - : 'pending' as PipelineStepStatus, - })), - ) - if (sessionId) updateUrl(sessionId, index) - }, [sessionId, updateUrl]) - - const goToSession = useCallback((sid: string) => { - updateUrl(sid) - }, [updateUrl]) - - const goToSessionList = useCallback(() => { - setSessionId(null) - setCurrentStepIndex(0) - setDocTypeResult(null) - setSteps(buildSteps(0, [])) - router.push('/ai/ocr-pipeline') - }, [router]) - - const setDocType = useCallback((result: DocumentTypeResult) => { - setDocTypeResult(result) - const skipSteps = result.skip_steps || [] - if (skipSteps.length > 0) { - setSteps(prev => - prev.map(s => - skipSteps.includes(s.id) ? { ...s, status: 'skipped' as PipelineStepStatus } : s, - ), - ) - } - }, []) - - const reprocessFromStep = useCallback(async (uiStep: number) => { - if (!sessionId) return - const dbStep = uiStep + 1 - if (!confirm(`Ab Schritt ${dbStep} (${STEP_NAMES[dbStep] || '?'}) neu verarbeiten? Nachfolgende Daten werden geloescht.`)) return - try { - const res = await fetch(`${KLAUSUR_API}/api/v1/ocr-pipeline/sessions/${sessionId}/reprocess`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ from_step: dbStep }), - }) - if (!res.ok) { - const data = await res.json().catch(() => ({})) - console.error('Reprocess failed:', data.detail || res.status) - return - } - goToStep(uiStep) - } catch (e) { - console.error('Reprocess error:', e) - } - }, [sessionId, goToStep]) - - return { - sessionId, - currentStepIndex, - currentStepId: PIPELINE_STEPS[currentStepIndex]?.id || 'orientation', - steps, - docTypeResult, - goToNextStep, - goToStep, - goToSession, - goToSessionList, - setDocType, - reprocessFromStep, - } -} diff --git a/admin-lehrer/app/(admin)/communication/matrix/page.tsx b/admin-lehrer/app/(admin)/communication/matrix/page.tsx deleted file mode 100644 index a6286f6..0000000 --- a/admin-lehrer/app/(admin)/communication/matrix/page.tsx +++ /dev/null @@ -1,593 +0,0 @@ -'use client' - -/** - * Voice Service Admin Page (migrated from website/admin/voice) - * - * Displays: - * - Voice-First Architecture Overview - * - Developer Guide Content - * - Live Voice Demo (embedded from studio-v2) - * - Task State Machine Documentation - * - DSGVO Compliance Information - */ - -import { useState } from 'react' -import Link from 'next/link' -import { PagePurpose } from '@/components/common/PagePurpose' - -type TabType = 'overview' | 'demo' | 'tasks' | 'intents' | 'dsgvo' | 'api' - -// Task State Machine data -const TASK_STATES = [ - { state: 'DRAFT', description: 'Task erstellt, noch nicht verarbeitet', color: 'bg-gray-100 text-gray-800', next: ['QUEUED', 'PAUSED'] }, - { state: 'QUEUED', description: 'In Warteschlange fuer Verarbeitung', color: 'bg-blue-100 text-blue-800', next: ['RUNNING', 'PAUSED'] }, - { state: 'RUNNING', description: 'Wird aktuell verarbeitet', color: 'bg-yellow-100 text-yellow-800', next: ['READY', 'PAUSED'] }, - { state: 'READY', description: 'Fertig, wartet auf User-Bestaetigung', color: 'bg-green-100 text-green-800', next: ['APPROVED', 'REJECTED', 'PAUSED'] }, - { state: 'APPROVED', description: 'Vom User bestaetigt', color: 'bg-emerald-100 text-emerald-800', next: ['COMPLETED'] }, - { state: 'REJECTED', description: 'Vom User abgelehnt', color: 'bg-red-100 text-red-800', next: ['DRAFT'] }, - { state: 'COMPLETED', description: 'Erfolgreich abgeschlossen', color: 'bg-teal-100 text-teal-800', next: [] }, - { state: 'EXPIRED', description: 'TTL ueberschritten', color: 'bg-orange-100 text-orange-800', next: [] }, - { state: 'PAUSED', description: 'Vom User pausiert', color: 'bg-purple-100 text-purple-800', next: ['DRAFT', 'QUEUED', 'RUNNING', 'READY'] }, -] - -// Intent Types (22 types organized by group) -const INTENT_GROUPS = [ - { - group: 'Notizen', - color: 'bg-blue-50 border-blue-200', - intents: [ - { type: 'student_observation', example: 'Notiz zu Max: heute wiederholt gestoert', description: 'Schuelerbeobachtungen' }, - { type: 'reminder', example: 'Erinner mich morgen an Konferenz', description: 'Erinnerungen setzen' }, - { type: 'homework_check', example: '7b Mathe Hausaufgabe kontrollieren', description: 'Hausaufgaben pruefen' }, - { type: 'conference_topic', example: 'Thema Lehrerkonferenz: iPad-Regeln', description: 'Konferenzthemen' }, - { type: 'correction_thought', example: 'Aufgabe 3: haeufiger Fehler erklaeren', description: 'Korrekturgedanken' }, - ] - }, - { - group: 'Content-Generierung', - color: 'bg-green-50 border-green-200', - intents: [ - { type: 'worksheet_generate', example: 'Erstelle 3 Lueckentexte zu Vokabeln', description: 'Arbeitsblaetter erstellen' }, - { type: 'quiz_generate', example: '10-Minuten Vokabeltest mit Loesungen', description: 'Quiz/Tests erstellen' }, - { type: 'quick_activity', example: '10 Minuten Einstieg, 5 Aufgaben', description: 'Schnelle Aktivitaeten' }, - { type: 'differentiation', example: 'Zwei Schwierigkeitsstufen: Basis und Plus', description: 'Differenzierung' }, - ] - }, - { - group: 'Kommunikation', - color: 'bg-yellow-50 border-yellow-200', - intents: [ - { type: 'parent_letter', example: 'Neutraler Elternbrief wegen Stoerungen', description: 'Elternbriefe erstellen' }, - { type: 'class_message', example: 'Nachricht an 8a: Hausaufgaben bis Mittwoch', description: 'Klassennachrichten' }, - ] - }, - { - group: 'Canvas-Editor', - color: 'bg-purple-50 border-purple-200', - intents: [ - { type: 'canvas_edit', example: 'Ueberschriften groesser, Zeilenabstand kleiner', description: 'Formatierung aendern' }, - { type: 'canvas_layout', example: 'Alles auf eine Seite, Drucklayout A4', description: 'Layout anpassen' }, - { type: 'canvas_element', example: 'Kasten fuer Merke hinzufuegen', description: 'Elemente hinzufuegen' }, - { type: 'canvas_image', example: 'Bild 2 nach links, Pfeil auf Aufgabe 3', description: 'Bilder positionieren' }, - ] - }, - { - group: 'RAG & Korrektur', - color: 'bg-pink-50 border-pink-200', - intents: [ - { type: 'operator_checklist', example: 'Operatoren-Checkliste fuer diese Aufgabe', description: 'Operatoren abrufen' }, - { type: 'eh_passage', example: 'Erwartungshorizont-Passage zu diesem Thema', description: 'EH-Passagen suchen' }, - { type: 'feedback_suggestion', example: 'Kurze Feedbackformulierung vorschlagen', description: 'Feedback vorschlagen' }, - ] - }, - { - group: 'Follow-up (TaskOrchestrator)', - color: 'bg-teal-50 border-teal-200', - intents: [ - { type: 'task_summary', example: 'Fasse alle offenen Tasks zusammen', description: 'Task-Uebersicht' }, - { type: 'convert_note', example: 'Mach aus der Notiz von gestern einen Elternbrief', description: 'Notizen konvertieren' }, - { type: 'schedule_reminder', example: 'Erinner mich morgen an das Gespraech mit Max', description: 'Erinnerungen planen' }, - ] - }, -] - -// DSGVO Data Categories -const DSGVO_CATEGORIES = [ - { category: 'Audio', processing: 'NUR transient im RAM, NIEMALS persistiert', storage: 'Keine', ttl: '-', icon: '🎤', risk: 'low' }, - { category: 'PII (Schuelernamen)', processing: 'NUR auf Lehrergeraet', storage: 'Client-side', ttl: '-', icon: '👤', risk: 'high' }, - { category: 'Pseudonyme', processing: 'Server erlaubt (student_ref, class_ref)', storage: 'Valkey Cache', ttl: '24h', icon: '🔢', risk: 'low' }, - { category: 'Transkripte', processing: 'NUR verschluesselt (AES-256-GCM)', storage: 'PostgreSQL', ttl: '7 Tage', icon: '📝', risk: 'medium' }, - { category: 'Task States', processing: 'TaskOrchestrator', storage: 'Valkey', ttl: '30 Tage', icon: '📋', risk: 'low' }, - { category: 'Audit Logs', processing: 'Nur truncated IDs, keine PII', storage: 'PostgreSQL', ttl: '90 Tage', icon: '📊', risk: 'low' }, -] - -// API Endpoints -const API_ENDPOINTS = [ - { method: 'POST', path: '/api/v1/sessions', description: 'Voice Session erstellen' }, - { method: 'GET', path: '/api/v1/sessions/{id}', description: 'Session Status abrufen' }, - { method: 'DELETE', path: '/api/v1/sessions/{id}', description: 'Session beenden' }, - { method: 'GET', path: '/api/v1/sessions/{id}/tasks', description: 'Pending Tasks abrufen' }, - { method: 'POST', path: '/api/v1/tasks', description: 'Task erstellen' }, - { method: 'GET', path: '/api/v1/tasks/{id}', description: 'Task Status abrufen' }, - { method: 'PUT', path: '/api/v1/tasks/{id}/transition', description: 'Task State aendern' }, - { method: 'DELETE', path: '/api/v1/tasks/{id}', description: 'Task loeschen' }, - { method: 'WS', path: '/ws/voice', description: 'Voice Streaming (WebSocket)' }, - { method: 'GET', path: '/health', description: 'Health Check' }, -] - -export default function VoiceMatrixPage() { - const [activeTab, setActiveTab] = useState('overview') - const [demoLoaded, setDemoLoaded] = useState(false) - - const tabs = [ - { id: 'overview', name: 'Architektur', icon: '🏗️' }, - { id: 'demo', name: 'Live Demo', icon: '🎤' }, - { id: 'tasks', name: 'Task States', icon: '📋' }, - { id: 'intents', name: 'Intents (22)', icon: '🎯' }, - { id: 'dsgvo', name: 'DSGVO', icon: '🔒' }, - { id: 'api', name: 'API', icon: '🔌' }, - ] - - return ( -
- {/* Page Purpose */} - - - {/* Quick Links */} - - - {/* Stats Overview */} -
-
-
8091
-
Port
-
-
-
22
-
Task Types
-
-
-
9
-
Task States
-
-
-
24kHz
-
Audio Rate
-
-
-
80ms
-
Frame Size
-
-
-
0
-
Audio Persist
-
-
- - {/* Tabs */} -
-
-
- {tabs.map((tab) => ( - - ))} -
-
- -
- {/* Overview Tab */} - {activeTab === 'overview' && ( -
-

Voice-First Architektur

- - {/* Architecture Diagram */} -
-
{`
-┌──────────────────────────────────────────────────────────────────┐
-│                    LEHRERGERAET (PWA / App)                       │
-│  ┌────────────────────────────────────────────────────────────┐  │
-│  │ VoiceCapture.tsx │ voice-encryption.ts │ voice-api.ts      │  │
-│  │ Mikrofon         │ AES-256-GCM         │ WebSocket Client  │  │
-│  └────────────────────────────────────────────────────────────┘  │
-└───────────────────────────┬──────────────────────────────────────┘
-                            │ WebSocket (wss://)
-                            ▼
-┌──────────────────────────────────────────────────────────────────┐
-│                    VOICE SERVICE (Port 8091)                      │
-│  ┌────────────────────────────────────────────────────────────┐  │
-│  │ main.py │ streaming.py │ sessions.py │ tasks.py            │  │
-│  └────────────────────────────────────────────────────────────┘  │
-│  ┌────────────────────────────────────────────────────────────┐  │
-│  │ task_orchestrator.py │ intent_router.py │ encryption        │  │
-│  └────────────────────────────────────────────────────────────┘  │
-└───────────────────────────┬──────────────────────────────────────┘
-                            │
-         ┌──────────────────┼──────────────────┐
-         ▼                  ▼                  ▼
-┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
-│ PersonaPlex-7B  │ │ Ollama Fallback │ │ Valkey Cache    │
-│ (A100 GPU)      │ │ (Mac Mini)      │ │ (Sessions)      │
-└─────────────────┘ └─────────────────┘ └─────────────────┘
-`}
-
- - {/* Technology Stack */} -
-
-

Voice Model (Produktion)

-

PersonaPlex-7B (NVIDIA)

-

Full-Duplex Speech-to-Speech

-

Lizenz: MIT + NVIDIA Open Model

-
-
-

Agent Orchestration

-

TaskOrchestrator

-

Task State Machine

-

Lizenz: Proprietary

-
-
-

Audio Codec

-

Mimi (24kHz, 80ms)

-

Low-Latency Streaming

-

Lizenz: MIT

-
-
- - {/* Key Files */} -
-

Wichtige Dateien

-
- - - - - - - - - - - - - - - -
DateiBeschreibung
voice-service/main.pyFastAPI Entry, WebSocket Handler
voice-service/services/task_orchestrator.pyTask State Machine
voice-service/services/intent_router.pyIntent Detection (22 Types)
voice-service/services/encryption_service.pyNamespace Key Management
studio-v2/components/voice/VoiceCapture.tsxFrontend Mikrofon + Crypto
studio-v2/lib/voice/voice-encryption.tsAES-256-GCM Client-side
-
-
-
- )} - - {/* Demo Tab */} - {activeTab === 'demo' && ( -
-
-

Live Voice Demo

- - In neuem Tab oeffnen - - - - -
- -
-

Hinweis: Die Demo erfordert, dass der Voice Service (Port 8091) und das Studio-v2 Frontend (Port 3001) laufen.

- docker compose up -d voice-service && cd studio-v2 && npm run dev -
- - {/* Embedded Demo */} -
- {!demoLoaded && ( -
- -
- )} - {demoLoaded && ( -