feat: voice-service von lehrer nach core verschoben, Pipeline erweitert (voice, BQAS, embedding, night-scheduler)
This commit is contained in:
12
voice-service/api/__init__.py
Normal file
12
voice-service/api/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""
|
||||
Voice Service API Routes
|
||||
"""
|
||||
from api.sessions import router as sessions_router
|
||||
from api.tasks import router as tasks_router
|
||||
from api.streaming import router as streaming_router
|
||||
|
||||
__all__ = [
|
||||
"sessions_router",
|
||||
"tasks_router",
|
||||
"streaming_router",
|
||||
]
|
||||
365
voice-service/api/bqas.py
Normal file
365
voice-service/api/bqas.py
Normal file
@@ -0,0 +1,365 @@
|
||||
"""
|
||||
BQAS API - Quality Assurance Endpoints
|
||||
"""
|
||||
import structlog
|
||||
import subprocess
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime
|
||||
|
||||
from bqas.runner import get_runner, BQASRunner
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# Response Models
|
||||
class TestRunResponse(BaseModel):
    """Summary of one recorded test-suite run."""
    id: int
    timestamp: str  # ISO-8601 with a trailing "Z" (see _run_to_response)
    git_commit: Optional[str] = None  # short hash, if git was available at run time
    suite: str  # which suite produced this run ("golden", "synthetic" or "rag")
    # The composite score is reported under the field that matches `suite`;
    # the other two score fields remain 0.0 (populated by _run_to_response).
    golden_score: float
    synthetic_score: float
    rag_score: float = 0.0
    total_tests: int
    passed_tests: int
    failed_tests: int
    duration_seconds: float
|
||||
|
||||
|
||||
class MetricsResponse(BaseModel):
    """Aggregated quality metrics for a single test-suite run."""
    total_tests: int
    passed_tests: int
    failed_tests: int
    # Per-dimension averages; values are pre-rounded by _metrics_to_response.
    avg_intent_accuracy: float
    avg_faithfulness: float
    avg_relevance: float
    avg_coherence: float
    safety_pass_rate: float  # fraction of tests passing safety checks
    avg_composite_score: float
    scores_by_intent: Dict[str, float]  # intent name -> average composite score
    failed_test_ids: List[str]  # IDs of individual failing tests
|
||||
|
||||
|
||||
class TrendResponse(BaseModel):
    """Time series of golden-suite scores plus a coarse trend label."""
    dates: List[str]  # ISO-8601 timestamps, oldest first (see get_trend)
    scores: List[float]  # composite scores aligned with `dates`
    trend: str  # improving, stable, declining, insufficient_data
|
||||
|
||||
|
||||
class LatestMetricsResponse(BaseModel):
    """Latest metrics per suite; a field is None when that suite has no runs."""
    golden: Optional[MetricsResponse] = None
    synthetic: Optional[MetricsResponse] = None
    rag: Optional[MetricsResponse] = None
|
||||
|
||||
|
||||
class RunResultResponse(BaseModel):
    """Outcome of triggering a suite run via the API."""
    success: bool
    message: str  # human-readable summary or error description
    metrics: Optional[MetricsResponse] = None  # populated only on success
    run_id: Optional[int] = None  # ID of the stored run, on success
|
||||
|
||||
|
||||
# State tracking for running tests.
# Maps suite name -> True while that suite is executing; the run_* endpoints
# use it to reject a second concurrent run of the same suite.
_is_running: Dict[str, bool] = {"golden": False, "synthetic": False, "rag": False}
|
||||
|
||||
|
||||
def _get_git_commit() -> Optional[str]:
|
||||
"""Get current git commit hash."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-parse", "--short", "HEAD"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return result.stdout.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _metrics_to_response(metrics) -> MetricsResponse:
    """Map an internal BQASMetrics object onto the public MetricsResponse model."""
    # Round per-intent composite scores to three decimal places.
    per_intent = {
        intent: round(score, 3)
        for intent, score in metrics.scores_by_intent.items()
    }
    return MetricsResponse(
        total_tests=metrics.total_tests,
        passed_tests=metrics.passed_tests,
        failed_tests=metrics.failed_tests,
        # Dimension averages are reported at two decimal places...
        avg_intent_accuracy=round(metrics.avg_intent_accuracy, 2),
        avg_faithfulness=round(metrics.avg_faithfulness, 2),
        avg_relevance=round(metrics.avg_relevance, 2),
        avg_coherence=round(metrics.avg_coherence, 2),
        # ...while rates and composite scores keep three.
        safety_pass_rate=round(metrics.safety_pass_rate, 3),
        avg_composite_score=round(metrics.avg_composite_score, 3),
        scores_by_intent=per_intent,
        failed_test_ids=metrics.failed_test_ids,
    )
|
||||
|
||||
|
||||
def _run_to_response(run) -> TestRunResponse:
    """Convert an internal TestRun into the public TestRunResponse model.

    The run's composite score is attributed only to the field matching
    its suite; the other two score fields stay 0.0.
    """
    # Round once instead of repeating the same expression per suite field.
    score = round(run.metrics.avg_composite_score, 3)
    return TestRunResponse(
        id=run.id,
        timestamp=run.timestamp.isoformat() + "Z",
        git_commit=run.git_commit,
        suite=run.suite,
        golden_score=score if run.suite == "golden" else 0.0,
        synthetic_score=score if run.suite == "synthetic" else 0.0,
        rag_score=score if run.suite == "rag" else 0.0,
        total_tests=run.metrics.total_tests,
        passed_tests=run.metrics.passed_tests,
        failed_tests=run.metrics.failed_tests,
        duration_seconds=round(run.duration_seconds, 1),
    )
|
||||
|
||||
|
||||
@router.get("/runs", response_model=Dict[str, Any])
async def get_test_runs(limit: int = 20):
    """Return the most recent test runs together with their count."""
    recent = get_runner().get_test_runs(limit)
    return {
        "runs": [_run_to_response(run) for run in recent],
        "total": len(recent),
    }
|
||||
|
||||
|
||||
@router.get("/run/{run_id}", response_model=TestRunResponse)
async def get_test_run(run_id: int):
    """Return one test run by ID, or 404 if it is not among the last 100."""
    candidates = get_runner().get_test_runs(100)
    match = next((candidate for candidate in candidates if candidate.id == run_id), None)
    if match is None:
        raise HTTPException(status_code=404, detail="Test run not found")
    return _run_to_response(match)
|
||||
|
||||
|
||||
@router.get("/trend", response_model=TrendResponse)
async def get_trend(days: int = 30):
    """Return the golden-suite score series and a coarse trend label.

    NOTE(review): the ``days`` parameter is accepted but not applied as a
    filter; the series always covers the last 100 stored runs.
    """
    history = get_runner().get_test_runs(100)

    # Only golden-suite runs feed the trend.
    golden = [run for run in history if run.suite == "golden"]

    if len(golden) < 3:
        return TrendResponse(dates=[], scores=[], trend="insufficient_data")

    # Oldest first so the series reads left-to-right in time.
    golden.sort(key=lambda run: run.timestamp)

    dates = [run.timestamp.isoformat() + "Z" for run in golden]
    scores = [round(run.metrics.avg_composite_score, 3) for run in golden]

    # With six or more points, compare the newest three against the oldest
    # three; a shift of more than 0.1 flips the label away from "stable".
    label = "stable"
    if len(scores) >= 6:
        shift = sum(scores[-3:]) / 3 - sum(scores[:3]) / 3
        if shift > 0.1:
            label = "improving"
        elif shift < -0.1:
            label = "declining"

    return TrendResponse(dates=dates, scores=scores, trend=label)
|
||||
|
||||
|
||||
@router.get("/latest-metrics", response_model=LatestMetricsResponse)
async def get_latest_metrics():
    """Return the most recent metrics for each test suite (None if never run)."""
    latest = get_runner().get_latest_metrics()

    def convert(suite: str):
        # Suites that never ran are reported as None rather than omitted.
        raw = latest[suite]
        return _metrics_to_response(raw) if raw else None

    return LatestMetricsResponse(
        golden=convert("golden"),
        synthetic=convert("synthetic"),
        rag=convert("rag"),
    )
|
||||
|
||||
|
||||
@router.post("/run/golden", response_model=RunResultResponse)
async def run_golden_suite(background_tasks: BackgroundTasks):
    """Run the golden test suite.

    Executes synchronously within the request and returns the resulting
    metrics; rejects the call (success=False) while another golden run
    is in flight. Failures are reported in-band, not as HTTP errors.

    NOTE(review): ``background_tasks`` is currently unused - the suite
    runs inline; confirm whether background execution was intended.
    """
    if _is_running["golden"]:
        return RunResultResponse(
            success=False,
            message="Golden suite is already running"
        )

    # Check-then-set with no await in between: handlers on the same event
    # loop cannot interleave here, so the flag acts as a simple mutex.
    _is_running["golden"] = True
    logger.info("Starting Golden Suite via API")

    try:
        runner = get_runner()
        git_commit = _get_git_commit()

        # Run the suite
        run = await runner.run_golden_suite(git_commit=git_commit)

        metrics = _metrics_to_response(run.metrics)

        return RunResultResponse(
            success=True,
            message=f"Golden suite completed: {run.metrics.passed_tests}/{run.metrics.total_tests} passed ({run.metrics.avg_composite_score:.2f} avg score)",
            metrics=metrics,
            run_id=run.id,
        )

    except Exception as e:
        # Broad catch is intentional: suite failures become a structured
        # API response instead of a 500.
        logger.error("Golden suite failed", error=str(e))
        return RunResultResponse(
            success=False,
            message=f"Golden suite failed: {str(e)}"
        )

    finally:
        # Always release the busy flag, even on unexpected errors.
        _is_running["golden"] = False
|
||||
|
||||
|
||||
@router.post("/run/synthetic", response_model=RunResultResponse)
async def run_synthetic_suite(background_tasks: BackgroundTasks):
    """Run the synthetic test suite.

    Executes synchronously within the request and returns the resulting
    metrics; rejects the call (success=False) while another synthetic run
    is in flight. Failures are reported in-band, not as HTTP errors.

    NOTE(review): ``background_tasks`` is currently unused - the suite
    runs inline; confirm whether background execution was intended.
    """
    if _is_running["synthetic"]:
        return RunResultResponse(
            success=False,
            message="Synthetic suite is already running"
        )

    # Check-then-set with no await in between: handlers on the same event
    # loop cannot interleave here, so the flag acts as a simple mutex.
    _is_running["synthetic"] = True
    logger.info("Starting Synthetic Suite via API")

    try:
        runner = get_runner()
        git_commit = _get_git_commit()

        # Run the suite
        run = await runner.run_synthetic_suite(git_commit=git_commit)

        metrics = _metrics_to_response(run.metrics)

        return RunResultResponse(
            success=True,
            message=f"Synthetic suite completed: {run.metrics.passed_tests}/{run.metrics.total_tests} passed ({run.metrics.avg_composite_score:.2f} avg score)",
            metrics=metrics,
            run_id=run.id,
        )

    except Exception as e:
        # Broad catch is intentional: suite failures become a structured
        # API response instead of a 500.
        logger.error("Synthetic suite failed", error=str(e))
        return RunResultResponse(
            success=False,
            message=f"Synthetic suite failed: {str(e)}"
        )

    finally:
        # Always release the busy flag, even on unexpected errors.
        _is_running["synthetic"] = False
|
||||
|
||||
|
||||
@router.post("/run/rag", response_model=RunResultResponse)
async def run_rag_suite(background_tasks: BackgroundTasks):
    """Run the RAG/Correction test suite.

    Executes synchronously within the request and returns the resulting
    metrics; rejects the call (success=False) while another RAG run is
    in flight. Failures are reported in-band, not as HTTP errors.

    NOTE(review): ``background_tasks`` is currently unused - the suite
    runs inline; confirm whether background execution was intended.
    """
    if _is_running["rag"]:
        return RunResultResponse(
            success=False,
            message="RAG suite is already running"
        )

    # Check-then-set with no await in between: handlers on the same event
    # loop cannot interleave here, so the flag acts as a simple mutex.
    _is_running["rag"] = True
    logger.info("Starting RAG Suite via API")

    try:
        runner = get_runner()
        git_commit = _get_git_commit()

        # Run the suite
        run = await runner.run_rag_suite(git_commit=git_commit)

        metrics = _metrics_to_response(run.metrics)

        return RunResultResponse(
            success=True,
            message=f"RAG suite completed: {run.metrics.passed_tests}/{run.metrics.total_tests} passed ({run.metrics.avg_composite_score:.2f} avg score)",
            metrics=metrics,
            run_id=run.id,
        )

    except Exception as e:
        # Broad catch is intentional: suite failures become a structured
        # API response instead of a 500.
        logger.error("RAG suite failed", error=str(e))
        return RunResultResponse(
            success=False,
            message=f"RAG suite failed: {str(e)}"
        )

    finally:
        # Always release the busy flag, even on unexpected errors.
        _is_running["rag"] = False
|
||||
|
||||
|
||||
@router.get("/regression-check")
async def check_regression(threshold: float = 0.1):
    """Check whether the newest golden-suite score regressed.

    Compares the most recent golden run against the average of up to the
    five runs before it; a drop larger than ``threshold`` counts as a
    regression.

    Args:
        threshold: Minimum score drop that is flagged as a regression.
    """
    runner = get_runner()
    runs = runner.get_test_runs(20)

    golden_runs = [r for r in runs if r.suite == "golden"]

    if len(golden_runs) < 2:
        return {
            "is_regression": False,
            "message": "Not enough data for regression check",
            "current_score": None,
            "previous_avg": None,
            "delta": None,
        }

    # Sort by timestamp (newest first). The early return above guarantees
    # at least two runs, so the fallback branches the original carried
    # ("if golden_runs else 0" and the empty-previous_scores guard) were
    # unreachable and have been removed.
    golden_runs.sort(key=lambda r: r.timestamp, reverse=True)

    current_score = golden_runs[0].metrics.avg_composite_score
    previous_scores = [r.metrics.avg_composite_score for r in golden_runs[1:6]]
    previous_avg = sum(previous_scores) / len(previous_scores)
    # Positive delta means the newest score dropped below the recent average.
    delta = previous_avg - current_score

    is_regression = delta > threshold

    return {
        "is_regression": is_regression,
        "message": f"Regression detected: score dropped by {delta:.2f}" if is_regression else "No regression detected",
        "current_score": round(current_score, 3),
        "previous_avg": round(previous_avg, 3),
        "delta": round(delta, 3),
        "threshold": threshold,
    }
|
||||
|
||||
|
||||
@router.get("/health")
async def bqas_health():
    """BQAS health check: judge availability, run count, busy flags, config."""
    health = await get_runner().health_check()

    report = {
        "status": "healthy",
        "judge_available": health["judge_available"],
        "rag_judge_available": health["rag_judge_available"],
        "test_runs_count": health["test_runs_count"],
        "is_running": _is_running,
        "config": health["config"],
    }
    return report
|
||||
220
voice-service/api/sessions.py
Normal file
220
voice-service/api/sessions.py
Normal file
@@ -0,0 +1,220 @@
|
||||
"""
|
||||
Session Management API
|
||||
Handles voice session lifecycle
|
||||
|
||||
Endpoints:
|
||||
- POST /api/v1/sessions # Session erstellen
|
||||
- GET /api/v1/sessions/{id} # Session Status
|
||||
- DELETE /api/v1/sessions/{id} # Session beenden
|
||||
- GET /api/v1/sessions/{id}/tasks # Pending Tasks
|
||||
"""
|
||||
import structlog
|
||||
from fastapi import APIRouter, HTTPException, Request, Depends
|
||||
from typing import List, Optional
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from config import settings
|
||||
from models.session import (
|
||||
VoiceSession,
|
||||
SessionCreate,
|
||||
SessionResponse,
|
||||
SessionStatus,
|
||||
)
|
||||
from models.task import TaskResponse, TaskState
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# In-memory session store (will be replaced with Valkey in production).
# This is transient - sessions are never persisted to disk.
# Keyed by session ID; also imported by the streaming and task modules.
_sessions: dict[str, VoiceSession] = {}
|
||||
|
||||
|
||||
async def get_session(session_id: str) -> VoiceSession:
    """Return the stored session for ``session_id`` or raise a 404."""
    found = _sessions.get(session_id)
    if found is None:
        raise HTTPException(status_code=404, detail="Session not found")
    return found
|
||||
|
||||
|
||||
@router.post("", response_model=SessionResponse)
async def create_session(request: Request, session_data: SessionCreate):
    """
    Create a new voice session.

    Returns a session ID and WebSocket URL for audio streaming.
    The client must connect to the WebSocket within 30 seconds.

    Raises:
        HTTPException: 401 for an invalid encryption key hash,
            429 when the namespace already has the maximum number of
            active sessions.
    """
    logger.info(
        "Creating voice session",
        namespace_id=session_data.namespace_id[:8] + "...",
        device_type=session_data.device_type,
    )

    # Verify namespace key hash. (The unused `orchestrator` fetch from
    # app state that was here has been removed - nothing consumed it.)
    encryption = request.app.state.encryption

    if settings.encryption_enabled:
        if not encryption.verify_key_hash(session_data.key_hash):
            logger.warning("Invalid key hash", namespace_id=session_data.namespace_id[:8])
            raise HTTPException(status_code=401, detail="Invalid encryption key hash")

    # Rate limit: only sessions that are still live count against the cap.
    namespace_sessions = [
        s for s in _sessions.values()
        if s.namespace_id == session_data.namespace_id
        and s.status not in [SessionStatus.CLOSED, SessionStatus.ERROR]
    ]
    if len(namespace_sessions) >= settings.max_sessions_per_user:
        raise HTTPException(
            status_code=429,
            detail=f"Maximum {settings.max_sessions_per_user} concurrent sessions allowed"
        )

    # Create the session object.
    session = VoiceSession(
        namespace_id=session_data.namespace_id,
        key_hash=session_data.key_hash,
        device_type=session_data.device_type,
        client_version=session_data.client_version,
    )

    # Store session (in RAM only - never persisted to disk).
    _sessions[session.id] = session

    logger.info(
        "Voice session created",
        session_id=session.id[:8],
        namespace_id=session_data.namespace_id[:8],
    )

    # Build WebSocket URL.
    # Use X-Forwarded-Proto if behind a reverse proxy (nginx), otherwise
    # fall back to the scheme of the incoming request.
    forwarded_proto = request.headers.get("x-forwarded-proto", request.url.scheme)
    host = request.headers.get("host", f"localhost:{settings.port}")
    ws_scheme = "wss" if forwarded_proto == "https" else "ws"
    ws_url = f"{ws_scheme}://{host}/ws/voice?session_id={session.id}"

    return SessionResponse(
        id=session.id,
        namespace_id=session.namespace_id,
        status=session.status,
        created_at=session.created_at,
        websocket_url=ws_url,
    )
|
||||
|
||||
|
||||
@router.get("/{session_id}", response_model=SessionResponse)
async def get_session_status(session_id: str, request: Request):
    """
    Report the current state of a session.

    Flips the session to CLOSED once it is older than the configured TTL
    and returns the WebSocket URL the client should use.
    """
    session = await get_session(session_id)

    # Lazy expiry: sessions past the TTL are marked CLOSED on read.
    age = datetime.utcnow() - session.created_at
    if age > timedelta(hours=settings.session_ttl_hours):
        session.status = SessionStatus.CLOSED
        logger.info("Session expired", session_id=session_id[:8])

    # Build the WebSocket URL, honouring X-Forwarded-Proto when running
    # behind a reverse proxy (nginx); otherwise use the request scheme.
    proto = request.headers.get("x-forwarded-proto", request.url.scheme)
    host = request.headers.get("host", f"localhost:{settings.port}")
    scheme = "wss" if proto == "https" else "ws"
    socket_url = f"{scheme}://{host}/ws/voice?session_id={session.id}"

    return SessionResponse(
        id=session.id,
        namespace_id=session.namespace_id,
        status=session.status,
        created_at=session.created_at,
        websocket_url=socket_url,
    )
|
||||
|
||||
|
||||
@router.delete("/{session_id}")
async def close_session(session_id: str):
    """
    Close a session and drop all of its transient state.

    Messages and audio state are discarded; this is the expected cleanup
    path for well-behaved clients.
    """
    session = await get_session(session_id)

    logger.info(
        "Closing session",
        session_id=session_id[:8],
        messages_count=len(session.messages),
        tasks_count=len(session.pending_tasks),
    )

    # Flip the status first so any other holder of the object sees CLOSED,
    # then forget the session entirely.
    session.status = SessionStatus.CLOSED
    _sessions.pop(session_id, None)

    return {"status": "closed", "session_id": session_id}
|
||||
|
||||
|
||||
@router.get("/{session_id}/tasks", response_model=List[TaskResponse])
async def get_session_tasks(session_id: str, request: Request, state: Optional[TaskState] = None):
    """
    List tasks belonging to a session, optionally filtered by state.
    """
    # Raises 404 for unknown sessions; the object itself is not needed here.
    await get_session(session_id)

    # Tasks live in the in-memory store owned by api.tasks; imported
    # locally to avoid a circular import at module load time.
    from api.tasks import _tasks

    def belongs(task) -> bool:
        if task.session_id != session_id:
            return False
        return state is None or task.state == state

    return [
        TaskResponse(
            id=task.id,
            session_id=task.session_id,
            type=task.type,
            state=task.state,
            created_at=task.created_at,
            updated_at=task.updated_at,
            result_available=task.result_ref is not None,
            error_message=task.error_message,
        )
        for task in _tasks.values()
        if belongs(task)
    ]
|
||||
|
||||
|
||||
@router.get("/{session_id}/stats")
async def get_session_stats(session_id: str):
    """
    Aggregate counters for one session (debugging/monitoring).

    No PII is returned - only counts and the truncated session ID.
    """
    session = await get_session(session_id)

    uptime = (datetime.utcnow() - session.created_at).total_seconds()
    return {
        "session_id_truncated": session_id[:8],
        "status": session.status.value,
        "age_seconds": uptime,
        "message_count": len(session.messages),
        "pending_tasks_count": len(session.pending_tasks),
        "audio_chunks_received": session.audio_chunks_received,
        "audio_chunks_processed": session.audio_chunks_processed,
        "device_type": session.device_type,
    }
|
||||
325
voice-service/api/streaming.py
Normal file
325
voice-service/api/streaming.py
Normal file
@@ -0,0 +1,325 @@
|
||||
"""
|
||||
WebSocket Streaming API
|
||||
Handles real-time audio streaming for voice interface
|
||||
|
||||
WebSocket Protocol:
|
||||
- Binary frames: Int16 PCM Audio (24kHz, 80ms frames)
|
||||
- JSON frames: {"type": "config|end_turn|interrupt"}
|
||||
|
||||
Server -> Client:
|
||||
- Binary: Audio Response (base64)
|
||||
- JSON: {"type": "transcript|intent|status|error"}
|
||||
"""
|
||||
import structlog
|
||||
import asyncio
|
||||
import json
|
||||
import base64
|
||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Query
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
from config import settings
|
||||
from models.session import SessionStatus, TranscriptMessage, AudioChunk
|
||||
from models.task import TaskCreate, TaskType
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Active WebSocket connections (transient).
# Keyed by session ID; entries are added on accept and removed in the
# handler's cleanup path when the socket closes.
active_connections: dict[str, WebSocket] = {}
|
||||
|
||||
|
||||
@router.websocket("/ws/voice")
async def voice_websocket(
    websocket: WebSocket,
    session_id: str = Query(..., description="Session ID from /api/v1/sessions"),
    namespace: Optional[str] = Query(None, description="Namespace ID"),
    key_hash: Optional[str] = Query(None, description="Encryption key hash"),
):
    """
    WebSocket endpoint for voice streaming.

    Protocol:
    1. Client connects with session_id
    2. Client sends binary audio frames (Int16 PCM, 24kHz)
    3. Server responds with transcripts, intents, and audio

    Audio Processing:
    - Chunks are processed in RAM only
    - No audio is ever persisted
    - Transcripts are encrypted before any storage

    NOTE(review): the ``namespace`` and ``key_hash`` query parameters are
    accepted but not read in this handler - confirm whether they should
    be validated against the session.
    """
    # Resolve the session from the shared in-memory store; imported
    # locally to avoid a circular import at module load time.
    from api.sessions import _sessions
    session = _sessions.get(session_id)

    if not session:
        # 4004 is an application-defined close code for "session not found".
        await websocket.close(code=4004, reason="Session not found")
        return

    # Accept connection
    await websocket.accept()

    logger.info(
        "WebSocket connected",
        session_id=session_id[:8],
        namespace_id=session.namespace_id[:8],
    )

    # Update session status and register the live socket.
    session.status = SessionStatus.CONNECTED
    active_connections[session_id] = websocket

    # Audio buffer for accumulating chunks until 500ms worth is available.
    audio_buffer = bytearray()
    chunk_sequence = 0

    try:
        # Send initial status so the client knows the expected audio format.
        await websocket.send_json({
            "type": "status",
            "status": "connected",
            "session_id": session_id,
            "audio_config": {
                "sample_rate": settings.audio_sample_rate,
                "frame_size_ms": settings.audio_frame_size_ms,
                "encoding": "pcm_s16le",
            },
        })

        while True:
            # Receive message (binary audio frame or JSON control text).
            message = await websocket.receive()

            if "bytes" in message:
                # Binary audio data
                audio_data = message["bytes"]
                session.audio_chunks_received += 1

                # Create audio chunk (transient - never persisted).
                # timestamp_ms wraps at 24h (milliseconds since midnight UTC).
                # NOTE(review): `chunk` is constructed but not consumed below -
                # confirm whether it is still needed.
                chunk = AudioChunk(
                    sequence=chunk_sequence,
                    timestamp_ms=int((datetime.utcnow().timestamp() * 1000) % (24 * 60 * 60 * 1000)),
                    data=audio_data,
                )
                chunk_sequence += 1

                # Accumulate in buffer
                audio_buffer.extend(audio_data)

                # Process when we have enough data (e.g., 500ms worth)
                samples_needed = settings.audio_sample_rate // 2  # 500ms
                bytes_needed = samples_needed * 2  # 16-bit = 2 bytes

                if len(audio_buffer) >= bytes_needed:
                    session.status = SessionStatus.PROCESSING

                    # Process exactly one 500ms window of audio.
                    await process_audio_chunk(
                        websocket,
                        session,
                        bytes(audio_buffer[:bytes_needed]),
                    )

                    # Remove processed data; the remainder carries over.
                    audio_buffer = audio_buffer[bytes_needed:]
                    session.audio_chunks_processed += 1

            elif "text" in message:
                # JSON control message
                try:
                    data = json.loads(message["text"])
                    msg_type = data.get("type")

                    if msg_type == "config":
                        # Client configuration (currently only logged).
                        logger.debug("Received config", config=data)

                    elif msg_type == "end_turn":
                        # User finished speaking
                        session.status = SessionStatus.PROCESSING

                        # Flush whatever audio remains in the buffer.
                        if audio_buffer:
                            await process_audio_chunk(
                                websocket,
                                session,
                                bytes(audio_buffer),
                            )
                            audio_buffer.clear()

                        # Signal end of user turn
                        await websocket.send_json({
                            "type": "status",
                            "status": "processing",
                        })

                    elif msg_type == "interrupt":
                        # User interrupted the response; go back to listening.
                        session.status = SessionStatus.LISTENING
                        await websocket.send_json({
                            "type": "status",
                            "status": "interrupted",
                        })

                    elif msg_type == "ping":
                        # Keep-alive ping
                        await websocket.send_json({"type": "pong"})

                except json.JSONDecodeError:
                    # Malformed control frames are logged (truncated) and ignored.
                    logger.warning("Invalid JSON message", message=message["text"][:100])

            # Record activity after every frame, binary or text.
            session.update_activity()

    except WebSocketDisconnect:
        logger.info("WebSocket disconnected", session_id=session_id[:8])
    except Exception as e:
        logger.error("WebSocket error", session_id=session_id[:8], error=str(e))
        session.status = SessionStatus.ERROR
    finally:
        # Cleanup: the session always ends CLOSED and the socket is
        # deregistered, regardless of how the loop exited.
        session.status = SessionStatus.CLOSED
        if session_id in active_connections:
            del active_connections[session_id]
|
||||
|
||||
|
||||
async def process_audio_chunk(
    websocket: WebSocket,
    session,
    audio_data: bytes,
):
    """
    Process an audio chunk through the voice pipeline.

    1. PersonaPlex/Ollama for transcription + understanding
    2. Intent detection
    3. Task creation if needed
    4. Response generation
    5. Audio synthesis (if PersonaPlex)

    Args:
        websocket: Open client socket; receives transcript/intent/response
            JSON frames and (optionally) binary audio.
        session: The active voice session; its message history is extended
            with both the user transcript and the assistant reply.
        audio_data: Raw PCM bytes for one buffered window of speech.

    Errors never propagate: any failure is reported to the client as a
    ``{"type": "error"}`` frame.
    """
    # Local imports avoid circular dependencies at module load time.
    from services.task_orchestrator import TaskOrchestrator
    from services.intent_router import IntentRouter

    # NOTE(review): a fresh orchestrator/router is built per chunk -
    # confirm these are cheap to construct or should live on app state.
    orchestrator = TaskOrchestrator()
    intent_router = IntentRouter()

    try:
        # Transcribe audio
        if settings.use_personaplex:
            # Use PersonaPlex for transcription
            from services.personaplex_client import PersonaPlexClient
            client = PersonaPlexClient()
            transcript = await client.transcribe(audio_data)
        else:
            # Use Ollama fallback (text-only, requires separate ASR)
            # For MVP, we'll simulate with a placeholder
            # In production, integrate with Whisper or similar
            from services.fallback_llm_client import FallbackLLMClient
            llm_client = FallbackLLMClient()
            transcript = await llm_client.process_audio_description(audio_data)

        # Nothing transcribed (silence or empty result): stop early.
        if not transcript or not transcript.strip():
            return

        # Send transcript to client
        # NOTE(review): confidence is hard-coded at 0.95 here and below -
        # confirm whether the transcriber exposes a real value.
        await websocket.send_json({
            "type": "transcript",
            "text": transcript,
            "final": True,
            "confidence": 0.95,
        })

        # Add the user's turn to the session history.
        user_message = TranscriptMessage(
            role="user",
            content=transcript,
            confidence=0.95,
        )
        session.messages.append(user_message)

        # Detect intent against the full conversation so far.
        intent = await intent_router.detect_intent(transcript, session.messages)

        if intent:
            await websocket.send_json({
                "type": "intent",
                "intent": intent.type.value,
                "confidence": intent.confidence,
                "parameters": intent.parameters,
            })

            # Create task if intent is actionable
            if intent.is_actionable:
                task = await orchestrator.create_task_from_intent(
                    session_id=session.id,
                    namespace_id=session.namespace_id,
                    intent=intent,
                    transcript=transcript,
                )

                await websocket.send_json({
                    "type": "task_created",
                    "task_id": task.id,
                    "task_type": task.type.value,
                    "state": task.state.value,
                })

        # Generate the assistant's text response.
        # NOTE(review): `intent` may be None here - confirm generate_response
        # accepts that.
        response_text = await orchestrator.generate_response(
            session_messages=session.messages,
            intent=intent,
            namespace_id=session.namespace_id,
        )

        # Send text response
        await websocket.send_json({
            "type": "response",
            "text": response_text,
        })

        # Add the assistant's turn to the session history.
        assistant_message = TranscriptMessage(
            role="assistant",
            content=response_text,
        )
        session.messages.append(assistant_message)

        # Generate audio response if PersonaPlex is available
        if settings.use_personaplex:
            from services.personaplex_client import PersonaPlexClient
            client = PersonaPlexClient()
            audio_response = await client.synthesize(response_text)

            if audio_response:
                # Send audio to the client in frame-sized binary chunks.
                chunk_size = settings.audio_frame_samples * 2  # 16-bit
                for i in range(0, len(audio_response), chunk_size):
                    chunk = audio_response[i:i + chunk_size]
                    await websocket.send_bytes(chunk)

        # Back to listening once the full response has been delivered.
        session.status = SessionStatus.LISTENING

        await websocket.send_json({
            "type": "status",
            "status": "listening",
        })

    except Exception as e:
        # Broad catch is intentional: pipeline failures are reported to
        # the client as an error frame instead of killing the socket.
        logger.error("Audio processing error", error=str(e))
        await websocket.send_json({
            "type": "error",
            "message": "Failed to process audio",
            "code": "processing_error",
        })
|
||||
|
||||
|
||||
@router.get("/ws/stats")
async def get_websocket_stats():
    """Report how many WebSocket connections are currently open."""
    truncated_ids = [connection_id[:8] for connection_id in active_connections]
    return {
        "active_connections": len(active_connections),
        "connection_ids": truncated_ids,
    }
|
||||
262
voice-service/api/tasks.py
Normal file
262
voice-service/api/tasks.py
Normal file
@@ -0,0 +1,262 @@
|
||||
"""
|
||||
Task Management API
|
||||
Handles TaskOrchestrator task lifecycle
|
||||
|
||||
Endpoints:
|
||||
- POST /api/v1/tasks # Task erstellen
|
||||
- GET /api/v1/tasks/{id} # Task Status
|
||||
- PUT /api/v1/tasks/{id}/transition # Status aendern
|
||||
- DELETE /api/v1/tasks/{id} # Task loeschen
|
||||
"""
|
||||
import structlog
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
from config import settings
|
||||
from models.task import (
|
||||
Task,
|
||||
TaskCreate,
|
||||
TaskResponse,
|
||||
TaskTransition,
|
||||
TaskState,
|
||||
TaskType,
|
||||
is_valid_transition,
|
||||
)
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# In-memory task store (will be replaced with Valkey in production).
# Keyed by task ID; also read by api.sessions to list a session's tasks.
_tasks: dict[str, Task] = {}
|
||||
|
||||
|
||||
async def get_task(task_id: str) -> Task:
    """Return the stored task for ``task_id`` or raise a 404."""
    found = _tasks.get(task_id)
    if found is None:
        raise HTTPException(status_code=404, detail="Task not found")
    return found
|
||||
|
||||
|
||||
@router.post("", response_model=TaskResponse)
async def create_task(request: Request, task_data: TaskCreate):
    """
    Create a new task.

    The task will be queued for processing by TaskOrchestrator.
    Intent text (and well-known PII parameter fields) is encrypted
    before storage when encryption is enabled.

    Raises:
        HTTPException: 404 when the referenced session does not exist.
    """
    logger.info(
        "Creating task",
        session_id=task_data.session_id[:8],
        task_type=task_data.type.value,
    )

    # Get encryption service
    encryption = request.app.state.encryption

    # Get session to validate and get namespace; imported locally to
    # avoid a circular import at module load time.
    from api.sessions import _sessions
    session = _sessions.get(task_data.session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    # Encrypt intent text if encryption is enabled
    encrypted_intent = task_data.intent_text
    if settings.encryption_enabled:
        encrypted_intent = encryption.encrypt_content(
            task_data.intent_text,
            session.namespace_id,
        )

    # Encrypt any PII in parameters.
    # Only these well-known keys are treated as PII; any other parameter
    # value passes through unchanged.
    encrypted_params = {}
    pii_fields = ["student_name", "class_name", "parent_name", "content"]
    for key, value in task_data.parameters.items():
        if key in pii_fields and settings.encryption_enabled:
            encrypted_params[key] = encryption.encrypt_content(
                str(value),
                session.namespace_id,
            )
        else:
            encrypted_params[key] = value

    # Create task
    task = Task(
        session_id=task_data.session_id,
        namespace_id=session.namespace_id,
        type=task_data.type,
        intent_text=encrypted_intent,
        parameters=encrypted_params,
    )

    # Store task
    _tasks[task.id] = task

    # Add to session's pending tasks
    session.pending_tasks.append(task.id)

    # Queue task for processing
    orchestrator = request.app.state.orchestrator
    await orchestrator.queue_task(task)

    logger.info(
        "Task created",
        task_id=task.id[:8],
        session_id=task_data.session_id[:8],
        task_type=task_data.type.value,
    )

    return TaskResponse(
        id=task.id,
        session_id=task.session_id,
        type=task.type,
        state=task.state,
        created_at=task.created_at,
        updated_at=task.updated_at,
        # A freshly created task never has a result yet.
        result_available=False,
    )
|
||||
|
||||
|
||||
@router.get("/{task_id}", response_model=TaskResponse)
async def get_task_status(task_id: str):
    """
    Get task status.

    Returns the task's current state and whether a result is available
    for retrieval via the ``/result`` endpoint.

    Raises:
        HTTPException: 404 when the task does not exist.
    """
    found = await get_task(task_id)

    # A result is available once the orchestrator has stored a result_ref.
    has_result = found.result_ref is not None

    return TaskResponse(
        id=found.id,
        session_id=found.session_id,
        type=found.type,
        state=found.state,
        created_at=found.created_at,
        updated_at=found.updated_at,
        result_available=has_result,
        error_message=found.error_message,
    )
|
||||
|
||||
|
||||
@router.put("/{task_id}/transition", response_model=TaskResponse)
async def transition_task(task_id: str, transition: TaskTransition, request: Request):
    """
    Transition task to a new state.

    Only valid transitions are allowed according to the state machine.
    On transition to APPROVED the task is executed immediately.

    Args:
        task_id: Identifier of the task to transition.
        transition: Target state plus an optional human-readable reason.
        request: FastAPI request; used to reach the shared orchestrator
            on ``app.state`` (injected by FastAPI, HTTP interface unchanged).

    Raises:
        HTTPException: 404 when the task does not exist, 400 when the
            requested transition violates the state machine.
    """
    task = await get_task(task_id)

    # Validate against the state machine before mutating anything.
    if not is_valid_transition(task.state, transition.new_state):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid transition from {task.state.value} to {transition.new_state.value}"
        )

    logger.info(
        "Transitioning task",
        task_id=task_id[:8],
        from_state=task.state.value,
        to_state=transition.new_state.value,
        reason=transition.reason,
    )

    # Apply transition
    task.transition_to(transition.new_state, transition.reason)

    # If approved, execute the task.
    # Fix: use the shared orchestrator from app.state (as create_task does)
    # instead of instantiating a fresh TaskOrchestrator, which would not
    # share the application's queue/worker state.
    if transition.new_state == TaskState.APPROVED:
        orchestrator = request.app.state.orchestrator
        await orchestrator.execute_task(task)

    return TaskResponse(
        id=task.id,
        session_id=task.session_id,
        type=task.type,
        state=task.state,
        created_at=task.created_at,
        updated_at=task.updated_at,
        result_available=task.result_ref is not None,
        error_message=task.error_message,
    )
|
||||
|
||||
|
||||
@router.delete("/{task_id}")
async def delete_task(task_id: str):
    """
    Delete a task.

    Only allowed for tasks in DRAFT, COMPLETED, or EXPIRED state.

    Raises:
        HTTPException: 404 when the task does not exist, 400 when the
            task's current state forbids deletion.
    """
    task = await get_task(task_id)

    # Deletion is permitted only for terminal or not-yet-started states.
    deletable_states = (
        TaskState.DRAFT,
        TaskState.COMPLETED,
        TaskState.EXPIRED,
        TaskState.REJECTED,
    )
    if task.state not in deletable_states:
        raise HTTPException(
            status_code=400,
            detail=f"Cannot delete task in {task.state.value} state"
        )

    logger.info(
        "Deleting task",
        task_id=task_id[:8],
        state=task.state.value,
    )

    # Detach from the owning session's pending list, if still referenced.
    from api.sessions import _sessions
    owning_session = _sessions.get(task.session_id)
    if owning_session and task_id in owning_session.pending_tasks:
        owning_session.pending_tasks.remove(task_id)

    # Remove the task itself from the in-memory store.
    _tasks.pop(task_id)

    return {"status": "deleted", "task_id": task_id}
|
||||
|
||||
|
||||
@router.get("/{task_id}/result")
async def get_task_result(task_id: str, request: Request):
    """
    Get task result.

    Result is decrypted using the session's namespace key.
    Only available for completed tasks.

    Raises:
        HTTPException: 404 when the task does not exist or has no result,
            400 when the task has not completed yet.
    """
    task = await get_task(task_id)

    # Guard clauses: the task must be finished and must carry a result.
    if task.state != TaskState.COMPLETED:
        raise HTTPException(
            status_code=400,
            detail=f"Task is in {task.state.value} state, not completed"
        )
    if not task.result_ref:
        raise HTTPException(
            status_code=404,
            detail="No result available for this task"
        )

    # App-wide encryption service used to unwrap stored results.
    crypto = request.app.state.encryption

    # Decrypt under the task's namespace key when encryption is active;
    # otherwise the stored reference is returned as-is.
    if settings.encryption_enabled:
        payload = crypto.decrypt_content(task.result_ref, task.namespace_id)
    else:
        payload = task.result_ref

    completed = task.completed_at.isoformat() if task.completed_at else None
    return {
        "task_id": task_id,
        "type": task.type.value,
        "result": payload,
        "completed_at": completed,
    }
|
||||
Reference in New Issue
Block a user