feat: voice-service von lehrer nach core verschoben, Pipeline erweitert (voice, BQAS, embedding, night-scheduler)
This commit is contained in:
12
voice-service/api/__init__.py
Normal file
12
voice-service/api/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""
|
||||
Voice Service API Routes
|
||||
"""
|
||||
from api.sessions import router as sessions_router
|
||||
from api.tasks import router as tasks_router
|
||||
from api.streaming import router as streaming_router
|
||||
|
||||
__all__ = [
|
||||
"sessions_router",
|
||||
"tasks_router",
|
||||
"streaming_router",
|
||||
]
|
||||
365
voice-service/api/bqas.py
Normal file
365
voice-service/api/bqas.py
Normal file
@@ -0,0 +1,365 @@
|
||||
"""
|
||||
BQAS API - Quality Assurance Endpoints
|
||||
"""
|
||||
import structlog
|
||||
import subprocess
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime
|
||||
|
||||
from bqas.runner import get_runner, BQASRunner
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# Response Models
|
||||
class TestRunResponse(BaseModel):
    """Summary of one recorded test-suite run."""
    id: int
    timestamp: str  # ISO-8601 with a trailing "Z" (see _run_to_response)
    git_commit: Optional[str] = None  # short hash, if git was available at run time
    suite: str  # which suite produced this run ("golden", "synthetic" or "rag")
    # The composite score is reported under the field that matches `suite`;
    # the other two score fields remain 0.0 (populated by _run_to_response).
    golden_score: float
    synthetic_score: float
    rag_score: float = 0.0
    total_tests: int
    passed_tests: int
    failed_tests: int
    duration_seconds: float
|
||||
|
||||
|
||||
class MetricsResponse(BaseModel):
    """Aggregated quality metrics for a single test-suite run."""
    total_tests: int
    passed_tests: int
    failed_tests: int
    # Per-dimension averages; values are pre-rounded by _metrics_to_response.
    avg_intent_accuracy: float
    avg_faithfulness: float
    avg_relevance: float
    avg_coherence: float
    safety_pass_rate: float  # fraction of tests passing safety checks
    avg_composite_score: float
    scores_by_intent: Dict[str, float]  # intent name -> average composite score
    failed_test_ids: List[str]  # IDs of individual failing tests
|
||||
|
||||
|
||||
class TrendResponse(BaseModel):
    """Time series of golden-suite scores plus a coarse trend label."""
    dates: List[str]  # ISO-8601 timestamps, oldest first (see get_trend)
    scores: List[float]  # composite scores aligned with `dates`
    trend: str  # improving, stable, declining, insufficient_data
|
||||
|
||||
|
||||
class LatestMetricsResponse(BaseModel):
    """Latest metrics per suite; a field is None when that suite has no runs."""
    golden: Optional[MetricsResponse] = None
    synthetic: Optional[MetricsResponse] = None
    rag: Optional[MetricsResponse] = None
|
||||
|
||||
|
||||
class RunResultResponse(BaseModel):
    """Outcome of triggering a suite run via the API."""
    success: bool
    message: str  # human-readable summary or error description
    metrics: Optional[MetricsResponse] = None  # populated only on success
    run_id: Optional[int] = None  # ID of the stored run, on success
|
||||
|
||||
|
||||
# State tracking for running tests.
# Maps suite name -> True while that suite is executing; the run_* endpoints
# use it to reject a second concurrent run of the same suite.
_is_running: Dict[str, bool] = {"golden": False, "synthetic": False, "rag": False}
|
||||
|
||||
|
||||
def _get_git_commit() -> Optional[str]:
|
||||
"""Get current git commit hash."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-parse", "--short", "HEAD"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return result.stdout.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _metrics_to_response(metrics) -> MetricsResponse:
    """Map an internal BQASMetrics object onto the public MetricsResponse model."""
    # Round per-intent composite scores to three decimal places.
    per_intent = {
        intent: round(score, 3)
        for intent, score in metrics.scores_by_intent.items()
    }
    return MetricsResponse(
        total_tests=metrics.total_tests,
        passed_tests=metrics.passed_tests,
        failed_tests=metrics.failed_tests,
        # Dimension averages are reported at two decimal places...
        avg_intent_accuracy=round(metrics.avg_intent_accuracy, 2),
        avg_faithfulness=round(metrics.avg_faithfulness, 2),
        avg_relevance=round(metrics.avg_relevance, 2),
        avg_coherence=round(metrics.avg_coherence, 2),
        # ...while rates and composite scores keep three.
        safety_pass_rate=round(metrics.safety_pass_rate, 3),
        avg_composite_score=round(metrics.avg_composite_score, 3),
        scores_by_intent=per_intent,
        failed_test_ids=metrics.failed_test_ids,
    )
|
||||
|
||||
|
||||
def _run_to_response(run) -> TestRunResponse:
    """Convert an internal TestRun into the public TestRunResponse model.

    The run's composite score is attributed only to the field matching
    its suite; the other two score fields stay 0.0.
    """
    # Round once instead of repeating the same expression per suite field.
    score = round(run.metrics.avg_composite_score, 3)
    return TestRunResponse(
        id=run.id,
        timestamp=run.timestamp.isoformat() + "Z",
        git_commit=run.git_commit,
        suite=run.suite,
        golden_score=score if run.suite == "golden" else 0.0,
        synthetic_score=score if run.suite == "synthetic" else 0.0,
        rag_score=score if run.suite == "rag" else 0.0,
        total_tests=run.metrics.total_tests,
        passed_tests=run.metrics.passed_tests,
        failed_tests=run.metrics.failed_tests,
        duration_seconds=round(run.duration_seconds, 1),
    )
|
||||
|
||||
|
||||
@router.get("/runs", response_model=Dict[str, Any])
async def get_test_runs(limit: int = 20):
    """Return the most recent test runs together with their count."""
    recent = get_runner().get_test_runs(limit)
    return {
        "runs": [_run_to_response(run) for run in recent],
        "total": len(recent),
    }
|
||||
|
||||
|
||||
@router.get("/run/{run_id}", response_model=TestRunResponse)
async def get_test_run(run_id: int):
    """Return one test run by ID, or 404 if it is not among the last 100."""
    candidates = get_runner().get_test_runs(100)
    match = next((candidate for candidate in candidates if candidate.id == run_id), None)
    if match is None:
        raise HTTPException(status_code=404, detail="Test run not found")
    return _run_to_response(match)
|
||||
|
||||
|
||||
@router.get("/trend", response_model=TrendResponse)
async def get_trend(days: int = 30):
    """Return the golden-suite score series and a coarse trend label.

    NOTE(review): the ``days`` parameter is accepted but not applied as a
    filter; the series always covers the last 100 stored runs.
    """
    history = get_runner().get_test_runs(100)

    # Only golden-suite runs feed the trend.
    golden = [run for run in history if run.suite == "golden"]

    if len(golden) < 3:
        return TrendResponse(dates=[], scores=[], trend="insufficient_data")

    # Oldest first so the series reads left-to-right in time.
    golden.sort(key=lambda run: run.timestamp)

    dates = [run.timestamp.isoformat() + "Z" for run in golden]
    scores = [round(run.metrics.avg_composite_score, 3) for run in golden]

    # With six or more points, compare the newest three against the oldest
    # three; a shift of more than 0.1 flips the label away from "stable".
    label = "stable"
    if len(scores) >= 6:
        shift = sum(scores[-3:]) / 3 - sum(scores[:3]) / 3
        if shift > 0.1:
            label = "improving"
        elif shift < -0.1:
            label = "declining"

    return TrendResponse(dates=dates, scores=scores, trend=label)
|
||||
|
||||
|
||||
@router.get("/latest-metrics", response_model=LatestMetricsResponse)
async def get_latest_metrics():
    """Return the most recent metrics for each test suite (None if never run)."""
    latest = get_runner().get_latest_metrics()

    def convert(suite: str):
        # Suites that never ran are reported as None rather than omitted.
        raw = latest[suite]
        return _metrics_to_response(raw) if raw else None

    return LatestMetricsResponse(
        golden=convert("golden"),
        synthetic=convert("synthetic"),
        rag=convert("rag"),
    )
|
||||
|
||||
|
||||
@router.post("/run/golden", response_model=RunResultResponse)
async def run_golden_suite(background_tasks: BackgroundTasks):
    """Run the golden test suite.

    Executes synchronously within the request and returns the resulting
    metrics; rejects the call (success=False) while another golden run
    is in flight. Failures are reported in-band, not as HTTP errors.

    NOTE(review): ``background_tasks`` is currently unused - the suite
    runs inline; confirm whether background execution was intended.
    """
    if _is_running["golden"]:
        return RunResultResponse(
            success=False,
            message="Golden suite is already running"
        )

    # Check-then-set with no await in between: handlers on the same event
    # loop cannot interleave here, so the flag acts as a simple mutex.
    _is_running["golden"] = True
    logger.info("Starting Golden Suite via API")

    try:
        runner = get_runner()
        git_commit = _get_git_commit()

        # Run the suite
        run = await runner.run_golden_suite(git_commit=git_commit)

        metrics = _metrics_to_response(run.metrics)

        return RunResultResponse(
            success=True,
            message=f"Golden suite completed: {run.metrics.passed_tests}/{run.metrics.total_tests} passed ({run.metrics.avg_composite_score:.2f} avg score)",
            metrics=metrics,
            run_id=run.id,
        )

    except Exception as e:
        # Broad catch is intentional: suite failures become a structured
        # API response instead of a 500.
        logger.error("Golden suite failed", error=str(e))
        return RunResultResponse(
            success=False,
            message=f"Golden suite failed: {str(e)}"
        )

    finally:
        # Always release the busy flag, even on unexpected errors.
        _is_running["golden"] = False
|
||||
|
||||
|
||||
@router.post("/run/synthetic", response_model=RunResultResponse)
async def run_synthetic_suite(background_tasks: BackgroundTasks):
    """Run the synthetic test suite.

    Executes synchronously within the request and returns the resulting
    metrics; rejects the call (success=False) while another synthetic run
    is in flight. Failures are reported in-band, not as HTTP errors.

    NOTE(review): ``background_tasks`` is currently unused - the suite
    runs inline; confirm whether background execution was intended.
    """
    if _is_running["synthetic"]:
        return RunResultResponse(
            success=False,
            message="Synthetic suite is already running"
        )

    # Check-then-set with no await in between: handlers on the same event
    # loop cannot interleave here, so the flag acts as a simple mutex.
    _is_running["synthetic"] = True
    logger.info("Starting Synthetic Suite via API")

    try:
        runner = get_runner()
        git_commit = _get_git_commit()

        # Run the suite
        run = await runner.run_synthetic_suite(git_commit=git_commit)

        metrics = _metrics_to_response(run.metrics)

        return RunResultResponse(
            success=True,
            message=f"Synthetic suite completed: {run.metrics.passed_tests}/{run.metrics.total_tests} passed ({run.metrics.avg_composite_score:.2f} avg score)",
            metrics=metrics,
            run_id=run.id,
        )

    except Exception as e:
        # Broad catch is intentional: suite failures become a structured
        # API response instead of a 500.
        logger.error("Synthetic suite failed", error=str(e))
        return RunResultResponse(
            success=False,
            message=f"Synthetic suite failed: {str(e)}"
        )

    finally:
        # Always release the busy flag, even on unexpected errors.
        _is_running["synthetic"] = False
|
||||
|
||||
|
||||
@router.post("/run/rag", response_model=RunResultResponse)
async def run_rag_suite(background_tasks: BackgroundTasks):
    """Run the RAG/Correction test suite.

    Executes synchronously within the request and returns the resulting
    metrics; rejects the call (success=False) while another RAG run is
    in flight. Failures are reported in-band, not as HTTP errors.

    NOTE(review): ``background_tasks`` is currently unused - the suite
    runs inline; confirm whether background execution was intended.
    """
    if _is_running["rag"]:
        return RunResultResponse(
            success=False,
            message="RAG suite is already running"
        )

    # Check-then-set with no await in between: handlers on the same event
    # loop cannot interleave here, so the flag acts as a simple mutex.
    _is_running["rag"] = True
    logger.info("Starting RAG Suite via API")

    try:
        runner = get_runner()
        git_commit = _get_git_commit()

        # Run the suite
        run = await runner.run_rag_suite(git_commit=git_commit)

        metrics = _metrics_to_response(run.metrics)

        return RunResultResponse(
            success=True,
            message=f"RAG suite completed: {run.metrics.passed_tests}/{run.metrics.total_tests} passed ({run.metrics.avg_composite_score:.2f} avg score)",
            metrics=metrics,
            run_id=run.id,
        )

    except Exception as e:
        # Broad catch is intentional: suite failures become a structured
        # API response instead of a 500.
        logger.error("RAG suite failed", error=str(e))
        return RunResultResponse(
            success=False,
            message=f"RAG suite failed: {str(e)}"
        )

    finally:
        # Always release the busy flag, even on unexpected errors.
        _is_running["rag"] = False
|
||||
|
||||
|
||||
@router.get("/regression-check")
async def check_regression(threshold: float = 0.1):
    """Check whether the newest golden-suite score regressed.

    Compares the most recent golden run against the average of up to the
    five runs before it; a drop larger than ``threshold`` counts as a
    regression.

    Args:
        threshold: Minimum score drop that is flagged as a regression.
    """
    runner = get_runner()
    runs = runner.get_test_runs(20)

    golden_runs = [r for r in runs if r.suite == "golden"]

    if len(golden_runs) < 2:
        return {
            "is_regression": False,
            "message": "Not enough data for regression check",
            "current_score": None,
            "previous_avg": None,
            "delta": None,
        }

    # Sort by timestamp (newest first). The early return above guarantees
    # at least two runs, so the fallback branches the original carried
    # ("if golden_runs else 0" and the empty-previous_scores guard) were
    # unreachable and have been removed.
    golden_runs.sort(key=lambda r: r.timestamp, reverse=True)

    current_score = golden_runs[0].metrics.avg_composite_score
    previous_scores = [r.metrics.avg_composite_score for r in golden_runs[1:6]]
    previous_avg = sum(previous_scores) / len(previous_scores)
    # Positive delta means the newest score dropped below the recent average.
    delta = previous_avg - current_score

    is_regression = delta > threshold

    return {
        "is_regression": is_regression,
        "message": f"Regression detected: score dropped by {delta:.2f}" if is_regression else "No regression detected",
        "current_score": round(current_score, 3),
        "previous_avg": round(previous_avg, 3),
        "delta": round(delta, 3),
        "threshold": threshold,
    }
|
||||
|
||||
|
||||
@router.get("/health")
async def bqas_health():
    """BQAS health check: judge availability, run count, busy flags, config."""
    health = await get_runner().health_check()

    report = {
        "status": "healthy",
        "judge_available": health["judge_available"],
        "rag_judge_available": health["rag_judge_available"],
        "test_runs_count": health["test_runs_count"],
        "is_running": _is_running,
        "config": health["config"],
    }
    return report
|
||||
220
voice-service/api/sessions.py
Normal file
220
voice-service/api/sessions.py
Normal file
@@ -0,0 +1,220 @@
|
||||
"""
|
||||
Session Management API
|
||||
Handles voice session lifecycle
|
||||
|
||||
Endpoints:
|
||||
- POST /api/v1/sessions # Session erstellen
|
||||
- GET /api/v1/sessions/{id} # Session Status
|
||||
- DELETE /api/v1/sessions/{id} # Session beenden
|
||||
- GET /api/v1/sessions/{id}/tasks # Pending Tasks
|
||||
"""
|
||||
import structlog
|
||||
from fastapi import APIRouter, HTTPException, Request, Depends
|
||||
from typing import List, Optional
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from config import settings
|
||||
from models.session import (
|
||||
VoiceSession,
|
||||
SessionCreate,
|
||||
SessionResponse,
|
||||
SessionStatus,
|
||||
)
|
||||
from models.task import TaskResponse, TaskState
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# In-memory session store (will be replaced with Valkey in production).
# This is transient - sessions are never persisted to disk.
# Keyed by session ID; also imported by the streaming and task modules.
_sessions: dict[str, VoiceSession] = {}
|
||||
|
||||
|
||||
async def get_session(session_id: str) -> VoiceSession:
    """Return the stored session for ``session_id`` or raise a 404."""
    found = _sessions.get(session_id)
    if found is None:
        raise HTTPException(status_code=404, detail="Session not found")
    return found
|
||||
|
||||
|
||||
@router.post("", response_model=SessionResponse)
async def create_session(request: Request, session_data: SessionCreate):
    """
    Create a new voice session.

    Returns a session ID and WebSocket URL for audio streaming.
    The client must connect to the WebSocket within 30 seconds.

    Raises:
        HTTPException: 401 for an invalid encryption key hash,
            429 when the namespace already has the maximum number of
            active sessions.
    """
    logger.info(
        "Creating voice session",
        namespace_id=session_data.namespace_id[:8] + "...",
        device_type=session_data.device_type,
    )

    # Verify namespace key hash. (The unused `orchestrator` fetch from
    # app state that was here has been removed - nothing consumed it.)
    encryption = request.app.state.encryption

    if settings.encryption_enabled:
        if not encryption.verify_key_hash(session_data.key_hash):
            logger.warning("Invalid key hash", namespace_id=session_data.namespace_id[:8])
            raise HTTPException(status_code=401, detail="Invalid encryption key hash")

    # Rate limit: only sessions that are still live count against the cap.
    namespace_sessions = [
        s for s in _sessions.values()
        if s.namespace_id == session_data.namespace_id
        and s.status not in [SessionStatus.CLOSED, SessionStatus.ERROR]
    ]
    if len(namespace_sessions) >= settings.max_sessions_per_user:
        raise HTTPException(
            status_code=429,
            detail=f"Maximum {settings.max_sessions_per_user} concurrent sessions allowed"
        )

    # Create the session object.
    session = VoiceSession(
        namespace_id=session_data.namespace_id,
        key_hash=session_data.key_hash,
        device_type=session_data.device_type,
        client_version=session_data.client_version,
    )

    # Store session (in RAM only - never persisted to disk).
    _sessions[session.id] = session

    logger.info(
        "Voice session created",
        session_id=session.id[:8],
        namespace_id=session_data.namespace_id[:8],
    )

    # Build WebSocket URL.
    # Use X-Forwarded-Proto if behind a reverse proxy (nginx), otherwise
    # fall back to the scheme of the incoming request.
    forwarded_proto = request.headers.get("x-forwarded-proto", request.url.scheme)
    host = request.headers.get("host", f"localhost:{settings.port}")
    ws_scheme = "wss" if forwarded_proto == "https" else "ws"
    ws_url = f"{ws_scheme}://{host}/ws/voice?session_id={session.id}"

    return SessionResponse(
        id=session.id,
        namespace_id=session.namespace_id,
        status=session.status,
        created_at=session.created_at,
        websocket_url=ws_url,
    )
|
||||
|
||||
|
||||
@router.get("/{session_id}", response_model=SessionResponse)
async def get_session_status(session_id: str, request: Request):
    """
    Report the current state of a session.

    Flips the session to CLOSED once it is older than the configured TTL
    and returns the WebSocket URL the client should use.
    """
    session = await get_session(session_id)

    # Lazy expiry: sessions past the TTL are marked CLOSED on read.
    age = datetime.utcnow() - session.created_at
    if age > timedelta(hours=settings.session_ttl_hours):
        session.status = SessionStatus.CLOSED
        logger.info("Session expired", session_id=session_id[:8])

    # Build the WebSocket URL, honouring X-Forwarded-Proto when running
    # behind a reverse proxy (nginx); otherwise use the request scheme.
    proto = request.headers.get("x-forwarded-proto", request.url.scheme)
    host = request.headers.get("host", f"localhost:{settings.port}")
    scheme = "wss" if proto == "https" else "ws"
    socket_url = f"{scheme}://{host}/ws/voice?session_id={session.id}"

    return SessionResponse(
        id=session.id,
        namespace_id=session.namespace_id,
        status=session.status,
        created_at=session.created_at,
        websocket_url=socket_url,
    )
|
||||
|
||||
|
||||
@router.delete("/{session_id}")
async def close_session(session_id: str):
    """
    Close a session and drop all of its transient state.

    Messages and audio state are discarded; this is the expected cleanup
    path for well-behaved clients.
    """
    session = await get_session(session_id)

    logger.info(
        "Closing session",
        session_id=session_id[:8],
        messages_count=len(session.messages),
        tasks_count=len(session.pending_tasks),
    )

    # Flip the status first so any other holder of the object sees CLOSED,
    # then forget the session entirely.
    session.status = SessionStatus.CLOSED
    _sessions.pop(session_id, None)

    return {"status": "closed", "session_id": session_id}
|
||||
|
||||
|
||||
@router.get("/{session_id}/tasks", response_model=List[TaskResponse])
async def get_session_tasks(session_id: str, request: Request, state: Optional[TaskState] = None):
    """
    List tasks belonging to a session, optionally filtered by state.
    """
    # Raises 404 for unknown sessions; the object itself is not needed here.
    await get_session(session_id)

    # Tasks live in the in-memory store owned by api.tasks; imported
    # locally to avoid a circular import at module load time.
    from api.tasks import _tasks

    def belongs(task) -> bool:
        if task.session_id != session_id:
            return False
        return state is None or task.state == state

    return [
        TaskResponse(
            id=task.id,
            session_id=task.session_id,
            type=task.type,
            state=task.state,
            created_at=task.created_at,
            updated_at=task.updated_at,
            result_available=task.result_ref is not None,
            error_message=task.error_message,
        )
        for task in _tasks.values()
        if belongs(task)
    ]
|
||||
|
||||
|
||||
@router.get("/{session_id}/stats")
async def get_session_stats(session_id: str):
    """
    Aggregate counters for one session (debugging/monitoring).

    No PII is returned - only counts and the truncated session ID.
    """
    session = await get_session(session_id)

    uptime = (datetime.utcnow() - session.created_at).total_seconds()
    return {
        "session_id_truncated": session_id[:8],
        "status": session.status.value,
        "age_seconds": uptime,
        "message_count": len(session.messages),
        "pending_tasks_count": len(session.pending_tasks),
        "audio_chunks_received": session.audio_chunks_received,
        "audio_chunks_processed": session.audio_chunks_processed,
        "device_type": session.device_type,
    }
|
||||
325
voice-service/api/streaming.py
Normal file
325
voice-service/api/streaming.py
Normal file
@@ -0,0 +1,325 @@
|
||||
"""
|
||||
WebSocket Streaming API
|
||||
Handles real-time audio streaming for voice interface
|
||||
|
||||
WebSocket Protocol:
|
||||
- Binary frames: Int16 PCM Audio (24kHz, 80ms frames)
|
||||
- JSON frames: {"type": "config|end_turn|interrupt"}
|
||||
|
||||
Server -> Client:
|
||||
- Binary: Audio Response (base64)
|
||||
- JSON: {"type": "transcript|intent|status|error"}
|
||||
"""
|
||||
import structlog
|
||||
import asyncio
|
||||
import json
|
||||
import base64
|
||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Query
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
from config import settings
|
||||
from models.session import SessionStatus, TranscriptMessage, AudioChunk
|
||||
from models.task import TaskCreate, TaskType
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Active WebSocket connections (transient).
# Keyed by session ID; entries are added on accept and removed in the
# handler's cleanup path when the socket closes.
active_connections: dict[str, WebSocket] = {}
|
||||
|
||||
|
||||
@router.websocket("/ws/voice")
async def voice_websocket(
    websocket: WebSocket,
    session_id: str = Query(..., description="Session ID from /api/v1/sessions"),
    namespace: Optional[str] = Query(None, description="Namespace ID"),
    key_hash: Optional[str] = Query(None, description="Encryption key hash"),
):
    """
    WebSocket endpoint for voice streaming.

    Protocol:
    1. Client connects with session_id
    2. Client sends binary audio frames (Int16 PCM, 24kHz)
    3. Server responds with transcripts, intents, and audio

    Audio Processing:
    - Chunks are processed in RAM only
    - No audio is ever persisted
    - Transcripts are encrypted before any storage

    NOTE(review): the ``namespace`` and ``key_hash`` query parameters are
    accepted but not read in this handler - confirm whether they should
    be validated against the session.
    """
    # Resolve the session from the shared in-memory store; imported
    # locally to avoid a circular import at module load time.
    from api.sessions import _sessions
    session = _sessions.get(session_id)

    if not session:
        # 4004 is an application-defined close code for "session not found".
        await websocket.close(code=4004, reason="Session not found")
        return

    # Accept connection
    await websocket.accept()

    logger.info(
        "WebSocket connected",
        session_id=session_id[:8],
        namespace_id=session.namespace_id[:8],
    )

    # Update session status and register the live socket.
    session.status = SessionStatus.CONNECTED
    active_connections[session_id] = websocket

    # Audio buffer for accumulating chunks until 500ms worth is available.
    audio_buffer = bytearray()
    chunk_sequence = 0

    try:
        # Send initial status so the client knows the expected audio format.
        await websocket.send_json({
            "type": "status",
            "status": "connected",
            "session_id": session_id,
            "audio_config": {
                "sample_rate": settings.audio_sample_rate,
                "frame_size_ms": settings.audio_frame_size_ms,
                "encoding": "pcm_s16le",
            },
        })

        while True:
            # Receive message (binary audio frame or JSON control text).
            message = await websocket.receive()

            if "bytes" in message:
                # Binary audio data
                audio_data = message["bytes"]
                session.audio_chunks_received += 1

                # Create audio chunk (transient - never persisted).
                # timestamp_ms wraps at 24h (milliseconds since midnight UTC).
                # NOTE(review): `chunk` is constructed but not consumed below -
                # confirm whether it is still needed.
                chunk = AudioChunk(
                    sequence=chunk_sequence,
                    timestamp_ms=int((datetime.utcnow().timestamp() * 1000) % (24 * 60 * 60 * 1000)),
                    data=audio_data,
                )
                chunk_sequence += 1

                # Accumulate in buffer
                audio_buffer.extend(audio_data)

                # Process when we have enough data (e.g., 500ms worth)
                samples_needed = settings.audio_sample_rate // 2  # 500ms
                bytes_needed = samples_needed * 2  # 16-bit = 2 bytes

                if len(audio_buffer) >= bytes_needed:
                    session.status = SessionStatus.PROCESSING

                    # Process exactly one 500ms window of audio.
                    await process_audio_chunk(
                        websocket,
                        session,
                        bytes(audio_buffer[:bytes_needed]),
                    )

                    # Remove processed data; the remainder carries over.
                    audio_buffer = audio_buffer[bytes_needed:]
                    session.audio_chunks_processed += 1

            elif "text" in message:
                # JSON control message
                try:
                    data = json.loads(message["text"])
                    msg_type = data.get("type")

                    if msg_type == "config":
                        # Client configuration (currently only logged).
                        logger.debug("Received config", config=data)

                    elif msg_type == "end_turn":
                        # User finished speaking
                        session.status = SessionStatus.PROCESSING

                        # Flush whatever audio remains in the buffer.
                        if audio_buffer:
                            await process_audio_chunk(
                                websocket,
                                session,
                                bytes(audio_buffer),
                            )
                            audio_buffer.clear()

                        # Signal end of user turn
                        await websocket.send_json({
                            "type": "status",
                            "status": "processing",
                        })

                    elif msg_type == "interrupt":
                        # User interrupted the response; go back to listening.
                        session.status = SessionStatus.LISTENING
                        await websocket.send_json({
                            "type": "status",
                            "status": "interrupted",
                        })

                    elif msg_type == "ping":
                        # Keep-alive ping
                        await websocket.send_json({"type": "pong"})

                except json.JSONDecodeError:
                    # Malformed control frames are logged (truncated) and ignored.
                    logger.warning("Invalid JSON message", message=message["text"][:100])

            # Record activity after every frame, binary or text.
            session.update_activity()

    except WebSocketDisconnect:
        logger.info("WebSocket disconnected", session_id=session_id[:8])
    except Exception as e:
        logger.error("WebSocket error", session_id=session_id[:8], error=str(e))
        session.status = SessionStatus.ERROR
    finally:
        # Cleanup: the session always ends CLOSED and the socket is
        # deregistered, regardless of how the loop exited.
        session.status = SessionStatus.CLOSED
        if session_id in active_connections:
            del active_connections[session_id]
|
||||
|
||||
|
||||
async def process_audio_chunk(
    websocket: WebSocket,
    session,
    audio_data: bytes,
):
    """
    Process an audio chunk through the voice pipeline.

    1. PersonaPlex/Ollama for transcription + understanding
    2. Intent detection
    3. Task creation if needed
    4. Response generation
    5. Audio synthesis (if PersonaPlex)

    Args:
        websocket: Open client socket; receives transcript/intent/response
            JSON frames and (optionally) binary audio.
        session: The active voice session; its message history is extended
            with both the user transcript and the assistant reply.
        audio_data: Raw PCM bytes for one buffered window of speech.

    Errors never propagate: any failure is reported to the client as a
    ``{"type": "error"}`` frame.
    """
    # Local imports avoid circular dependencies at module load time.
    from services.task_orchestrator import TaskOrchestrator
    from services.intent_router import IntentRouter

    # NOTE(review): a fresh orchestrator/router is built per chunk -
    # confirm these are cheap to construct or should live on app state.
    orchestrator = TaskOrchestrator()
    intent_router = IntentRouter()

    try:
        # Transcribe audio
        if settings.use_personaplex:
            # Use PersonaPlex for transcription
            from services.personaplex_client import PersonaPlexClient
            client = PersonaPlexClient()
            transcript = await client.transcribe(audio_data)
        else:
            # Use Ollama fallback (text-only, requires separate ASR)
            # For MVP, we'll simulate with a placeholder
            # In production, integrate with Whisper or similar
            from services.fallback_llm_client import FallbackLLMClient
            llm_client = FallbackLLMClient()
            transcript = await llm_client.process_audio_description(audio_data)

        # Nothing transcribed (silence or empty result): stop early.
        if not transcript or not transcript.strip():
            return

        # Send transcript to client
        # NOTE(review): confidence is hard-coded at 0.95 here and below -
        # confirm whether the transcriber exposes a real value.
        await websocket.send_json({
            "type": "transcript",
            "text": transcript,
            "final": True,
            "confidence": 0.95,
        })

        # Add the user's turn to the session history.
        user_message = TranscriptMessage(
            role="user",
            content=transcript,
            confidence=0.95,
        )
        session.messages.append(user_message)

        # Detect intent against the full conversation so far.
        intent = await intent_router.detect_intent(transcript, session.messages)

        if intent:
            await websocket.send_json({
                "type": "intent",
                "intent": intent.type.value,
                "confidence": intent.confidence,
                "parameters": intent.parameters,
            })

            # Create task if intent is actionable
            if intent.is_actionable:
                task = await orchestrator.create_task_from_intent(
                    session_id=session.id,
                    namespace_id=session.namespace_id,
                    intent=intent,
                    transcript=transcript,
                )

                await websocket.send_json({
                    "type": "task_created",
                    "task_id": task.id,
                    "task_type": task.type.value,
                    "state": task.state.value,
                })

        # Generate the assistant's text response.
        # NOTE(review): `intent` may be None here - confirm generate_response
        # accepts that.
        response_text = await orchestrator.generate_response(
            session_messages=session.messages,
            intent=intent,
            namespace_id=session.namespace_id,
        )

        # Send text response
        await websocket.send_json({
            "type": "response",
            "text": response_text,
        })

        # Add the assistant's turn to the session history.
        assistant_message = TranscriptMessage(
            role="assistant",
            content=response_text,
        )
        session.messages.append(assistant_message)

        # Generate audio response if PersonaPlex is available
        if settings.use_personaplex:
            from services.personaplex_client import PersonaPlexClient
            client = PersonaPlexClient()
            audio_response = await client.synthesize(response_text)

            if audio_response:
                # Send audio to the client in frame-sized binary chunks.
                chunk_size = settings.audio_frame_samples * 2  # 16-bit
                for i in range(0, len(audio_response), chunk_size):
                    chunk = audio_response[i:i + chunk_size]
                    await websocket.send_bytes(chunk)

        # Back to listening once the full response has been delivered.
        session.status = SessionStatus.LISTENING

        await websocket.send_json({
            "type": "status",
            "status": "listening",
        })

    except Exception as e:
        # Broad catch is intentional: pipeline failures are reported to
        # the client as an error frame instead of killing the socket.
        logger.error("Audio processing error", error=str(e))
        await websocket.send_json({
            "type": "error",
            "message": "Failed to process audio",
            "code": "processing_error",
        })
|
||||
|
||||
|
||||
@router.get("/ws/stats")
async def get_websocket_stats():
    """Report how many WebSocket connections are currently open."""
    truncated_ids = [connection_id[:8] for connection_id in active_connections]
    return {
        "active_connections": len(active_connections),
        "connection_ids": truncated_ids,
    }
|
||||
262
voice-service/api/tasks.py
Normal file
262
voice-service/api/tasks.py
Normal file
@@ -0,0 +1,262 @@
|
||||
"""
|
||||
Task Management API
|
||||
Handles TaskOrchestrator task lifecycle
|
||||
|
||||
Endpoints:
|
||||
- POST /api/v1/tasks # Task erstellen
|
||||
- GET /api/v1/tasks/{id} # Task Status
|
||||
- PUT /api/v1/tasks/{id}/transition # Status aendern
|
||||
- DELETE /api/v1/tasks/{id} # Task loeschen
|
||||
"""
|
||||
import structlog
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
from config import settings
|
||||
from models.task import (
|
||||
Task,
|
||||
TaskCreate,
|
||||
TaskResponse,
|
||||
TaskTransition,
|
||||
TaskState,
|
||||
TaskType,
|
||||
is_valid_transition,
|
||||
)
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# In-memory task store (will be replaced with Valkey in production).
# Keyed by task ID; also read by api.sessions to list a session's tasks.
_tasks: dict[str, Task] = {}
|
||||
|
||||
|
||||
async def get_task(task_id: str) -> Task:
    """Return the stored task for ``task_id`` or raise a 404."""
    found = _tasks.get(task_id)
    if found is None:
        raise HTTPException(status_code=404, detail="Task not found")
    return found
|
||||
|
||||
|
||||
@router.post("", response_model=TaskResponse)
async def create_task(request: Request, task_data: TaskCreate):
    """
    Create a new task.

    The task will be queued for processing by TaskOrchestrator.
    Intent text (and well-known PII parameter fields) is encrypted
    before storage when encryption is enabled.

    Raises:
        HTTPException: 404 when the referenced session does not exist.
    """
    logger.info(
        "Creating task",
        session_id=task_data.session_id[:8],
        task_type=task_data.type.value,
    )

    # Get encryption service
    encryption = request.app.state.encryption

    # Get session to validate and get namespace; imported locally to
    # avoid a circular import at module load time.
    from api.sessions import _sessions
    session = _sessions.get(task_data.session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    # Encrypt intent text if encryption is enabled
    encrypted_intent = task_data.intent_text
    if settings.encryption_enabled:
        encrypted_intent = encryption.encrypt_content(
            task_data.intent_text,
            session.namespace_id,
        )

    # Encrypt any PII in parameters.
    # Only these well-known keys are treated as PII; any other parameter
    # value passes through unchanged.
    encrypted_params = {}
    pii_fields = ["student_name", "class_name", "parent_name", "content"]
    for key, value in task_data.parameters.items():
        if key in pii_fields and settings.encryption_enabled:
            encrypted_params[key] = encryption.encrypt_content(
                str(value),
                session.namespace_id,
            )
        else:
            encrypted_params[key] = value

    # Create task
    task = Task(
        session_id=task_data.session_id,
        namespace_id=session.namespace_id,
        type=task_data.type,
        intent_text=encrypted_intent,
        parameters=encrypted_params,
    )

    # Store task
    _tasks[task.id] = task

    # Add to session's pending tasks
    session.pending_tasks.append(task.id)

    # Queue task for processing
    orchestrator = request.app.state.orchestrator
    await orchestrator.queue_task(task)

    logger.info(
        "Task created",
        task_id=task.id[:8],
        session_id=task_data.session_id[:8],
        task_type=task_data.type.value,
    )

    return TaskResponse(
        id=task.id,
        session_id=task.session_id,
        type=task.type,
        state=task.state,
        created_at=task.created_at,
        updated_at=task.updated_at,
        # A freshly created task never has a result yet.
        result_available=False,
    )
|
||||
|
||||
|
||||
@router.get("/{task_id}", response_model=TaskResponse)
async def get_task_status(task_id: str):
    """
    Get task status.

    Returns the task's current state and whether a result is available
    for retrieval via the ``/result`` endpoint.

    Raises:
        HTTPException: 404 when the task does not exist.
    """
    found = await get_task(task_id)

    # A result is available once the orchestrator has stored a result_ref.
    has_result = found.result_ref is not None

    return TaskResponse(
        id=found.id,
        session_id=found.session_id,
        type=found.type,
        state=found.state,
        created_at=found.created_at,
        updated_at=found.updated_at,
        result_available=has_result,
        error_message=found.error_message,
    )
|
||||
|
||||
|
||||
@router.put("/{task_id}/transition", response_model=TaskResponse)
async def transition_task(task_id: str, transition: TaskTransition, request: Request):
    """
    Transition task to a new state.

    Only valid transitions are allowed according to the state machine.
    On transition to APPROVED the task is executed immediately.

    Args:
        task_id: Identifier of the task to transition.
        transition: Target state plus an optional human-readable reason.
        request: FastAPI request; used to reach the shared orchestrator
            on ``app.state`` (injected by FastAPI, HTTP interface unchanged).

    Raises:
        HTTPException: 404 when the task does not exist, 400 when the
            requested transition violates the state machine.
    """
    task = await get_task(task_id)

    # Validate against the state machine before mutating anything.
    if not is_valid_transition(task.state, transition.new_state):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid transition from {task.state.value} to {transition.new_state.value}"
        )

    logger.info(
        "Transitioning task",
        task_id=task_id[:8],
        from_state=task.state.value,
        to_state=transition.new_state.value,
        reason=transition.reason,
    )

    # Apply transition
    task.transition_to(transition.new_state, transition.reason)

    # If approved, execute the task.
    # Fix: use the shared orchestrator from app.state (as create_task does)
    # instead of instantiating a fresh TaskOrchestrator, which would not
    # share the application's queue/worker state.
    if transition.new_state == TaskState.APPROVED:
        orchestrator = request.app.state.orchestrator
        await orchestrator.execute_task(task)

    return TaskResponse(
        id=task.id,
        session_id=task.session_id,
        type=task.type,
        state=task.state,
        created_at=task.created_at,
        updated_at=task.updated_at,
        result_available=task.result_ref is not None,
        error_message=task.error_message,
    )
|
||||
|
||||
|
||||
@router.delete("/{task_id}")
async def delete_task(task_id: str):
    """
    Delete a task.

    Only allowed for tasks in DRAFT, COMPLETED, or EXPIRED state.

    Raises:
        HTTPException: 404 when the task does not exist, 400 when the
            task's current state forbids deletion.
    """
    task = await get_task(task_id)

    # Deletion is permitted only for terminal or not-yet-started states.
    deletable_states = (
        TaskState.DRAFT,
        TaskState.COMPLETED,
        TaskState.EXPIRED,
        TaskState.REJECTED,
    )
    if task.state not in deletable_states:
        raise HTTPException(
            status_code=400,
            detail=f"Cannot delete task in {task.state.value} state"
        )

    logger.info(
        "Deleting task",
        task_id=task_id[:8],
        state=task.state.value,
    )

    # Detach from the owning session's pending list, if still referenced.
    from api.sessions import _sessions
    owning_session = _sessions.get(task.session_id)
    if owning_session and task_id in owning_session.pending_tasks:
        owning_session.pending_tasks.remove(task_id)

    # Remove the task itself from the in-memory store.
    _tasks.pop(task_id)

    return {"status": "deleted", "task_id": task_id}
|
||||
|
||||
|
||||
@router.get("/{task_id}/result")
async def get_task_result(task_id: str, request: Request):
    """
    Get task result.

    Result is decrypted using the session's namespace key.
    Only available for completed tasks.

    Raises:
        HTTPException: 404 when the task does not exist or has no result,
            400 when the task has not completed yet.
    """
    task = await get_task(task_id)

    # Guard clauses: the task must be finished and must carry a result.
    if task.state != TaskState.COMPLETED:
        raise HTTPException(
            status_code=400,
            detail=f"Task is in {task.state.value} state, not completed"
        )
    if not task.result_ref:
        raise HTTPException(
            status_code=404,
            detail="No result available for this task"
        )

    # App-wide encryption service used to unwrap stored results.
    crypto = request.app.state.encryption

    # Decrypt under the task's namespace key when encryption is active;
    # otherwise the stored reference is returned as-is.
    if settings.encryption_enabled:
        payload = crypto.decrypt_content(task.result_ref, task.namespace_id)
    else:
        payload = task.result_ref

    completed = task.completed_at.isoformat() if task.completed_at else None
    return {
        "task_id": task_id,
        "type": task.type.value,
        "result": payload,
        "completed_at": completed,
    }
|
||||
Reference in New Issue
Block a user