feat: voice-service von lehrer nach core verschoben, Pipeline erweitert (voice, BQAS, embedding, night-scheduler)
This commit is contained in:
152
voice-service/models/session.py
Normal file
152
voice-service/models/session.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Voice Session Models
|
||||
Transient session management - no persistent storage of audio data
|
||||
|
||||
DSGVO Compliance:
|
||||
- Sessions are RAM-only
|
||||
- Audio chunks are processed and discarded
|
||||
- Transcripts are encrypted before any storage
|
||||
"""
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional, List, Dict, Any
|
||||
from pydantic import BaseModel, Field
|
||||
import uuid
|
||||
|
||||
|
||||
class SessionStatus(str, Enum):
    """
    Status values a voice session can take.

    Every member is also a ``str`` instance, so it compares equal to its
    lowercase string value (e.g. ``SessionStatus.CLOSED == "closed"``).
    """

    CREATED = "created"
    CONNECTED = "connected"
    LISTENING = "listening"
    PROCESSING = "processing"
    RESPONDING = "responding"
    PAUSED = "paused"
    CLOSED = "closed"
    ERROR = "error"
|
||||
|
||||
|
||||
class AudioChunk(BaseModel):
    """
    One chunk of streamed audio.

    NEVER persisted - only exists in RAM during processing.

    The raw ``data`` bytes are excluded from ``repr()``/``str()`` output and
    are replaced by a size placeholder when the model is JSON-encoded, so a
    chunk that ends up in a log line does not carry audio content.
    """

    sequence: int = Field(..., description="Chunk sequence number")
    timestamp_ms: int = Field(..., description="Timestamp in milliseconds")
    # repr=False keeps the raw PCM bytes out of repr()/str(); the JSON encoder
    # configured below does not cover those code paths, and they are what
    # logging and tracebacks typically use.
    data: bytes = Field(
        ...,
        description="PCM audio data (Int16, 24kHz)",
        repr=False,
    )
    duration_ms: int = Field(default=80, description="Chunk duration in ms")

    class Config:
        # When encoding to JSON, emit a size placeholder instead of the raw
        # bytes. This does not remove the field from the model itself.
        json_encoders = {
            bytes: lambda raw: f"<audio:{len(raw)} bytes>"
        }
|
||||
|
||||
|
||||
class TranscriptMessage(BaseModel):
    """
    A single transcript entry of a voice conversation.

    Transcript message - encrypted before storage. This model only holds the
    data; it performs no encryption itself. ``content`` is described as
    plaintext that lives in RAM only, and ``encrypted_ref`` as the reference
    to the encrypted stored copy.
    """

    # Random UUID4 string, generated per instance when no id is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    role: str = Field(
        ...,
        description="'user' or 'assistant'",
    )
    content: str = Field(
        ...,
        description="Transcript text (plaintext in RAM only)",
    )
    # Naive UTC timestamp taken at construction time.
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; switching
    # to an aware datetime would change what callers receive - confirm first.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
    confidence: Optional[float] = Field(
        default=None,
        description="ASR confidence 0-1",
    )
    intent: Optional[str] = Field(
        default=None,
        description="Detected intent",
    )
    encrypted_ref: Optional[str] = Field(
        default=None,
        description="Encrypted storage reference",
    )

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "id": "msg-123",
                "role": "user",
                "content": "Notiz zu Max: heute wiederholt gestoert",
                "timestamp": "2026-01-26T10:30:00Z",
                "confidence": 0.95,
                "intent": "student_observation",
            }
        }
|
||||
|
||||
|
||||
class VoiceSession(BaseModel):
    """
    State of one voice session.

    Intended to be stored in Valkey with a TTL, never in persistent storage;
    the storage itself is handled outside this model.
    """

    # Random UUID4 string, generated per instance when no id is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    namespace_id: str = Field(
        ...,
        description="Teacher namespace ID",
    )
    key_hash: str = Field(
        ...,
        description="Hash of client-side encryption key",
    )
    status: SessionStatus = Field(default=SessionStatus.CREATED)
    # Both timestamps are naive UTC values taken at construction time.
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    last_activity: datetime = Field(default_factory=datetime.utcnow)

    # Conversation state (transient)
    messages: List[TranscriptMessage] = Field(default_factory=list)
    pending_tasks: List[str] = Field(
        default_factory=list,
        description="Task IDs",
    )

    # Audio state (never persisted) - counters only, no audio data is held here
    audio_chunks_received: int = Field(default=0)
    audio_chunks_processed: int = Field(default=0)

    # Metadata (no PII)
    device_type: Optional[str] = Field(
        default=None,
        description="'pwa' or 'app'",
    )
    client_version: Optional[str] = Field(default=None)

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "id": "session-abc123",
                "namespace_id": "teacher-ns-456",
                "key_hash": "sha256:abc...",
                "status": "listening",
                "created_at": "2026-01-26T10:00:00Z",
                "last_activity": "2026-01-26T10:30:00Z",
                "messages": [],
                "pending_tasks": [],
                "audio_chunks_received": 150,
                "audio_chunks_processed": 150,
                "device_type": "pwa",
            }
        }

    def update_activity(self):
        """Set ``last_activity`` to the current naive UTC time."""
        now = datetime.utcnow()
        self.last_activity = now
|
||||
|
||||
|
||||
class SessionCreate(BaseModel):
    """
    Request body for creating a new voice session.

    ``namespace_id`` and ``key_hash`` are required; ``device_type`` defaults
    to ``"pwa"`` and ``client_version`` to ``None``.
    """

    namespace_id: str = Field(
        ...,
        description="Teacher namespace ID",
    )
    key_hash: str = Field(
        ...,
        description="Hash of client-side encryption key",
    )
    device_type: Optional[str] = Field(default="pwa")
    client_version: Optional[str] = Field(default=None)

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "namespace_id": "teacher-ns-456",
                "key_hash": "sha256:abc123def456...",
                "device_type": "pwa",
                "client_version": "1.0.0",
            }
        }
|
||||
|
||||
|
||||
class SessionResponse(BaseModel):
    """
    Response returned after a session has been created.

    All fields are required; none has a default.
    """

    id: str
    namespace_id: str
    status: SessionStatus
    created_at: datetime
    websocket_url: str = Field(
        ...,
        description="WebSocket URL for audio streaming",
    )

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "id": "session-abc123",
                "namespace_id": "teacher-ns-456",
                "status": "created",
                "created_at": "2026-01-26T10:00:00Z",
                "websocket_url": "ws://localhost:8091/ws/voice?session_id=session-abc123",
            }
        }
|
||||
Reference in New Issue
Block a user