feat: voice-service von lehrer nach core verschoben, Pipeline erweitert (voice, BQAS, embedding, night-scheduler)
This commit is contained in:
40
voice-service/models/__init__.py
Normal file
40
voice-service/models/__init__.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
Voice Service Models
|
||||
Pydantic models for sessions, tasks, and audit logging
|
||||
"""
|
||||
# Re-export the public model API so callers can write `from models import X`.
# The enums (SessionStatus, TaskType, AuditAction) are exported as well because
# they are the types of required fields on the exported models, and the
# transition table/helper are exported because they define the task state
# machine that Task/TaskTransition operate on.
from models.session import (
    VoiceSession,
    SessionCreate,
    SessionResponse,
    SessionStatus,
    AudioChunk,
    TranscriptMessage,
)
from models.task import (
    TaskState,
    TaskType,
    Task,
    TaskCreate,
    TaskResponse,
    TaskTransition,
    VALID_TRANSITIONS,
    is_valid_transition,
)
from models.audit import (
    AuditAction,
    AuditEntry,
    AuditCreate,
)

__all__ = [
    # Session models
    "VoiceSession",
    "SessionCreate",
    "SessionResponse",
    "SessionStatus",
    "AudioChunk",
    "TranscriptMessage",
    # Task models
    "TaskState",
    "TaskType",
    "Task",
    "TaskCreate",
    "TaskResponse",
    "TaskTransition",
    "VALID_TRANSITIONS",
    "is_valid_transition",
    # Audit models
    "AuditAction",
    "AuditEntry",
    "AuditCreate",
]
|
||||
149
voice-service/models/audit.py
Normal file
149
voice-service/models/audit.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""
|
||||
Audit Models - DSGVO-compliant logging
|
||||
NO PII in audit logs - only references and metadata
|
||||
|
||||
Allowed: ref_id (truncated), content_type, size_bytes, ttl_hours
|
||||
Forbidden: user_name, content, transcript, email
|
||||
"""
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional, Dict, Any
|
||||
from pydantic import BaseModel, Field
|
||||
import uuid
|
||||
|
||||
|
||||
class AuditAction(str, Enum):
    """Closed set of event names that may be recorded in the audit log.

    Members also subclass ``str``, so each member compares equal to its
    lowercase wire value (e.g. ``AuditAction.TASK_FAILED == "task_failed"``).
    """

    # --- Session lifecycle ---
    SESSION_CREATED = "session_created"
    SESSION_CONNECTED = "session_connected"
    SESSION_CLOSED = "session_closed"
    SESSION_EXPIRED = "session_expired"

    # --- Audio handling (the event only; audio content is never logged) ---
    AUDIO_RECEIVED = "audio_received"
    AUDIO_PROCESSED = "audio_processed"

    # --- Task lifecycle ---
    TASK_CREATED = "task_created"
    TASK_QUEUED = "task_queued"
    TASK_STARTED = "task_started"
    TASK_COMPLETED = "task_completed"
    TASK_FAILED = "task_failed"
    TASK_EXPIRED = "task_expired"

    # --- Client-side encryption key checks ---
    ENCRYPTION_KEY_VERIFIED = "encryption_key_verified"
    ENCRYPTION_KEY_INVALID = "encryption_key_invalid"

    # --- Calls to external backends ---
    BREAKPILOT_CALLED = "breakpilot_called"
    PERSONAPLEX_CALLED = "personaplex_called"
    OLLAMA_CALLED = "ollama_called"

    # --- Security events ---
    RATE_LIMIT_EXCEEDED = "rate_limit_exceeded"
    UNAUTHORIZED_ACCESS = "unauthorized_access"
|
||||
|
||||
|
||||
class AuditEntry(BaseModel):
    """
    A single audit-log record.

    The model only declares truncated reference IDs plus technical metadata;
    there is no field for names, transcript text or any other content.
    """

    # Record identity: random UUID4 string and naive-UTC creation time.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    timestamp: datetime = Field(default_factory=datetime.utcnow)

    # What happened, and in which namespace (ID prefix only, max 8 chars).
    action: AuditAction
    namespace_id_truncated: str = Field(
        ...,
        max_length=8,
        description="First 8 chars of namespace ID",
    )

    # Optional references to the session/task involved (prefixes only).
    session_id_truncated: Optional[str] = Field(
        default=None,
        max_length=8,
        description="First 8 chars of session ID",
    )
    task_id_truncated: Optional[str] = Field(
        default=None,
        max_length=8,
        description="First 8 chars of task ID",
    )

    # Descriptive metadata about the processed payload (never the payload).
    content_type: Optional[str] = Field(
        default=None, description="Type of content processed"
    )
    size_bytes: Optional[int] = Field(default=None, description="Size in bytes")
    duration_ms: Optional[int] = Field(
        default=None, description="Duration in milliseconds"
    )
    ttl_hours: Optional[int] = Field(default=None, description="TTL in hours")

    # Outcome of the audited operation.
    success: bool = Field(default=True)
    error_code: Optional[str] = Field(default=None)
    latency_ms: Optional[int] = Field(default=None)

    # Client/backend context.
    device_type: Optional[str] = Field(default=None)
    client_version: Optional[str] = Field(default=None)
    backend_used: Optional[str] = Field(
        default=None, description="personaplex, ollama, etc."
    )

    @staticmethod
    def truncate_id(full_id: str, length: int = 8) -> str:
        """Return the first ``length`` characters of ``full_id``.

        A falsy ``full_id`` (empty string or ``None``) yields ``""``.
        """
        return full_id[:length] if full_id else ""

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "id": "audit-123",
                "timestamp": "2026-01-26T10:30:00Z",
                "action": "task_completed",
                "namespace_id_truncated": "teacher-",
                "session_id_truncated": "session-",
                "task_id_truncated": "task-xyz",
                "content_type": "student_observation",
                "size_bytes": 256,
                "ttl_hours": 168,
                "success": True,
                "latency_ms": 1250,
                "backend_used": "ollama",
            }
        }
|
||||
|
||||
|
||||
class AuditCreate(BaseModel):
    """
    Request to create an audit entry.

    Accepts full namespace/session/task IDs; ``to_audit_entry`` shortens them
    via ``AuditEntry.truncate_id`` before they reach an ``AuditEntry``.
    All metadata fields are optional and forwarded unchanged.
    """

    action: AuditAction
    namespace_id: str = Field(..., description="Will be truncated before storage")
    session_id: Optional[str] = Field(default=None, description="Will be truncated")
    task_id: Optional[str] = Field(default=None, description="Will be truncated")
    content_type: Optional[str] = Field(default=None)
    size_bytes: Optional[int] = Field(default=None)
    duration_ms: Optional[int] = Field(default=None)
    # ttl_hours and client_version exist on AuditEntry; they are accepted here
    # (optional, default None) so they can actually be populated.
    ttl_hours: Optional[int] = Field(default=None)
    success: bool = Field(default=True)
    error_code: Optional[str] = Field(default=None)
    latency_ms: Optional[int] = Field(default=None)
    device_type: Optional[str] = Field(default=None)
    client_version: Optional[str] = Field(default=None)
    backend_used: Optional[str] = Field(default=None)

    def to_audit_entry(self) -> AuditEntry:
        """Convert to an ``AuditEntry`` with truncated IDs.

        Returns:
            A new ``AuditEntry`` (fresh ``id`` and ``timestamp`` from its
            defaults). Missing session/task IDs stay ``None`` rather than
            becoming empty strings.
        """
        truncate = AuditEntry.truncate_id
        return AuditEntry(
            action=self.action,
            namespace_id_truncated=truncate(self.namespace_id),
            session_id_truncated=truncate(self.session_id) if self.session_id else None,
            task_id_truncated=truncate(self.task_id) if self.task_id else None,
            content_type=self.content_type,
            size_bytes=self.size_bytes,
            duration_ms=self.duration_ms,
            ttl_hours=self.ttl_hours,
            success=self.success,
            error_code=self.error_code,
            latency_ms=self.latency_ms,
            device_type=self.device_type,
            client_version=self.client_version,
            backend_used=self.backend_used,
        )
|
||||
152
voice-service/models/session.py
Normal file
152
voice-service/models/session.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Voice Session Models
|
||||
Transient session management - no persistent storage of audio data
|
||||
|
||||
DSGVO Compliance:
|
||||
- Sessions are RAM-only
|
||||
- Audio chunks are processed and discarded
|
||||
- Transcripts are encrypted before any storage
|
||||
"""
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional, List, Dict, Any
|
||||
from pydantic import BaseModel, Field
|
||||
import uuid
|
||||
|
||||
|
||||
class SessionStatus(str, Enum):
    """Status values a voice session can carry.

    Members also subclass ``str`` and compare equal to their lowercase value.
    """

    CREATED = "created"
    CONNECTED = "connected"
    LISTENING = "listening"
    PROCESSING = "processing"
    RESPONDING = "responding"
    PAUSED = "paused"
    CLOSED = "closed"
    ERROR = "error"
|
||||
|
||||
|
||||
class AudioChunk(BaseModel):
    """
    Audio chunk for streaming.

    Design intent: NEVER persisted - only exists in RAM during processing.
    To keep raw audio out of logs, ``data`` is hidden from ``repr()``/``str()``
    and JSON encoding replaces it with a size placeholder.
    """

    sequence: int = Field(..., description="Chunk sequence number")
    timestamp_ms: int = Field(..., description="Timestamp in milliseconds")
    # repr=False: logging or printing the model must not dump the raw payload.
    data: bytes = Field(
        ...,
        description="PCM audio data (Int16, 24kHz)",
        repr=False,
    )
    duration_ms: int = Field(default=80, description="Chunk duration in ms")

    class Config:
        # The bytes are not excluded from serialization; they are rendered as
        # a "<audio:N bytes>" placeholder when the model is JSON-encoded.
        # NOTE(review): `json_encoders` is a legacy option in Pydantic v2 -
        # confirm it is still honoured by the pinned Pydantic version.
        json_encoders = {
            bytes: lambda v: f"<audio:{len(v)} bytes>"
        }
|
||||
|
||||
|
||||
class TranscriptMessage(BaseModel):
    """
    One message of a conversation transcript.

    ``content`` holds plaintext; per the design note it is meant to be
    encrypted before any storage, with ``encrypted_ref`` pointing at the
    stored form (the encryption itself happens outside this model).
    """

    # Random UUID4 string unless supplied by the caller.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    role: str = Field(..., description="'user' or 'assistant'")
    content: str = Field(
        ..., description="Transcript text (plaintext in RAM only)"
    )
    # Naive UTC creation time.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
    confidence: Optional[float] = Field(
        default=None, description="ASR confidence 0-1"
    )
    intent: Optional[str] = Field(default=None, description="Detected intent")
    encrypted_ref: Optional[str] = Field(
        default=None, description="Encrypted storage reference"
    )

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "id": "msg-123",
                "role": "user",
                "content": "Notiz zu Max: heute wiederholt gestoert",
                "timestamp": "2026-01-26T10:30:00Z",
                "confidence": 0.95,
                "intent": "student_observation",
            }
        }
|
||||
|
||||
|
||||
class VoiceSession(BaseModel):
    """
    State of one voice session.

    Per the design note this is kept in Valkey with a TTL and never written
    to persistent storage; the storage layer itself is outside this model.
    """

    # Identity and ownership
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    namespace_id: str = Field(..., description="Teacher namespace ID")
    key_hash: str = Field(..., description="Hash of client-side encryption key")

    # Lifecycle (timestamps are naive UTC)
    status: SessionStatus = Field(default=SessionStatus.CREATED)
    created_at: datetime = Field(default_factory=datetime.utcnow)
    last_activity: datetime = Field(default_factory=datetime.utcnow)

    # Transient conversation state
    messages: List[TranscriptMessage] = Field(default_factory=list)
    pending_tasks: List[str] = Field(default_factory=list, description="Task IDs")

    # Audio counters only - the audio itself is not part of the session
    audio_chunks_received: int = Field(default=0)
    audio_chunks_processed: int = Field(default=0)

    # Client metadata
    device_type: Optional[str] = Field(default=None, description="'pwa' or 'app'")
    client_version: Optional[str] = Field(default=None)

    def update_activity(self):
        """Set ``last_activity`` to the current (naive UTC) time."""
        now = datetime.utcnow()
        self.last_activity = now

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "id": "session-abc123",
                "namespace_id": "teacher-ns-456",
                "key_hash": "sha256:abc...",
                "status": "listening",
                "created_at": "2026-01-26T10:00:00Z",
                "last_activity": "2026-01-26T10:30:00Z",
                "messages": [],
                "pending_tasks": [],
                "audio_chunks_received": 150,
                "audio_chunks_processed": 150,
                "device_type": "pwa",
            }
        }
|
||||
|
||||
|
||||
class SessionCreate(BaseModel):
    """Payload a client sends to open a new voice session."""

    # Required: which namespace the session belongs to and the key hash.
    namespace_id: str = Field(..., description="Teacher namespace ID")
    key_hash: str = Field(..., description="Hash of client-side encryption key")

    # Optional client metadata; device_type falls back to "pwa".
    device_type: Optional[str] = Field(default="pwa")
    client_version: Optional[str] = Field(default=None)

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "namespace_id": "teacher-ns-456",
                "key_hash": "sha256:abc123def456...",
                "device_type": "pwa",
                "client_version": "1.0.0",
            }
        }
|
||||
|
||||
|
||||
class SessionResponse(BaseModel):
    """Body returned to the client once a session has been created.

    All fields are required; ``websocket_url`` tells the client where to
    stream audio.
    """

    id: str
    namespace_id: str
    status: SessionStatus
    created_at: datetime
    websocket_url: str = Field(
        ..., description="WebSocket URL for audio streaming"
    )

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "id": "session-abc123",
                "namespace_id": "teacher-ns-456",
                "status": "created",
                "created_at": "2026-01-26T10:00:00Z",
                "websocket_url": "ws://localhost:8091/ws/voice?session_id=session-abc123",
            }
        }
|
||||
217
voice-service/models/task.py
Normal file
217
voice-service/models/task.py
Normal file
@@ -0,0 +1,217 @@
|
||||
"""
|
||||
Task Models - Clawdbot State Machine
|
||||
Task lifecycle management with encrypted references
|
||||
|
||||
State Machine:
|
||||
DRAFT -> QUEUED -> RUNNING -> READY
|
||||
|
|
||||
+-----------+----------+
|
||||
| |
|
||||
APPROVED REJECTED
|
||||
| |
|
||||
COMPLETED DRAFT (revision)
|
||||
|
||||
Any non-terminal state -> EXPIRED (TTL)
|
||||
DRAFT / QUEUED / RUNNING / READY -> PAUSED (User Interrupt); PAUSED resumes to DRAFT or QUEUED
|
||||
"""
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional, Dict, Any, List
|
||||
from pydantic import BaseModel, Field
|
||||
import uuid
|
||||
|
||||
|
||||
class TaskState(str, Enum):
    """States of the task state machine.

    Members also subclass ``str`` and compare equal to their lowercase value.
    """

    # Pipeline: drafted -> queued -> running -> result ready for review
    DRAFT = "draft"
    QUEUED = "queued"
    RUNNING = "running"
    READY = "ready"

    # Review outcome
    APPROVED = "approved"
    REJECTED = "rejected"

    # End / interruption states
    COMPLETED = "completed"
    EXPIRED = "expired"
    PAUSED = "paused"
|
||||
|
||||
|
||||
class TaskType(str, Enum):
    """Kinds of task used for the Breakpilot integration.

    Members also subclass ``str`` and compare equal to their lowercase value.
    """

    # Group 1: short notes
    STUDENT_OBSERVATION = "student_observation"
    REMINDER = "reminder"
    HOMEWORK_CHECK = "homework_check"
    CONFERENCE_TOPIC = "conference_topic"
    CORRECTION_NOTE = "correction_note"

    # Group 2: worksheet generation
    WORKSHEET_GENERATE = "worksheet_generate"
    WORKSHEET_DIFFERENTIATE = "worksheet_differentiate"

    # Group 3: situational work
    QUICK_ACTIVITY = "quick_activity"
    QUIZ_GENERATE = "quiz_generate"
    PARENT_LETTER = "parent_letter"
    CLASS_MESSAGE = "class_message"

    # Group 4: canvas editor
    CANVAS_EDIT = "canvas_edit"
    CANVAS_LAYOUT = "canvas_layout"

    # Group 5: correction assistance
    OPERATOR_CHECKLIST = "operator_checklist"
    EH_PASSAGE = "eh_passage"
    FEEDBACK_SUGGEST = "feedback_suggest"

    # Group 6: follow-up
    REMINDER_SCHEDULE = "reminder_schedule"
    TASK_SUMMARY = "task_summary"
|
||||
|
||||
|
||||
class Task(BaseModel):
    """
    A unit of work orchestrated by Clawdbot.

    Per the design note, tasks are kept in Valkey with a TTL; the storage
    layer itself is outside this model.
    """

    # Identity and ownership
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    session_id: str = Field(..., description="Parent session ID")
    namespace_id: str = Field(..., description="Teacher namespace ID")

    # What the task is and where it stands
    type: TaskType
    state: TaskState = Field(default=TaskState.DRAFT)
    intent_text: str = Field(
        ..., description="Original voice command (encrypted ref)"
    )

    # Free-form parameters, intended to hold references rather than PII, e.g.:
    #   student_ref  - encrypted reference to student
    #   class_ref    - encrypted reference to class
    #   content_type - "worksheet", "quiz", etc.
    #   source_ref   - encrypted reference to source document
    parameters: Dict[str, Any] = Field(default_factory=dict)

    # Execution outcome
    result_ref: Optional[str] = Field(
        default=None, description="Encrypted result reference"
    )
    error_message: Optional[str] = Field(default=None)

    # Timestamps (naive UTC)
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)
    completed_at: Optional[datetime] = Field(default=None)
    expires_at: Optional[datetime] = Field(default=None)

    # One dict per transition, appended by transition_to()
    state_history: List[Dict[str, Any]] = Field(default_factory=list)

    def transition_to(self, new_state: TaskState, reason: Optional[str] = None):
        """Move the task to ``new_state`` and record the change.

        Updates ``state`` and ``updated_at``, appends a
        ``{"from", "to", "timestamp", "reason"}`` record to ``state_history``
        and, for COMPLETED or EXPIRED, also stamps ``completed_at``.

        NOTE(review): this method does not consult VALID_TRANSITIONS; callers
        presumably check is_valid_transition() first - confirm.

        Args:
            new_state: Target state.
            reason: Optional short reason; should not contain PII.
        """
        previous = self.state
        stamp = datetime.utcnow()

        self.state = new_state
        self.updated_at = stamp

        history_record = {
            "from": previous.value,
            "to": new_state.value,
            "timestamp": stamp.isoformat(),
            "reason": reason,
        }
        self.state_history.append(history_record)

        # Both end states share the completion timestamp.
        if new_state in (TaskState.COMPLETED, TaskState.EXPIRED):
            self.completed_at = stamp

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "id": "task-xyz789",
                "session_id": "session-abc123",
                "namespace_id": "teacher-ns-456",
                "type": "student_observation",
                "state": "ready",
                "intent_text": "encrypted:abc123...",
                "parameters": {
                    "student_ref": "encrypted:student-max-123",
                    "observation_type": "behavior",
                },
                "created_at": "2026-01-26T10:30:00Z",
                "updated_at": "2026-01-26T10:30:05Z",
            }
        }
|
||||
|
||||
|
||||
class TaskCreate(BaseModel):
    """Payload for creating a new task inside an existing session."""

    session_id: str
    type: TaskType
    intent_text: str = Field(..., description="Voice command text")
    # Optional free-form parameters; defaults to an empty dict.
    parameters: Dict[str, Any] = Field(default_factory=dict)

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "session_id": "session-abc123",
                "type": "student_observation",
                "intent_text": "Notiz zu Max: heute wiederholt gestoert",
                "parameters": {
                    # Plaintext in the request; intended to be encrypted later
                    "student_name": "Max",
                    "observation": "wiederholt gestoert",
                },
            }
        }
|
||||
|
||||
|
||||
class TaskResponse(BaseModel):
    """API view of a task.

    Declares a ``result_available`` flag rather than a result reference, and
    has no fields for the intent text or parameters.
    """

    id: str
    session_id: str
    type: TaskType
    state: TaskState
    created_at: datetime
    updated_at: datetime
    result_available: bool = Field(default=False)
    error_message: Optional[str] = Field(default=None)

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "id": "task-xyz789",
                "session_id": "session-abc123",
                "type": "student_observation",
                "state": "completed",
                "created_at": "2026-01-26T10:30:00Z",
                "updated_at": "2026-01-26T10:30:10Z",
                "result_available": True,
            }
        }
|
||||
|
||||
|
||||
class TaskTransition(BaseModel):
    """Payload asking for a task to be moved to another state."""

    new_state: TaskState
    reason: Optional[str] = Field(
        default=None, description="Transition reason (no PII)"
    )

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "new_state": "approved",
                "reason": "user_confirmed",
            }
        }
|
||||
|
||||
|
||||
# Allowed state transitions: maps each state to the states reachable from it.
# An empty list marks a terminal state.
VALID_TRANSITIONS: Dict[TaskState, List[TaskState]] = {
    TaskState.DRAFT: [
        TaskState.QUEUED,
        TaskState.EXPIRED,
        TaskState.PAUSED,
    ],
    TaskState.QUEUED: [
        TaskState.RUNNING,
        TaskState.EXPIRED,
        TaskState.PAUSED,
    ],
    TaskState.RUNNING: [
        TaskState.READY,
        TaskState.EXPIRED,
        TaskState.PAUSED,
    ],
    TaskState.READY: [
        TaskState.APPROVED,
        TaskState.REJECTED,
        TaskState.EXPIRED,
        TaskState.PAUSED,
    ],
    TaskState.APPROVED: [
        TaskState.COMPLETED,
        TaskState.EXPIRED,
    ],
    # A rejected task goes back to DRAFT for revision.
    TaskState.REJECTED: [
        TaskState.DRAFT,
        TaskState.EXPIRED,
    ],
    # A paused task can only resume at DRAFT or QUEUED.
    TaskState.PAUSED: [
        TaskState.DRAFT,
        TaskState.QUEUED,
        TaskState.EXPIRED,
    ],
    TaskState.COMPLETED: [],  # terminal
    TaskState.EXPIRED: [],  # terminal
}
|
||||
|
||||
|
||||
def is_valid_transition(from_state: TaskState, to_state: TaskState) -> bool:
    """Return True if VALID_TRANSITIONS allows ``from_state`` -> ``to_state``.

    A ``from_state`` missing from the table is treated as having no allowed
    targets, so the result is False.
    """
    allowed_targets = VALID_TRANSITIONS.get(from_state, [])
    return to_state in allowed_targets
|
||||
Reference in New Issue
Block a user