feat: voice-service von lehrer nach core verschoben, Pipeline erweitert (voice, BQAS, embedding, night-scheduler)

This commit is contained in:
Benjamin Boenisch
2026-02-15 13:26:06 +01:00
parent a7e4500ea6
commit 1089c73b46
59 changed files with 12921 additions and 20 deletions

View File

@@ -0,0 +1,40 @@
"""
Voice Service Models
Pydantic models for sessions, tasks, and audit logging
"""
from models.session import (
VoiceSession,
SessionCreate,
SessionResponse,
AudioChunk,
TranscriptMessage,
)
from models.task import (
TaskState,
Task,
TaskCreate,
TaskResponse,
TaskTransition,
)
from models.audit import (
AuditEntry,
AuditCreate,
)
# Public API of the models package: every name listed here is re-exported
# from the session, task and audit submodules imported above.
__all__ = [
    # Session models
    "VoiceSession",
    "SessionCreate",
    "SessionResponse",
    "AudioChunk",
    "TranscriptMessage",
    # Task models
    "TaskState",
    "Task",
    "TaskCreate",
    "TaskResponse",
    "TaskTransition",
    # Audit models
    "AuditEntry",
    "AuditCreate",
]

View File

@@ -0,0 +1,149 @@
"""
Audit Models - DSGVO-compliant logging
NO PII in audit logs - only references and metadata
Allowed: ref_id (truncated), content_type, size_bytes, ttl_hours
Forbidden: user_name, content, transcript, email
"""
from datetime import datetime
from enum import Enum
from typing import Optional, Dict, Any
from pydantic import BaseModel, Field
import uuid
class AuditAction(str, Enum):
    """Closed set of event names that can be recorded in the audit log.

    Members also subclass ``str``, so each one compares equal to its value.
    """

    # --- Session lifecycle ---
    SESSION_CREATED = "session_created"
    SESSION_CONNECTED = "session_connected"
    SESSION_CLOSED = "session_closed"
    SESSION_EXPIRED = "session_expired"

    # --- Audio handling (the audio content itself is never logged) ---
    AUDIO_RECEIVED = "audio_received"
    AUDIO_PROCESSED = "audio_processed"

    # --- Task lifecycle ---
    TASK_CREATED = "task_created"
    TASK_QUEUED = "task_queued"
    TASK_STARTED = "task_started"
    TASK_COMPLETED = "task_completed"
    TASK_FAILED = "task_failed"
    TASK_EXPIRED = "task_expired"

    # --- Encryption key checks ---
    ENCRYPTION_KEY_VERIFIED = "encryption_key_verified"
    ENCRYPTION_KEY_INVALID = "encryption_key_invalid"

    # --- Calls to integrated backends ---
    BREAKPILOT_CALLED = "breakpilot_called"
    PERSONAPLEX_CALLED = "personaplex_called"
    OLLAMA_CALLED = "ollama_called"

    # --- Security events ---
    RATE_LIMIT_EXCEEDED = "rate_limit_exceeded"
    UNAUTHORIZED_ACCESS = "unauthorized_access"
class AuditEntry(BaseModel):
    """A single audit log record, designed to be DSGVO (GDPR) compliant.

    Only truncated reference IDs and technical metadata are held here; the
    model has no field for names, transcripts or other content.
    """

    # Record identity: a fresh UUID4 string and a naive UTC creation time.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    timestamp: datetime = Field(default_factory=datetime.utcnow)

    # What happened, and in which namespace (prefix only).
    action: AuditAction
    namespace_id_truncated: str = Field(
        ...,
        max_length=8,
        description="First 8 chars of namespace ID",
    )

    # Optional references, stored as 8-character prefixes for privacy.
    session_id_truncated: Optional[str] = Field(
        default=None,
        max_length=8,
        description="First 8 chars of session ID",
    )
    task_id_truncated: Optional[str] = Field(
        default=None,
        max_length=8,
        description="First 8 chars of task ID",
    )

    # Descriptive metadata about the processed payload (no PII).
    content_type: Optional[str] = Field(default=None, description="Type of content processed")
    size_bytes: Optional[int] = Field(default=None, description="Size in bytes")
    duration_ms: Optional[int] = Field(default=None, description="Duration in milliseconds")
    ttl_hours: Optional[int] = Field(default=None, description="TTL in hours")

    # Outcome of the audited operation.
    success: bool = True
    error_code: Optional[str] = None
    latency_ms: Optional[int] = None

    # Client / backend context (no PII).
    device_type: Optional[str] = None
    client_version: Optional[str] = None
    backend_used: Optional[str] = Field(default=None, description="personaplex, ollama, etc.")

    @staticmethod
    def truncate_id(full_id: str, length: int = 8) -> str:
        """Return the first ``length`` characters of ``full_id``.

        A falsy ``full_id`` (empty string or ``None``) yields ``""``.
        """
        return full_id[:length] if full_id else ""

    class Config:
        json_schema_extra = {
            "example": {
                "id": "audit-123",
                "timestamp": "2026-01-26T10:30:00Z",
                "action": "task_completed",
                "namespace_id_truncated": "teacher-",
                "session_id_truncated": "session-",
                "task_id_truncated": "task-xyz",
                "content_type": "student_observation",
                "size_bytes": 256,
                "ttl_hours": 168,
                "success": True,
                "latency_ms": 1250,
                "backend_used": "ollama",
            }
        }
class AuditCreate(BaseModel):
    """Request payload for creating an audit entry.

    Callers supply full IDs here; ``to_audit_entry()`` shortens them via
    ``AuditEntry.truncate_id`` before they reach the stored record.
    """

    action: AuditAction

    # Full IDs as supplied by the caller; only their prefixes are kept.
    namespace_id: str = Field(..., description="Will be truncated before storage")
    session_id: Optional[str] = Field(default=None, description="Will be truncated")
    task_id: Optional[str] = Field(default=None, description="Will be truncated")

    # Payload metadata (no PII).
    content_type: Optional[str] = Field(default=None)
    size_bytes: Optional[int] = Field(default=None)
    duration_ms: Optional[int] = Field(default=None)
    # AuditEntry has a ttl_hours field; accepting it here lets it actually be
    # populated when entries are built through this request model.
    ttl_hours: Optional[int] = Field(default=None, description="TTL in hours")

    # Outcome of the audited operation.
    success: bool = Field(default=True)
    error_code: Optional[str] = Field(default=None)
    latency_ms: Optional[int] = Field(default=None)

    # Client / backend context (no PII).
    device_type: Optional[str] = Field(default=None)
    # Mirrors AuditEntry.client_version, which was otherwise unreachable.
    client_version: Optional[str] = Field(default=None)
    backend_used: Optional[str] = Field(default=None)

    def to_audit_entry(self) -> AuditEntry:
        """Build an ``AuditEntry`` from this request with truncated IDs.

        Returns:
            A new ``AuditEntry``. ``namespace_id`` is always truncated;
            ``session_id`` and ``task_id`` are truncated when truthy and
            stored as ``None`` otherwise. All metadata fields are copied
            through unchanged.
        """
        return AuditEntry(
            action=self.action,
            namespace_id_truncated=AuditEntry.truncate_id(self.namespace_id),
            session_id_truncated=AuditEntry.truncate_id(self.session_id) if self.session_id else None,
            task_id_truncated=AuditEntry.truncate_id(self.task_id) if self.task_id else None,
            content_type=self.content_type,
            size_bytes=self.size_bytes,
            duration_ms=self.duration_ms,
            ttl_hours=self.ttl_hours,
            success=self.success,
            error_code=self.error_code,
            latency_ms=self.latency_ms,
            device_type=self.device_type,
            client_version=self.client_version,
            backend_used=self.backend_used,
        )

View File

@@ -0,0 +1,152 @@
"""
Voice Session Models
Transient session management - no persistent storage of audio data
DSGVO Compliance:
- Sessions are RAM-only
- Audio chunks are processed and discarded
- Transcripts are encrypted before any storage
"""
from datetime import datetime
from enum import Enum
from typing import Optional, List, Dict, Any
from pydantic import BaseModel, Field
import uuid
class SessionStatus(str, Enum):
    """Status values a voice session can report.

    Members also subclass ``str``, so each one compares equal to its value.
    """

    CREATED = "created"
    CONNECTED = "connected"
    LISTENING = "listening"
    PROCESSING = "processing"
    RESPONDING = "responding"
    PAUSED = "paused"
    CLOSED = "closed"
    ERROR = "error"
class AudioChunk(BaseModel):
    """One chunk of streamed audio.

    Intended to be transient: per the module's DSGVO notes, chunks are
    processed and discarded rather than persisted.
    """

    sequence: int = Field(..., description="Chunk sequence number")
    timestamp_ms: int = Field(..., description="Timestamp in milliseconds")
    # repr=False keeps the raw audio out of repr()/str() of the model, so a
    # chunk that ends up in a log line or traceback does not dump its bytes.
    data: bytes = Field(..., description="PCM audio data (Int16, 24kHz)", repr=False)
    duration_ms: int = Field(default=80, description="Chunk duration in ms")

    class Config:
        # Replace bytes with a size placeholder when the model is encoded to
        # JSON. This does not remove the field; it only masks the content in
        # JSON output, which is why `data` is additionally hidden from repr.
        json_encoders = {
            bytes: lambda v: f"<audio:{len(v)} bytes>"
        }
class TranscriptMessage(BaseModel):
    """One transcript turn in a voice conversation.

    ``content`` holds plaintext while the message is in memory; the module's
    DSGVO notes require encryption before any storage, with
    ``encrypted_ref`` available to hold the resulting reference.
    """

    # Identity and authorship.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    role: str = Field(..., description="'user' or 'assistant'")

    # The transcript text itself and when it was created (naive UTC).
    content: str = Field(..., description="Transcript text (plaintext in RAM only)")
    timestamp: datetime = Field(default_factory=datetime.utcnow)

    # Optional recognition metadata.
    confidence: Optional[float] = Field(default=None, description="ASR confidence 0-1")
    intent: Optional[str] = Field(default=None, description="Detected intent")
    encrypted_ref: Optional[str] = Field(default=None, description="Encrypted storage reference")

    class Config:
        json_schema_extra = {
            "example": {
                "id": "msg-123",
                "role": "user",
                "content": "Notiz zu Max: heute wiederholt gestoert",
                "timestamp": "2026-01-26T10:30:00Z",
                "confidence": 0.95,
                "intent": "student_observation",
            }
        }
class VoiceSession(BaseModel):
    """State of one voice session.

    Per the original design note, this is kept in Valkey with a TTL and
    never written to persistent storage.
    """

    # Identity and ownership.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    namespace_id: str = Field(..., description="Teacher namespace ID")
    key_hash: str = Field(..., description="Hash of client-side encryption key")

    # Lifecycle; both timestamps default to the naive UTC creation time.
    status: SessionStatus = SessionStatus.CREATED
    created_at: datetime = Field(default_factory=datetime.utcnow)
    last_activity: datetime = Field(default_factory=datetime.utcnow)

    # Transient conversation state.
    messages: List[TranscriptMessage] = Field(default_factory=list)
    pending_tasks: List[str] = Field(default_factory=list, description="Task IDs")

    # Audio counters only; the audio itself is not part of the session.
    audio_chunks_received: int = 0
    audio_chunks_processed: int = 0

    # Client metadata (no PII).
    device_type: Optional[str] = Field(default=None, description="'pwa' or 'app'")
    client_version: Optional[str] = None

    def update_activity(self):
        """Set ``last_activity`` to the current naive UTC time."""
        self.last_activity = datetime.utcnow()

    class Config:
        json_schema_extra = {
            "example": {
                "id": "session-abc123",
                "namespace_id": "teacher-ns-456",
                "key_hash": "sha256:abc...",
                "status": "listening",
                "created_at": "2026-01-26T10:00:00Z",
                "last_activity": "2026-01-26T10:30:00Z",
                "messages": [],
                "pending_tasks": [],
                "audio_chunks_received": 150,
                "audio_chunks_processed": 150,
                "device_type": "pwa",
            }
        }
class SessionCreate(BaseModel):
    """Request body for opening a new voice session."""

    # Required: who owns the session and the hash of their client-side key.
    namespace_id: str = Field(..., description="Teacher namespace ID")
    key_hash: str = Field(..., description="Hash of client-side encryption key")

    # Optional client metadata; device_type falls back to "pwa".
    device_type: Optional[str] = "pwa"
    client_version: Optional[str] = None

    class Config:
        json_schema_extra = {
            "example": {
                "namespace_id": "teacher-ns-456",
                "key_hash": "sha256:abc123def456...",
                "device_type": "pwa",
                "client_version": "1.0.0",
            }
        }
class SessionResponse(BaseModel):
    """Response returned once a session has been created."""

    # All fields are required; none has a default.
    id: str
    namespace_id: str
    status: SessionStatus
    created_at: datetime
    websocket_url: str = Field(..., description="WebSocket URL for audio streaming")

    class Config:
        json_schema_extra = {
            "example": {
                "id": "session-abc123",
                "namespace_id": "teacher-ns-456",
                "status": "created",
                "created_at": "2026-01-26T10:00:00Z",
                "websocket_url": "ws://localhost:8091/ws/voice?session_id=session-abc123",
            }
        }

View File

@@ -0,0 +1,217 @@
"""
Task Models - Clawdbot State Machine
Task lifecycle management with encrypted references
State Machine:
    DRAFT -> QUEUED -> RUNNING -> READY
                                    |
                        +-----------+----------+
                        |                      |
                    APPROVED               REJECTED
                        |                      |
                    COMPLETED           DRAFT (revision)

    Any State -> EXPIRED (TTL)
    Any State -> PAUSED (User Interrupt)
"""
from datetime import datetime
from enum import Enum
from typing import Optional, Dict, Any, List
from pydantic import BaseModel, Field
import uuid
class TaskState(str, Enum):
    """States of the task state machine.

    Members also subclass ``str``, so each one compares equal to its value.
    """

    DRAFT = "draft"
    QUEUED = "queued"
    RUNNING = "running"
    READY = "ready"
    APPROVED = "approved"
    REJECTED = "rejected"
    COMPLETED = "completed"
    EXPIRED = "expired"
    PAUSED = "paused"
class TaskType(str, Enum):
    """Kinds of task used for the Breakpilot integration, in six groups.

    Members also subclass ``str``, so each one compares equal to its value.
    """

    # Group 1: short notes
    STUDENT_OBSERVATION = "student_observation"
    REMINDER = "reminder"
    HOMEWORK_CHECK = "homework_check"
    CONFERENCE_TOPIC = "conference_topic"
    CORRECTION_NOTE = "correction_note"

    # Group 2: worksheet generation
    WORKSHEET_GENERATE = "worksheet_generate"
    WORKSHEET_DIFFERENTIATE = "worksheet_differentiate"

    # Group 3: situational work
    QUICK_ACTIVITY = "quick_activity"
    QUIZ_GENERATE = "quiz_generate"
    PARENT_LETTER = "parent_letter"
    CLASS_MESSAGE = "class_message"

    # Group 4: canvas editor
    CANVAS_EDIT = "canvas_edit"
    CANVAS_LAYOUT = "canvas_layout"

    # Group 5: correction assistance
    OPERATOR_CHECKLIST = "operator_checklist"
    EH_PASSAGE = "eh_passage"
    FEEDBACK_SUGGEST = "feedback_suggest"

    # Group 6: follow-up
    REMINDER_SCHEDULE = "reminder_schedule"
    TASK_SUMMARY = "task_summary"
class Task(BaseModel):
    """A unit of work orchestrated by Clawdbot.

    Per the original design note, tasks are kept in Valkey with a TTL.
    """

    # Identity and ownership.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    session_id: str = Field(..., description="Parent session ID")
    namespace_id: str = Field(..., description="Teacher namespace ID")

    # What the task is and where it stands in the state machine.
    type: TaskType
    state: TaskState = TaskState.DRAFT
    intent_text: str = Field(..., description="Original voice command (encrypted ref)")

    # Free-form parameters; meant to carry references rather than PII, e.g.
    #   student_ref  - encrypted reference to a student
    #   class_ref    - encrypted reference to a class
    #   content_type - "worksheet", "quiz", etc.
    #   source_ref   - encrypted reference to a source document
    parameters: Dict[str, Any] = Field(default_factory=dict)

    # Execution outcome.
    result_ref: Optional[str] = Field(default=None, description="Encrypted result reference")
    error_message: Optional[str] = None

    # Timestamps (naive UTC); the last two stay None until set.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)
    completed_at: Optional[datetime] = None
    expires_at: Optional[datetime] = None

    # One dict per transition, appended by transition_to() (no PII).
    state_history: List[Dict[str, Any]] = Field(default_factory=list)

    def transition_to(self, new_state: TaskState, reason: Optional[str] = None):
        """Move the task to ``new_state`` and record the change.

        Updates ``state`` and ``updated_at``, appends a from/to/timestamp/
        reason record to ``state_history``, and stamps ``completed_at`` when
        the new state is COMPLETED or EXPIRED.

        No validity check is performed here; callers that need one must
        consult ``is_valid_transition`` themselves before calling.

        Args:
            new_state: Target state.
            reason: Optional short reason; must not contain PII.
        """
        previous_state = self.state
        self.state = new_state
        self.updated_at = datetime.utcnow()

        # History record (reason must stay PII-free).
        history_record = {
            "from": previous_state.value,
            "to": new_state.value,
            "timestamp": self.updated_at.isoformat(),
            "reason": reason,
        }
        self.state_history.append(history_record)

        # Both COMPLETED and EXPIRED stamp completed_at.
        if new_state in (TaskState.COMPLETED, TaskState.EXPIRED):
            self.completed_at = self.updated_at

    class Config:
        json_schema_extra = {
            "example": {
                "id": "task-xyz789",
                "session_id": "session-abc123",
                "namespace_id": "teacher-ns-456",
                "type": "student_observation",
                "state": "ready",
                "intent_text": "encrypted:abc123...",
                "parameters": {
                    "student_ref": "encrypted:student-max-123",
                    "observation_type": "behavior",
                },
                "created_at": "2026-01-26T10:30:00Z",
                "updated_at": "2026-01-26T10:30:05Z",
            }
        }
class TaskCreate(BaseModel):
    """Request body for creating a task."""

    # Parent session and task kind are required.
    session_id: str
    type: TaskType
    intent_text: str = Field(..., description="Voice command text")

    # Free-form parameters; defaults to a fresh empty dict per instance.
    parameters: Dict[str, Any] = Field(default_factory=dict)

    class Config:
        json_schema_extra = {
            "example": {
                "session_id": "session-abc123",
                "type": "student_observation",
                "intent_text": "Notiz zu Max: heute wiederholt gestoert",
                "parameters": {
                    "student_name": "Max",  # Will be encrypted
                    "observation": "wiederholt gestoert",
                },
            }
        }
class TaskResponse(BaseModel):
    """Task representation returned by the API."""

    # Required identity, kind, state and timestamps.
    id: str
    session_id: str
    type: TaskType
    state: TaskState
    created_at: datetime
    updated_at: datetime

    # Optional outcome information.
    result_available: bool = False
    error_message: Optional[str] = None

    class Config:
        json_schema_extra = {
            "example": {
                "id": "task-xyz789",
                "session_id": "session-abc123",
                "type": "student_observation",
                "state": "completed",
                "created_at": "2026-01-26T10:30:00Z",
                "updated_at": "2026-01-26T10:30:10Z",
                "result_available": True,
            }
        }
class TaskTransition(BaseModel):
    """Request body asking for a task to change state."""

    # Target state is required; the reason is optional and must be PII-free.
    new_state: TaskState
    reason: Optional[str] = Field(default=None, description="Transition reason (no PII)")

    class Config:
        json_schema_extra = {
            "example": {
                "new_state": "approved",
                "reason": "user_confirmed",
            }
        }
# Valid state transitions: maps each source state to the list of states it
# may move to. This table is what is_valid_transition() consults.
# NOTE(review): the module docstring says any state may move to PAUSED, but
# APPROVED and REJECTED have no PAUSED entry here - confirm which is intended.
VALID_TRANSITIONS: Dict[TaskState, List[TaskState]] = {
    TaskState.DRAFT: [TaskState.QUEUED, TaskState.EXPIRED, TaskState.PAUSED],
    TaskState.QUEUED: [TaskState.RUNNING, TaskState.EXPIRED, TaskState.PAUSED],
    TaskState.RUNNING: [TaskState.READY, TaskState.EXPIRED, TaskState.PAUSED],
    TaskState.READY: [TaskState.APPROVED, TaskState.REJECTED, TaskState.EXPIRED, TaskState.PAUSED],
    TaskState.APPROVED: [TaskState.COMPLETED, TaskState.EXPIRED],
    # A rejected task goes back to DRAFT for revision.
    TaskState.REJECTED: [TaskState.DRAFT, TaskState.EXPIRED],
    # A paused task can resume only as DRAFT or QUEUED (or expire).
    TaskState.PAUSED: [TaskState.DRAFT, TaskState.QUEUED, TaskState.EXPIRED],
    TaskState.COMPLETED: [],  # Terminal state
    TaskState.EXPIRED: [],  # Terminal state
}
def is_valid_transition(from_state: TaskState, to_state: TaskState) -> bool:
    """Report whether ``VALID_TRANSITIONS`` allows ``from_state -> to_state``.

    Args:
        from_state: State the task is currently in.
        to_state: State the task should move to.

    Returns:
        True when ``to_state`` is listed for ``from_state``; False when it
        is not listed or ``from_state`` has no entry in the table.
    """
    allowed_targets = VALID_TRANSITIONS.get(from_state)
    if allowed_targets is None:
        return False
    return to_state in allowed_targets