feat: voice-service von lehrer nach core verschoben, Pipeline erweitert (voice, BQAS, embedding, night-scheduler)

This commit is contained in:
Benjamin Boenisch
2026-02-15 13:26:06 +01:00
parent a7e4500ea6
commit 1089c73b46
59 changed files with 12921 additions and 20 deletions

View File

@@ -0,0 +1,18 @@
"""
Voice Service Core Services
"""
from services.encryption_service import EncryptionService
from services.task_orchestrator import TaskOrchestrator
from services.personaplex_client import PersonaPlexClient
from services.fallback_llm_client import FallbackLLMClient
from services.intent_router import IntentRouter
from services.audio_processor import AudioProcessor
__all__ = [
"EncryptionService",
"TaskOrchestrator",
"PersonaPlexClient",
"FallbackLLMClient",
"IntentRouter",
"AudioProcessor",
]

View File

@@ -0,0 +1,303 @@
"""
Audio Processor - Mimi Codec Compatible
Handles audio encoding/decoding for voice streaming
Mimi Codec specifications:
- Sample rate: 24kHz
- Frame size: 80ms
- Format: Int16 PCM
- Channels: Mono
IMPORTANT: Audio is NEVER persisted to disk.
All processing happens in RAM only.
"""
import structlog
import numpy as np
from typing import Optional, Iterator, Tuple
from dataclasses import dataclass
from config import settings
logger = structlog.get_logger(__name__)
@dataclass
class AudioFrame:
    """A single audio frame for processing."""
    # Float32 samples in [-1.0, 1.0] (see AudioProcessor.bytes_to_samples).
    samples: np.ndarray
    # Position of this frame within the stream, in milliseconds.
    timestamp_ms: int
    # Frame length in milliseconds (Mimi codec uses 80 ms frames).
    duration_ms: int = 80


class AudioProcessor:
    """
    Processes audio for the Mimi codec.

    All audio processing is transient - data exists only
    in RAM and is discarded after processing.

    Wire format is mono Int16 PCM; internally samples are handled as
    float32 in [-1.0, 1.0]. All methods accept empty input and return
    a sensible neutral result instead of raising.
    """

    def __init__(self):
        # Mimi codec parameters come from settings (24 kHz / 80 ms frames
        # per the module docstring).
        self.sample_rate = settings.audio_sample_rate
        self.frame_size_ms = settings.audio_frame_size_ms
        self.samples_per_frame = int(self.sample_rate * self.frame_size_ms / 1000)

    def bytes_to_samples(self, audio_bytes: bytes) -> np.ndarray:
        """
        Convert raw bytes to numpy samples.

        Args:
            audio_bytes: Int16 PCM audio data

        Returns:
            numpy array of float32 samples (-1.0 to 1.0)
        """
        samples_int16 = np.frombuffer(audio_bytes, dtype=np.int16)
        # Divide by 32768 so the full int16 range maps onto [-1.0, 1.0).
        return samples_int16.astype(np.float32) / 32768.0

    def samples_to_bytes(self, samples: np.ndarray) -> bytes:
        """
        Convert numpy samples to raw bytes.

        Args:
            samples: float32 samples (-1.0 to 1.0)

        Returns:
            Int16 PCM audio data
        """
        # Clip first so out-of-range floats cannot wrap around in int16.
        samples = np.clip(samples, -1.0, 1.0)
        samples_int16 = (samples * 32767).astype(np.int16)
        return samples_int16.tobytes()

    def extract_frames(
        self,
        audio_bytes: bytes,
        start_timestamp_ms: int = 0,
    ) -> Iterator[AudioFrame]:
        """
        Extract fixed-size frames from audio data.

        The last frame is zero-padded to a full frame when the input
        does not divide evenly.

        Args:
            audio_bytes: Raw audio data
            start_timestamp_ms: Starting timestamp

        Yields:
            AudioFrame objects
        """
        samples = self.bytes_to_samples(audio_bytes)
        timestamp = start_timestamp_ms
        for i in range(0, len(samples), self.samples_per_frame):
            frame_samples = samples[i:i + self.samples_per_frame]
            # Pad last frame if needed
            if len(frame_samples) < self.samples_per_frame:
                frame_samples = np.pad(
                    frame_samples,
                    (0, self.samples_per_frame - len(frame_samples)),
                )
            yield AudioFrame(
                samples=frame_samples,
                timestamp_ms=timestamp,
                duration_ms=self.frame_size_ms,
            )
            timestamp += self.frame_size_ms

    def combine_frames(self, frames: list[AudioFrame]) -> bytes:
        """
        Combine multiple frames into continuous audio.

        Frames are ordered by timestamp before concatenation, so they
        may be supplied in any order.

        Args:
            frames: List of AudioFrame objects

        Returns:
            Combined audio bytes (empty for an empty list)
        """
        if not frames:
            return b""
        sorted_frames = sorted(frames, key=lambda f: f.timestamp_ms)
        all_samples = np.concatenate([f.samples for f in sorted_frames])
        return self.samples_to_bytes(all_samples)

    def detect_voice_activity(
        self,
        audio_bytes: bytes,
        threshold: float = 0.02,
        min_duration_ms: int = 100,
    ) -> Tuple[bool, float]:
        """
        Simple RMS-energy-based voice activity detection.

        Args:
            audio_bytes: Raw audio data
            threshold: Energy threshold for speech detection
            min_duration_ms: Minimum duration for valid speech

        Returns:
            (is_speech, energy_level) as plain Python bool/float
        """
        samples = self.bytes_to_samples(audio_bytes)
        if samples.size == 0:
            # Empty input previously yielded a NaN energy (mean of empty).
            return False, 0.0
        # Calculate RMS energy
        energy = float(np.sqrt(np.mean(samples ** 2)))
        # Clips shorter than min_duration_ms are never counted as speech.
        duration_ms = len(samples) / self.sample_rate * 1000
        if duration_ms < min_duration_ms:
            return False, energy
        return bool(energy > threshold), energy

    def resample(
        self,
        audio_bytes: bytes,
        source_rate: int,
        target_rate: Optional[int] = None,
    ) -> bytes:
        """
        Resample audio to target sample rate.

        Args:
            audio_bytes: Raw audio data
            source_rate: Source sample rate
            target_rate: Target sample rate (default: configured rate)

        Returns:
            Resampled audio bytes
        """
        target_rate = target_rate or self.sample_rate
        # No-op when rates match or there is nothing to resample
        # (np.interp cannot handle an empty sample-point array).
        if source_rate == target_rate or not audio_bytes:
            return audio_bytes
        samples = self.bytes_to_samples(audio_bytes)
        new_length = int(len(samples) * target_rate / source_rate)
        # Simple linear interpolation resampling
        # (In production, use scipy.signal.resample or librosa)
        x_old = np.linspace(0, 1, len(samples))
        x_new = np.linspace(0, 1, new_length)
        samples_resampled = np.interp(x_new, x_old, samples)
        return self.samples_to_bytes(samples_resampled)

    def normalize_audio(
        self,
        audio_bytes: bytes,
        target_db: float = -3.0,
    ) -> bytes:
        """
        Normalize audio to target peak dB level.

        Args:
            audio_bytes: Raw audio data
            target_db: Target peak level in dB

        Returns:
            Normalized audio bytes (input returned unchanged for
            empty or near-silent audio)
        """
        samples = self.bytes_to_samples(audio_bytes)
        # Guard: np.max raises on an empty array.
        if samples.size == 0:
            return audio_bytes
        peak = np.max(np.abs(samples))
        if peak < 0.001:  # Silence - avoid amplifying noise floor
            return audio_bytes
        # Gain that scales the current peak to the target level.
        target_linear = 10 ** (target_db / 20)
        gain = target_linear / peak
        return self.samples_to_bytes(samples * gain)

    def apply_noise_gate(
        self,
        audio_bytes: bytes,
        threshold_db: float = -40.0,
        attack_ms: float = 5.0,
        release_ms: float = 50.0,
    ) -> bytes:
        """
        Apply noise gate to reduce background noise.

        Args:
            audio_bytes: Raw audio data
            threshold_db: Gate threshold in dB
            attack_ms: Attack time in ms
            release_ms: Release time in ms

        Returns:
            Gated audio bytes
        """
        samples = self.bytes_to_samples(audio_bytes)
        # Guard: np.convolve raises on an empty input array.
        if samples.size == 0:
            return audio_bytes
        # Convert threshold to linear amplitude
        threshold = 10 ** (threshold_db / 20)
        # Hard gate: 1.0 where the envelope exceeds the threshold
        envelope = np.abs(samples)
        gate = np.where(envelope > threshold, 1.0, 0.0)
        # Smooth gate transitions with a moving average sized by the
        # slower of attack/release to avoid audible clicks.
        attack_samples = int(attack_ms * self.sample_rate / 1000)
        release_samples = int(release_ms * self.sample_rate / 1000)
        kernel_size = max(attack_samples, release_samples)
        if kernel_size > 1:
            kernel = np.ones(kernel_size) / kernel_size
            gate = np.convolve(gate, kernel, mode='same')
        return self.samples_to_bytes(samples * gate)

    def get_audio_stats(self, audio_bytes: bytes) -> dict:
        """
        Get statistics about audio data.

        Args:
            audio_bytes: Raw audio data

        Returns:
            Dictionary with audio statistics; for empty input the dB
            values are the -200 dB silence floor.
        """
        samples = self.bytes_to_samples(audio_bytes)
        if samples.size == 0:
            # np.max raises and np.mean is NaN on empty arrays.
            return {
                "duration_ms": 0.0,
                "sample_count": 0,
                "rms_db": -200.0,
                "peak_db": -200.0,
                "sample_rate": self.sample_rate,
            }
        rms = np.sqrt(np.mean(samples ** 2))
        peak = np.max(np.abs(samples))
        duration_ms = len(samples) / self.sample_rate * 1000
        # 1e-10 floor keeps log10 defined for digital silence (-200 dB).
        rms_db = 20 * np.log10(rms + 1e-10)
        peak_db = 20 * np.log10(peak + 1e-10)
        return {
            "duration_ms": duration_ms,
            "sample_count": len(samples),
            "rms_db": round(rms_db, 1),
            "peak_db": round(peak_db, 1),
            "sample_rate": self.sample_rate,
        }

View File

@@ -0,0 +1,231 @@
"""
Encryption Service - Namespace Key Management
Client-side encryption for DSGVO compliance
The encryption key NEVER leaves the teacher's device.
Server only sees:
- Key hash (for verification)
- Encrypted blobs
- Namespace ID (pseudonym)
"""
import structlog
import hashlib
import base64
import secrets
from typing import Optional
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from config import settings
logger = structlog.get_logger(__name__)
class EncryptionService:
    """
    Handles namespace key verification and server-side encryption.

    Important: this service never holds the teacher's actual encryption
    key - that key exists only on the teacher's device. The server side
    merely validates key-hash formats and wraps content in an additional
    at-rest encryption layer.
    """

    def __init__(self):
        # Registered key hashes, keyed by namespace ID.
        self._key_hashes: dict[str, str] = {}
        # Ephemeral server key for the transit/at-rest layer.
        self._server_key = secrets.token_bytes(32)

    def verify_key_hash(self, key_hash: str) -> bool:
        """
        Verify that a key hash is valid format.

        Does NOT verify the actual key - that's client-side only.
        Accepts "disabled" for development over HTTP (where crypto.subtle
        is unavailable). In production, always use HTTPS to enable
        proper encryption.
        """
        if not key_hash:
            return False
        # "disabled" is a development-only escape hatch (HTTP context).
        if key_hash == "disabled":
            logger.warning(
                "Encryption disabled - client running in non-secure context (HTTP). "
                "Use HTTPS in production!"
            )
            return True
        # Anything else must look like "sha256:<base64 of 32 bytes>".
        prefix = "sha256:"
        if not key_hash.startswith(prefix):
            return False
        try:
            raw = base64.b64decode(key_hash[len(prefix):])
        except Exception:
            return False
        return len(raw) == 32  # SHA-256 digests are exactly 32 bytes

    def register_namespace_key(self, namespace_id: str, key_hash: str) -> bool:
        """
        Register a namespace's key hash for future verification.
        """
        if not self.verify_key_hash(key_hash):
            logger.warning("Invalid key hash format", namespace_id=namespace_id[:8])
            return False
        self._key_hashes[namespace_id] = key_hash
        event = (
            "Namespace registered (encryption disabled)"
            if key_hash == "disabled"
            else "Namespace key registered"
        )
        logger.info(event, namespace_id=namespace_id[:8])
        return True

    def encrypt_content(self, plaintext: str, namespace_id: str) -> str:
        """
        Encrypt content for server-side storage.

        Note: this is transit encryption only - client-side encryption
        happens in the browser/app. This adds an extra layer for data
        at rest on the server.
        """
        if not settings.encryption_enabled:
            return plaintext
        try:
            cipher = AESGCM(self._derive_key(namespace_id))
            nonce = secrets.token_bytes(12)
            sealed = cipher.encrypt(nonce, plaintext.encode('utf-8'), None)
            # nonce is prepended so decrypt_content can recover it.
            payload = base64.b64encode(nonce + sealed).decode('utf-8')
        except Exception as e:
            logger.error("Encryption failed", error=str(e))
            raise
        return f"encrypted:{payload}"

    def decrypt_content(self, encrypted: str, namespace_id: str) -> str:
        """
        Decrypt server-side encrypted content.

        Values without the "encrypted:" marker are passed through as-is.
        """
        if not settings.encryption_enabled:
            return encrypted
        if not encrypted.startswith("encrypted:"):
            return encrypted  # Not encrypted
        try:
            blob = base64.b64decode(encrypted[len("encrypted:"):])
            nonce, ciphertext = blob[:12], blob[12:]
            cipher = AESGCM(self._derive_key(namespace_id))
            return cipher.decrypt(nonce, ciphertext, None).decode('utf-8')
        except Exception as e:
            logger.error("Decryption failed", error=str(e))
            raise

    def _derive_key(self, namespace_id: str) -> bytes:
        """
        Derive a per-namespace key from the server key.

        Using the namespace ID as salt gives every namespace a unique
        encryption key.
        """
        return PBKDF2HMAC(
            algorithm=hashes.SHA256(),
            length=32,
            salt=namespace_id.encode('utf-8'),
            iterations=100000,
        ).derive(self._server_key)

    @staticmethod
    def generate_key_hash(key: bytes) -> str:
        """
        Generate a key hash for client-side use.

        Utility only - the real implementation lives in the client.
        """
        digest = hashlib.sha256(key).digest()
        return f"sha256:{base64.b64encode(digest).decode('utf-8')}"

    @staticmethod
    def generate_namespace_id() -> str:
        """
        Generate a new namespace ID for a teacher.
        """
        return f"ns-{secrets.token_hex(16)}"
class ClientSideEncryption:
    """
    Helper class documenting client-side encryption.
    This code runs in the browser/app, not on the server.
    Client-side encryption flow:
    1. Teacher generates a master key on first use
    2. Master key is stored in browser/app secure storage
    3. Key hash is sent to server for session verification
    4. All PII is encrypted with master key before sending to server
    5. Server only sees encrypted blobs
    JavaScript implementation:
    ```javascript
    // Generate master key (one-time)
    const masterKey = await crypto.subtle.generateKey(
        { name: "AES-GCM", length: 256 },
        true,
        ["encrypt", "decrypt"]
    );
    // Store in IndexedDB (encrypted with device key)
    await storeSecurely("masterKey", masterKey);
    // Generate key hash for server
    const keyData = await crypto.subtle.exportKey("raw", masterKey);
    const hashBuffer = await crypto.subtle.digest("SHA-256", keyData);
    const keyHash = "sha256:" + btoa(String.fromCharCode(...new Uint8Array(hashBuffer)));
    // Encrypt content before sending
    async function encryptContent(content) {
        const iv = crypto.getRandomValues(new Uint8Array(12));
        const encoded = new TextEncoder().encode(content);
        const ciphertext = await crypto.subtle.encrypt(
            { name: "AES-GCM", iv },
            masterKey,
            encoded
        );
        return btoa(String.fromCharCode(...iv, ...new Uint8Array(ciphertext)));
    }
    // Decrypt content after receiving
    async function decryptContent(encrypted) {
        const data = Uint8Array.from(atob(encrypted), c => c.charCodeAt(0));
        const iv = data.slice(0, 12);
        const ciphertext = data.slice(12);
        const decrypted = await crypto.subtle.decrypt(
            { name: "AES-GCM", iv },
            masterKey,
            ciphertext
        );
        return new TextDecoder().decode(decrypted);
    }
    ```
    """
    # Intentionally empty: this class exists purely as server-side
    # documentation of the client's encryption flow and is never
    # instantiated with behavior.
    pass

View File

@@ -0,0 +1,519 @@
"""
Enhanced Task Orchestrator - Multi-Agent Integration
Extends the existing TaskOrchestrator with Multi-Agent support:
- Session management with checkpoints
- Message bus integration for inter-agent communication
- Quality judge integration via BQAS
- Heartbeat-based liveness
"""
import structlog
import asyncio
from typing import Optional, Dict, Any
from datetime import datetime
from services.task_orchestrator import TaskOrchestrator, Intent
from models.task import Task, TaskState
# Import agent-core components.
# The agent-core checkout location is configurable via AGENT_CORE_PATH so
# this module is not tied to a single developer's machine; the previous
# hard-coded path remains the default for backward compatibility.
import os
import sys
sys.path.insert(
    0,
    os.environ.get(
        "AGENT_CORE_PATH",
        "/Users/benjaminadmin/Projekte/breakpilot-pwa/agent-core",
    ),
)
from sessions.session_manager import SessionManager, AgentSession, SessionState
from sessions.heartbeat import HeartbeatMonitor, HeartbeatClient
from brain.memory_store import MemoryStore
from brain.context_manager import ContextManager, MessageRole
from orchestrator.message_bus import MessageBus, AgentMessage, MessagePriority
from orchestrator.task_router import TaskRouter, RoutingStrategy
logger = structlog.get_logger(__name__)
class EnhancedTaskOrchestrator(TaskOrchestrator):
    """
    Enhanced TaskOrchestrator with Multi-Agent support.

    Extends the existing TaskOrchestrator to integrate with:
    - Session management for persistence and recovery
    - Message bus for inter-agent communication
    - Quality judge for response validation
    - Memory store for long-term learning
    """

    def __init__(
        self,
        redis_client=None,
        db_pool=None,
        namespace: str = "breakpilot"
    ):
        """
        Initialize the enhanced orchestrator.

        Args:
            redis_client: Async Redis/Valkey client
            db_pool: Async PostgreSQL connection pool
            namespace: Namespace for isolation
        """
        super().__init__()
        # Initialize agent-core components; all share the same backing
        # stores and namespace so their state stays co-located.
        self.session_manager = SessionManager(
            redis_client=redis_client,
            db_pool=db_pool,
            namespace=namespace
        )
        self.memory_store = MemoryStore(
            redis_client=redis_client,
            db_pool=db_pool,
            namespace=namespace
        )
        self.context_manager = ContextManager(
            redis_client=redis_client,
            db_pool=db_pool,
            namespace=namespace
        )
        self.message_bus = MessageBus(
            redis_client=redis_client,
            db_pool=db_pool,
            namespace=namespace
        )
        # Liveness: 30s timeout, checked every 5s, 3 missed beats allowed.
        self.heartbeat = HeartbeatMonitor(
            timeout_seconds=30,
            check_interval_seconds=5,
            max_missed_beats=3
        )
        self.task_router = TaskRouter()
        # Track active sessions by voice session ID
        self._voice_sessions: Dict[str, AgentSession] = {}
        # Heartbeat clients keyed by agent session ID (not voice session ID).
        self._heartbeat_clients: Dict[str, HeartbeatClient] = {}
        logger.info("Enhanced TaskOrchestrator initialized with agent-core")

    async def start(self) -> None:
        """Starts the enhanced orchestrator"""
        await self.message_bus.start()
        await self.heartbeat.start_monitoring()
        # Subscribe to messages directed at this orchestrator
        await self.message_bus.subscribe(
            "voice-orchestrator",
            self._handle_agent_message
        )
        logger.info("Enhanced TaskOrchestrator started")

    async def stop(self) -> None:
        """Stops the enhanced orchestrator"""
        # Stop all heartbeat clients before the monitor itself shuts down.
        for client in self._heartbeat_clients.values():
            await client.stop()
        self._heartbeat_clients.clear()
        await self.heartbeat.stop_monitoring()
        await self.message_bus.stop()
        logger.info("Enhanced TaskOrchestrator stopped")

    async def create_session(
        self,
        voice_session_id: str,
        user_id: str = "",
        metadata: Optional[Dict[str, Any]] = None
    ) -> AgentSession:
        """
        Creates a new agent session for a voice session.

        Args:
            voice_session_id: The voice session ID
            user_id: Optional user ID
            metadata: Additional metadata

        Returns:
            The created AgentSession
        """
        # Create session via session manager
        session = await self.session_manager.create_session(
            agent_type="voice-orchestrator",
            user_id=user_id,
            context={"voice_session_id": voice_session_id},
            metadata=metadata
        )
        # Create conversation context (capped at 50 messages).
        self.context_manager.create_context(
            session_id=session.session_id,
            system_prompt=self._get_system_prompt(),
            max_messages=50
        )
        # Start heartbeat for this session (beats every 10s).
        heartbeat_client = HeartbeatClient(
            session_id=session.session_id,
            monitor=self.heartbeat,
            interval_seconds=10
        )
        await heartbeat_client.start()
        # Register heartbeat for monitoring
        self.heartbeat.register(session.session_id, "voice-orchestrator")
        # Store references
        self._voice_sessions[voice_session_id] = session
        self._heartbeat_clients[session.session_id] = heartbeat_client
        logger.info(
            "Created agent session",
            session_id=session.session_id[:8],
            voice_session_id=voice_session_id
        )
        return session

    async def get_session(
        self,
        voice_session_id: str
    ) -> Optional[AgentSession]:
        """Gets the agent session for a voice session"""
        return self._voice_sessions.get(voice_session_id)

    async def end_session(self, voice_session_id: str) -> None:
        """
        Ends an agent session.

        No-op when the voice session ID is unknown.

        Args:
            voice_session_id: The voice session ID
        """
        session = self._voice_sessions.get(voice_session_id)
        if not session:
            return
        # Stop heartbeat
        if session.session_id in self._heartbeat_clients:
            await self._heartbeat_clients[session.session_id].stop()
            del self._heartbeat_clients[session.session_id]
        # Unregister from heartbeat monitor
        self.heartbeat.unregister(session.session_id)
        # Mark session as completed
        session.complete()
        await self.session_manager.update_session(session)
        # Clean up
        del self._voice_sessions[voice_session_id]
        logger.info(
            "Ended agent session",
            session_id=session.session_id[:8],
            duration_seconds=session.get_duration().total_seconds()
        )

    async def queue_task(self, task: Task) -> None:
        """
        Queue a task with session checkpointing.

        Extends parent to add checkpoint for recovery.
        """
        # Get session for this task
        session = self._voice_sessions.get(task.session_id)
        if session:
            # Checkpoint before queueing so _recover_pending_tasks can
            # replay tasks that never finished.
            session.checkpoint("task_queued", {
                "task_id": task.id,
                "task_type": task.type.value,
                "parameters": task.parameters
            })
            await self.session_manager.update_session(session)
        # Call parent implementation
        await super().queue_task(task)

    async def process_task(self, task: Task) -> None:
        """
        Process a task with enhanced routing and quality checks.

        Extends parent to:
        - Route complex tasks to specialized agents
        - Run quality checks via BQAS
        - Store results in memory for learning
        """
        session = self._voice_sessions.get(task.session_id)
        if session:
            session.checkpoint("task_processing", {
                "task_id": task.id
            })
        # Check if this task should be routed to a specialized agent
        if self._needs_specialized_agent(task):
            await self._route_to_agent(task, session)
        else:
            # Use parent implementation for simple tasks
            await super().process_task(task)
        # Run quality check on result
        if task.result_ref and self._needs_quality_check(task):
            await self._run_quality_check(task, session)
        # Store in memory for learning (only successfully completed tasks)
        if task.state == TaskState.READY and task.result_ref:
            await self._store_task_result(task)
        if session:
            session.checkpoint("task_completed", {
                "task_id": task.id,
                "state": task.state.value
            })
            await self.session_manager.update_session(session)

    def _needs_specialized_agent(self, task: Task) -> bool:
        """Check if task needs routing to a specialized agent"""
        # Local import avoids an import cycle with the models package.
        from models.task import TaskType
        # Tasks that benefit from specialized agents
        specialized_types = [
            TaskType.PARENT_LETTER,  # Could use grader for tone
            TaskType.FEEDBACK_SUGGEST,  # Quality judge for appropriateness
        ]
        return task.type in specialized_types

    def _needs_quality_check(self, task: Task) -> bool:
        """Check if task result needs quality validation"""
        from models.task import TaskType
        # Tasks that generate content should be checked
        content_types = [
            TaskType.PARENT_LETTER,
            TaskType.CLASS_MESSAGE,
            TaskType.FEEDBACK_SUGGEST,
            TaskType.WORKSHEET_GENERATE,
        ]
        return task.type in content_types

    async def _route_to_agent(
        self,
        task: Task,
        session: Optional[AgentSession]
    ) -> None:
        """Routes a task to a specialized agent"""
        # Determine target agent
        intent = f"task_{task.type.value}"
        routing_result = await self.task_router.route(
            intent=intent,
            context={"task": task.parameters},
            strategy=RoutingStrategy.LEAST_LOADED
        )
        if not routing_result.success:
            # Fall back to local processing
            logger.warning(
                "No agent available for task, using local processing",
                task_id=task.id[:8],
                reason=routing_result.reason
            )
            await super().process_task(task)
            return
        # Send to agent via message bus (30s budget for agent work).
        try:
            response = await self.message_bus.request(
                AgentMessage(
                    sender="voice-orchestrator",
                    receiver=routing_result.agent_id,
                    message_type=f"process_{task.type.value}",
                    payload={
                        "task_id": task.id,
                        "task_type": task.type.value,
                        "parameters": task.parameters,
                        "session_id": session.session_id if session else None
                    },
                    priority=MessagePriority.NORMAL
                ),
                timeout=30.0
            )
            task.result_ref = response.get("result", "")
            task.transition_to(TaskState.READY, "agent_processed")
        except asyncio.TimeoutError:
            # Agent too slow: degrade gracefully to local processing.
            logger.error(
                "Agent timeout, falling back to local",
                task_id=task.id[:8],
                agent=routing_result.agent_id
            )
            await super().process_task(task)

    async def _run_quality_check(
        self,
        task: Task,
        session: Optional[AgentSession]
    ) -> None:
        """Runs quality check on task result via quality judge"""
        try:
            response = await self.message_bus.request(
                AgentMessage(
                    sender="voice-orchestrator",
                    receiver="quality-judge",
                    message_type="evaluate_response",
                    payload={
                        "task_id": task.id,
                        "task_type": task.type.value,
                        "response": task.result_ref,
                        "context": task.parameters
                    },
                    priority=MessagePriority.NORMAL
                ),
                timeout=10.0
            )
            quality_score = response.get("composite_score", 0)
            # NOTE(review): 60 appears to be the minimum acceptable
            # composite score - confirm against the BQAS judge's scale.
            if quality_score < 60:
                # Mark for review
                task.error_message = f"Quality check failed: {quality_score}"
                logger.warning(
                    "Task failed quality check",
                    task_id=task.id[:8],
                    score=quality_score
                )
        except asyncio.TimeoutError:
            # Quality check timeout is non-fatal
            logger.warning(
                "Quality check timeout",
                task_id=task.id[:8]
            )

    async def _store_task_result(self, task: Task) -> None:
        """Stores task result in memory for learning"""
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # consider datetime.now(timezone.utc) when touching this code.
        await self.memory_store.remember(
            key=f"task:{task.type.value}:{task.id}",
            value={
                "result": task.result_ref,
                "parameters": task.parameters,
                "completed_at": datetime.utcnow().isoformat()
            },
            agent_id="voice-orchestrator",
            ttl_days=30
        )

    async def _handle_agent_message(
        self,
        message: AgentMessage
    ) -> Optional[Dict[str, Any]]:
        """Handles incoming messages from other agents"""
        logger.debug(
            "Received agent message",
            sender=message.sender,
            type=message.message_type
        )
        if message.message_type == "task_status_update":
            # Handle task status updates
            task_id = message.payload.get("task_id")
            # _tasks is inherited from the parent TaskOrchestrator.
            if task_id in self._tasks:
                task = self._tasks[task_id]
                new_state = message.payload.get("state")
                if new_state:
                    task.transition_to(TaskState(new_state), "agent_update")
        return None

    def _get_system_prompt(self) -> str:
        """Returns the system prompt for the voice assistant"""
        # The prompt is intentionally German - it is user-facing text
        # for German-speaking teachers.
        return """Du bist ein hilfreicher Assistent für Lehrer in der Breakpilot-App.
Deine Aufgaben:
- Hilf beim Erstellen von Arbeitsblättern
- Unterstütze bei der Korrektur
- Erstelle Elternbriefe und Klassennachrichten
- Dokumentiere Beobachtungen und Erinnerungen
Halte dich kurz und präzise. Nutze einfache, klare Sprache.
Bei Unklarheiten frage nach."""

    # Recovery methods
    async def recover_session(
        self,
        voice_session_id: str,
        session_id: str
    ) -> Optional[AgentSession]:
        """
        Recovers a session from checkpoint.

        Args:
            voice_session_id: The voice session ID
            session_id: The agent session ID to recover

        Returns:
            The recovered session or None
        """
        session = await self.session_manager.get_session(session_id)
        if not session:
            logger.warning(
                "Session not found for recovery",
                session_id=session_id
            )
            return None
        # NOTE(review): only ACTIVE sessions are recoverable here -
        # confirm whether paused/interrupted states should also qualify.
        if session.state != SessionState.ACTIVE:
            logger.warning(
                "Session not active for recovery",
                session_id=session_id,
                state=session.state.value
            )
            return None
        # Resume session
        session.resume()
        # Restore heartbeat
        heartbeat_client = HeartbeatClient(
            session_id=session.session_id,
            monitor=self.heartbeat,
            interval_seconds=10
        )
        await heartbeat_client.start()
        self.heartbeat.register(session.session_id, "voice-orchestrator")
        # Store references
        self._voice_sessions[voice_session_id] = session
        self._heartbeat_clients[session.session_id] = heartbeat_client
        # Recover pending tasks from checkpoints
        await self._recover_pending_tasks(session)
        logger.info(
            "Recovered session",
            session_id=session.session_id[:8],
            checkpoints=len(session.checkpoints)
        )
        return session

    async def _recover_pending_tasks(self, session: AgentSession) -> None:
        """Recovers pending tasks from session checkpoints"""
        # Walk checkpoints newest-first and replay tasks that were queued
        # but never left the QUEUED state.
        for checkpoint in reversed(session.checkpoints):
            if checkpoint.name == "task_queued":
                task_id = checkpoint.data.get("task_id")
                if task_id and task_id in self._tasks:
                    task = self._tasks[task_id]
                    if task.state == TaskState.QUEUED:
                        # Re-process queued task
                        await self.process_task(task)
                        logger.info(
                            "Recovered pending task",
                            task_id=task_id[:8]
                        )

View File

@@ -0,0 +1,248 @@
"""
Fallback LLM Client - Ollama Integration
Text-only fallback when PersonaPlex is not available
Used in development on Mac Mini with:
- qwen2.5:32b for conversation
- Local processing (DSGVO-konform)
"""
import structlog
import httpx
from typing import Optional, List, Dict, Any
from config import settings
logger = structlog.get_logger(__name__)
class FallbackLLMClient:
    """
    Ollama LLM client for text-only processing.

    When PersonaPlex is not available (development mode),
    this client provides:
    - Intent detection (text-based)
    - Response generation
    - Task execution assistance

    Note: Audio transcription requires a separate ASR service
    (e.g., Whisper) when using this fallback.

    Call :meth:`aclose` on shutdown to release the HTTP connection pool.
    """

    def __init__(self):
        self._base_url = settings.ollama_base_url
        self._model = settings.ollama_voice_model
        self._timeout = settings.ollama_timeout
        # Created lazily so no connection pool exists until first use.
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create the shared HTTP client."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=self._timeout)
        return self._client

    async def aclose(self) -> None:
        """
        Close the underlying HTTP client and release its connections.

        Fixes a resource leak: the lazily created AsyncClient was never
        closed. Safe to call multiple times; a later request simply
        recreates the client on demand.
        """
        if self._client is not None:
            await self._client.aclose()
            self._client = None

    async def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: int = 500,
    ) -> str:
        """
        Generate text completion.

        Args:
            prompt: User prompt
            system_prompt: Optional system instructions
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate

        Returns:
            Generated text; a German error message string on failure
            (callers surface this text to the user directly).
        """
        if settings.fallback_llm_provider == "none":
            logger.warning("No LLM provider configured")
            return "LLM nicht verfügbar"
        client = await self._get_client()
        # Build the chat message list (system prompt first, if any).
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})
        try:
            response = await client.post(
                f"{self._base_url}/api/chat",
                json={
                    "model": self._model,
                    "messages": messages,
                    "options": {
                        "temperature": temperature,
                        "num_predict": max_tokens,
                    },
                    "stream": False,
                },
            )
            response.raise_for_status()
            data = response.json()
            return data.get("message", {}).get("content", "")
        except httpx.HTTPError as e:
            logger.error("Ollama request failed", error=str(e))
            return "Fehler bei der Verarbeitung"
        except Exception as e:
            logger.error("Unexpected error", error=str(e))
            return "Unerwarteter Fehler"

    async def detect_intent(self, text: str) -> Dict[str, Any]:
        """
        Detect intent from text using LLM.

        Returns:
            {
                "type": "student_observation" | "reminder" | ...,
                "confidence": 0.0-1.0,
                "parameters": {...},
                "is_actionable": bool
            }

            Falls back to an "unknown" intent when the model output
            cannot be parsed as JSON.
        """
        import json
        # The prompt enumerates the closed intent set and forces a
        # JSON-only answer so the response can be parsed mechanically.
        system_prompt = """Du bist ein Intent-Detektor für Lehrer-Sprachbefehle.
Analysiere den Text und bestimme die Absicht.
Mögliche Intents:
- student_observation: Beobachtung zu einem Schüler
- reminder: Erinnerung an etwas
- homework_check: Hausaufgaben kontrollieren
- conference_topic: Thema für Konferenz
- correction_note: Notiz zur Korrektur
- worksheet_generate: Arbeitsblatt erstellen
- worksheet_differentiate: Differenzierung
- quick_activity: Schnelle Aktivität
- quiz_generate: Quiz erstellen
- parent_letter: Elternbrief
- class_message: Nachricht an Klasse
- canvas_edit: Canvas bearbeiten
- canvas_layout: Layout ändern
- operator_checklist: Operatoren-Checkliste
- eh_passage: EH-Passage suchen
- feedback_suggest: Feedback vorschlagen
- reminder_schedule: Erinnerung planen
- task_summary: Aufgaben zusammenfassen
- unknown: Unbekannt
Antworte NUR mit JSON:
{"type": "intent_name", "confidence": 0.0-1.0, "parameters": {...}, "is_actionable": true/false}"""
        # Low temperature: classification should be deterministic.
        result = await self.generate(
            prompt=f"Text: {text}",
            system_prompt=system_prompt,
            temperature=0.1,
            max_tokens=200,
        )
        try:
            # Extract the first {...} span in case the model wraps the
            # JSON in extra prose despite the instructions.
            start = result.find("{")
            end = result.rfind("}") + 1
            if start >= 0 and end > start:
                return json.loads(result[start:end])
        except Exception as e:
            logger.warning("Intent parsing failed", error=str(e))
        # Parse failure or no JSON found: report a non-actionable unknown.
        return {
            "type": "unknown",
            "confidence": 0.0,
            "parameters": {},
            "is_actionable": False,
        }

    async def process_audio_description(self, audio_data: bytes) -> str:
        """
        Process audio by describing it (placeholder for ASR).

        In production, this would use Whisper or similar.
        For MVP, this returns a placeholder (empty string).
        """
        # Duration is derived from the configured sample rate assuming
        # 16-bit mono PCM (2 bytes per sample).
        samples = len(audio_data) // 2
        duration_sec = samples / settings.audio_sample_rate
        logger.debug(
            "Audio received (no ASR in fallback mode)",
            duration_sec=duration_sec,
            bytes=len(audio_data),
        )
        # Placeholder - in production, integrate with Whisper
        return ""

    async def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
    ) -> str:
        """
        Multi-turn conversation.

        Args:
            messages: List of {"role": "user"|"assistant", "content": "..."}
            temperature: Sampling temperature

        Returns:
            Assistant response (German error message string on failure)
        """
        if settings.fallback_llm_provider == "none":
            return "LLM nicht verfügbar"
        client = await self._get_client()
        # Prepend the assistant persona as the system message.
        system_prompt = """Du bist Breakpilot, ein hilfreicher Assistent für Lehrer.
Du hilfst bei:
- Notizen und Beobachtungen
- Unterrichtsvorbereitung
- Elternkommunikation
- Korrekturunterstützung
Antworte kurz und präzise. Halte Antworten unter 100 Wörtern."""
        full_messages = [{"role": "system", "content": system_prompt}] + messages
        try:
            response = await client.post(
                f"{self._base_url}/api/chat",
                json={
                    "model": self._model,
                    "messages": full_messages,
                    "options": {
                        "temperature": temperature,
                        "num_predict": 300,
                    },
                    "stream": False,
                },
            )
            response.raise_for_status()
            data = response.json()
            return data.get("message", {}).get("content", "")
        except Exception as e:
            logger.error("Chat failed", error=str(e))
            return "Entschuldigung, ein Fehler ist aufgetreten."

    async def health_check(self) -> bool:
        """Check if Ollama is reachable (and a provider is configured)."""
        if settings.fallback_llm_provider == "none":
            return False
        try:
            client = await self._get_client()
            response = await client.get(f"{self._base_url}/api/tags")
            return response.status_code == 200
        except Exception:
            return False

View File

@@ -0,0 +1,368 @@
"""
Intent Router - Voice Command Classification
Routes detected intents to appropriate handlers
Supports all use case groups:
1. Kurze Notizen (Autofahrt)
2. Arbeitsblatt-Generierung (Zug)
3. Situatives Arbeiten (Schule)
4. Canvas-Editor
5. Korrektur & RAG-Assistenz
6. Follow-up über Tage
"""
import structlog
import re
from typing import Optional, List, Dict, Any
from dataclasses import dataclass
from config import settings
from models.task import TaskType
from models.session import TranscriptMessage
logger = structlog.get_logger(__name__)
@dataclass
class DetectedIntent:
    """Detected intent with confidence and parameters."""

    # Task type the utterance was classified as.
    type: TaskType
    # Classifier confidence in [0.0, 1.0]; callers threshold on this value.
    confidence: float
    # Parameters extracted from the utterance (names, durations, times, ...).
    parameters: Dict[str, Any]
    # True when the intent should create a task; False for pure queries.
    is_actionable: bool
# Pattern-based intent detection rules.
# Keys are task types; values are regex alternatives matched against the
# normalized transcript (lowercased, umlauts transliterated to ae/oe/ue/ss
# by IntentRouter._normalize_text). Patterns therefore mostly use the ASCII
# forms; a few keep the umlaut variant as a defensive alternative.
INTENT_PATTERNS: Dict[TaskType, List[str]] = {
    # Group 1: short notes (e.g. while driving)
    TaskType.STUDENT_OBSERVATION: [
        r"notiz\s+zu\s+(\w+)",
        r"beobachtung\s+(\w+)",
        r"(\w+)\s+hat\s+(gestoert|gestört)",
        r"(\w+)\s+braucht",
    ],
    TaskType.REMINDER: [
        r"erinner\s+mich",
        r"morgen\s+(\d+:\d+)",
        r"reminder",
        r"nicht\s+vergessen",
    ],
    TaskType.HOMEWORK_CHECK: [
        r"hausaufgabe\s+kontrollieren",
        r"(\w+)\s+mathe\s+hausaufgabe",
        r"ha\s+check",
    ],
    TaskType.CONFERENCE_TOPIC: [
        r"thema\s+(lehrerkonferenz|konferenz)",
        r"fuer\s+die\s+konferenz",
        r"konferenzthema",
    ],
    TaskType.CORRECTION_NOTE: [
        r"aufgabe\s+(\d+)",
        r"haeufiger\s+fehler",
        r"naechste\s+stunde\s+erklaeren",
        r"korrekturnotiz",
    ],
    # Group 2: worksheet generation (e.g. on the train)
    TaskType.WORKSHEET_GENERATE: [
        r"arbeitsblatt\s+(erstellen|machen|generieren)",
        r"nimm\s+vokabeln",
        r"mach\s+(\d+)\s+lueckentexte",
        r"uebungsblatt",
    ],
    TaskType.WORKSHEET_DIFFERENTIATE: [
        r"differenzierung",
        r"zwei\s+schwierigkeitsstufen",
        r"basis\s+und\s+plus",
        r"leichtere\s+version",
    ],
    # Group 3: situational work (at school)
    TaskType.QUICK_ACTIVITY: [
        r"(\d+)\s+minuten\s+einstieg",
        r"schnelle\s+aktivitaet",
        r"warming\s*up",
        r"einstiegsaufgabe",
    ],
    TaskType.QUIZ_GENERATE: [
        r"vokabeltest",
        r"quiz\s+(erstellen|generieren)",
        r"(\d+)-minuten\s+test",
        r"kurzer\s+test",
    ],
    TaskType.PARENT_LETTER: [
        r"elternbrief\s+wegen",
        r"elternbrief",
        r"brief\s+an\s+eltern",
        r"wegen\s+wiederholter?\s+(stoerungen|störungen)",
        r"wegen\s+(stoerungen|störungen)",
        r"mitteilung\s+an\s+eltern",
    ],
    TaskType.CLASS_MESSAGE: [
        r"nachricht\s+an\s+(\d+\w+)",
        r"klassen\s*nachricht",
        r"info\s+an\s+die\s+klasse",
    ],
    # Group 4: canvas editor
    TaskType.CANVAS_EDIT: [
        r"ueberschriften?\s+(groesser|kleiner|größer)",
        r"bild\s+(\d+)\s+(nach|auf)",
        r"pfeil\s+(von|auf)",
        r"kasten\s+(hinzufuegen|einfügen)",
    ],
    TaskType.CANVAS_LAYOUT: [
        r"auf\s+eine\s+seite",
        r"drucklayout\s+a4",
        r"layout\s+(aendern|ändern)",
        r"alles\s+auf\s+a4",
    ],
    # Group 5: correction & RAG assistance
    TaskType.OPERATOR_CHECKLIST: [
        r"operatoren[-\s]*checkliste",
        r"welche\s+operatoren",
        r"operatoren\s+fuer\s+diese\s+aufgabe",
    ],
    TaskType.EH_PASSAGE: [
        r"erwartungshorizont",
        r"eh\s*passage",
        r"was\s+steht\s+im\s+eh",
    ],
    TaskType.FEEDBACK_SUGGEST: [
        r"feedback\s*(vorschlag|vorschlagen)",
        r"wie\s+formuliere\s+ich",
        r"rueckmeldung\s+geben",
    ],
    # Group 6: follow-up over days
    TaskType.REMINDER_SCHEDULE: [
        r"erinner\s+mich\s+morgen",
        r"in\s+(\d+)\s+(stunden|tagen)",
        r"naechste\s+woche",
    ],
    TaskType.TASK_SUMMARY: [
        r"offenen?\s+(aufgaben|tasks)",
        r"was\s+steht\s+noch\s+an",
        r"zusammenfassung",
        r"fasse.+zusammen",
        r"diese[rn]?\s+woche",
    ],
}
class IntentRouter:
    """
    Routes voice commands to appropriate task types.

    Uses a combination of:
    1. Pattern matching for common phrases (cheap, tried first)
    2. LLM-based classification for complex queries
    3. Context from previous messages for disambiguation
    """

    def __init__(self):
        # Compiled regexes per task type; built once up front for performance.
        self._compiled_patterns: Dict[TaskType, List[re.Pattern]] = {}
        self._compile_patterns()

    def _compile_patterns(self):
        """Pre-compile the INTENT_PATTERNS regexes for performance."""
        for task_type, patterns in INTENT_PATTERNS.items():
            self._compiled_patterns[task_type] = [
                re.compile(pattern, re.IGNORECASE | re.UNICODE)
                for pattern in patterns
            ]

    async def detect_intent(
        self,
        text: str,
        context: Optional[List[TranscriptMessage]] = None,
    ) -> Optional[DetectedIntent]:
        """
        Detect intent from text with optional context.

        Detection cascade: regex patterns first, then the fallback LLM
        (when configured), then context-based disambiguation for short
        confirmations like "ja" or "mach das".

        Args:
            text: Input text (transcript)
            context: Previous messages for disambiguation

        Returns:
            DetectedIntent or None if no clear intent
        """
        normalized = self._normalize_text(text)

        # 1) Cheap pattern matching.
        pattern_result = self._pattern_match(normalized)
        if pattern_result and pattern_result.confidence > 0.6:
            logger.info(
                "Intent detected via pattern",
                type=pattern_result.type.value,
                confidence=pattern_result.confidence,
            )
            return pattern_result

        # 2) LLM classification, only when a fallback provider is configured.
        if settings.fallback_llm_provider != "none":
            llm_result = await self._llm_classify(normalized, context)
            if llm_result and llm_result.confidence > 0.5:
                logger.info(
                    "Intent detected via LLM",
                    type=llm_result.type.value,
                    confidence=llm_result.confidence,
                )
                return llm_result

        # 3) Context-based disambiguation (confirmation of a prior suggestion).
        if context:
            context_result = self._context_disambiguate(normalized, context)
            if context_result:
                logger.info(
                    "Intent detected via context",
                    type=context_result.type.value,
                )
                return context_result

        logger.debug("No intent detected", text=text[:50])
        return None

    def _normalize_text(self, text: str) -> str:
        """Lowercase, transliterate German umlauts, and collapse whitespace."""
        text = text.lower()
        # Transliterate so patterns only need the ASCII forms (ae/oe/ue/ss).
        text = text.replace("ä", "ae").replace("ö", "oe").replace("ü", "ue")
        text = text.replace("ß", "ss")
        # Remove extra whitespace
        text = " ".join(text.split())
        return text

    def _pattern_match(self, text: str) -> Optional[DetectedIntent]:
        """Match text against known patterns; return the best-scoring hit."""
        if not text:
            # Guard: avoids division by zero in the coverage ratio below.
            return None
        best_match: Optional[DetectedIntent] = None
        best_confidence = 0.0
        for task_type, patterns in self._compiled_patterns.items():
            for pattern in patterns:
                match = pattern.search(text)
                if match:
                    # Confidence grows with how much of the utterance the
                    # match covers, capped at 0.95.
                    match_ratio = len(match.group()) / len(text)
                    confidence = min(0.95, 0.6 + match_ratio * 0.4)
                    if confidence > best_confidence:
                        # Extract parameters from regex groups.
                        parameters = self._extract_parameters(task_type, match, text)
                        best_match = DetectedIntent(
                            type=task_type,
                            confidence=confidence,
                            parameters=parameters,
                            is_actionable=self._is_actionable(task_type),
                        )
                        best_confidence = confidence
        return best_match

    def _extract_parameters(
        self,
        task_type: TaskType,
        match: re.Match,
        full_text: str,
    ) -> Dict[str, Any]:
        """Extract task parameters from a regex match and the full utterance."""
        params = {}
        if match.groups():
            groups = match.groups()
            # Task-specific parameter extraction from capture groups.
            if task_type == TaskType.STUDENT_OBSERVATION:
                params["student_name"] = groups[0] if groups else None
            elif task_type == TaskType.HOMEWORK_CHECK:
                params["subject"] = "mathe" if "mathe" in full_text else None
            elif task_type == TaskType.QUICK_ACTIVITY:
                params["duration_minutes"] = int(groups[0]) if groups else 10
            elif task_type == TaskType.QUIZ_GENERATE:
                params["duration_minutes"] = int(groups[0]) if groups and groups[0].isdigit() else 10
            elif task_type == TaskType.CLASS_MESSAGE:
                params["class_name"] = groups[0] if groups else None
        # Time references like "14:30" or "14" anywhere in the utterance.
        time_match = re.search(r"(\d{1,2}):?(\d{2})?", full_text)
        if time_match:
            params["time"] = time_match.group()
        # Free-text payload after a colon ("Notiz zu Max: kam zu spaet").
        colon_match = re.search(r":\s*(.+)$", full_text)
        if colon_match:
            params["content"] = colon_match.group(1).strip()
        return params

    def _is_actionable(self, task_type: TaskType) -> bool:
        """Return True if the intent type creates an actionable task."""
        # All task types are actionable except read-only queries.
        query_types = [
            TaskType.OPERATOR_CHECKLIST,
            TaskType.EH_PASSAGE,
            TaskType.TASK_SUMMARY,
        ]
        return task_type not in query_types

    async def _llm_classify(
        self,
        text: str,
        context: Optional[List[TranscriptMessage]] = None,
    ) -> Optional[DetectedIntent]:
        """Use the fallback LLM for intent classification."""
        from services.fallback_llm_client import FallbackLLMClient

        llm = FallbackLLMClient()
        result = await llm.detect_intent(text)
        if result.get("type") == "unknown":
            return None
        try:
            task_type = TaskType(result["type"])
            return DetectedIntent(
                type=task_type,
                confidence=result.get("confidence", 0.5),
                parameters=result.get("parameters", {}),
                is_actionable=result.get("is_actionable", True),
            )
        except ValueError:
            # LLM returned a label that is not a known TaskType value.
            logger.warning("Unknown task type from LLM", type=result.get("type"))
            return None

    def _context_disambiguate(
        self,
        text: str,
        context: List[TranscriptMessage],
    ) -> Optional[DetectedIntent]:
        """
        Disambiguate intent using conversation context.

        Short confirmations ("ja", "genau", "mach das") re-use the intent
        of the most recent assistant message that carried one.
        """
        if not context:
            return None
        continuation_words = ["ja", "genau", "richtig", "okay", "mach das", "bitte"]
        lowered = text.lower()
        # Match single words against whole tokens rather than substrings so
        # that e.g. "ja" does not fire inside "jahr"; multi-word phrases
        # ("mach das") are still matched as substrings.
        tokens = set(lowered.split())
        is_confirmation = any(
            (word in lowered) if " " in word else (word in tokens)
            for word in continuation_words
        )
        if is_confirmation:
            # Find the last assistant message with a suggestion.
            for msg in reversed(context):
                if msg.role == "assistant" and msg.intent:
                    try:
                        return DetectedIntent(
                            type=TaskType(msg.intent),
                            confidence=0.6,
                            parameters={},
                            is_actionable=True,
                        )
                    except ValueError:
                        pass
        return None

View File

@@ -0,0 +1,286 @@
"""
PersonaPlex-7B Client
Full-Duplex Speech-to-Speech with NVIDIA's PersonaPlex model
Features:
- Full-duplex audio streaming
- 80ms latency target
- 24kHz audio (Mimi codec compatible)
- German language support
- Teacher persona customization
"""
import structlog
import asyncio
import json
from typing import Optional, AsyncIterator
import websockets
from websockets.client import WebSocketClientProtocol
from config import settings
logger = structlog.get_logger(__name__)
class PersonaPlexClient:
    """
    WebSocket client for PersonaPlex-7B Full-Duplex model.

    PersonaPlex is NVIDIA's speech-to-speech model that provides:
    - Real-time transcription
    - Intent understanding
    - Natural language responses
    - Voice synthesis

    In development mode, this falls back to text-only processing: every
    method no-ops (empty result / None) while ``self._connected`` is False.
    """

    def __init__(self):
        # Active WebSocket connection, or None when disconnected.
        self._ws: Optional[WebSocketClientProtocol] = None
        # True only after a successful connect(); gates all other methods.
        self._connected = False
        # Persona JSON sent to the server on connect (see load_persona()).
        self._persona_config: Optional[dict] = None

    async def connect(self) -> bool:
        """
        Connect to PersonaPlex WebSocket server.

        Returns:
            True if connected, False if in fallback mode (disabled via
            settings, or the connection attempt failed).
        """
        if not settings.use_personaplex:
            logger.info("PersonaPlex disabled, using fallback mode")
            return False
        try:
            self._ws = await websockets.connect(
                settings.personaplex_ws_url,
                ping_interval=20,
                ping_timeout=10,
            )
            self._connected = True
            # Push persona configuration so the server voices responses
            # with the configured style.
            if self._persona_config:
                await self._ws.send(json.dumps({
                    "type": "config",
                    "persona": self._persona_config,
                }))
            logger.info("Connected to PersonaPlex")
            return True
        except Exception as e:
            logger.warning("PersonaPlex connection failed, using fallback", error=str(e))
            self._connected = False
            return False

    async def disconnect(self):
        """Close the connection; safe to call when not connected."""
        if self._ws:
            await self._ws.close()
            self._ws = None
        self._connected = False

    def load_persona(self, persona_path: str = "personas/lehrer_persona.json"):
        """
        Load persona configuration for voice customization.

        Falls back to the built-in default persona when the file is missing.
        """
        try:
            # Explicit UTF-8: persona files contain German umlauts and must
            # not depend on the platform default encoding.
            with open(persona_path, 'r', encoding='utf-8') as f:
                self._persona_config = json.load(f)
            logger.info("Loaded persona", path=persona_path)
        except FileNotFoundError:
            logger.warning("Persona file not found, using defaults", path=persona_path)
            self._persona_config = self._default_persona()

    def _default_persona(self) -> dict:
        """Default teacher persona configuration."""
        return {
            "name": "Breakpilot Assistant",
            "language": "de-DE",
            "voice": {
                "gender": "neutral",
                "pitch": "medium",
                "speed": 1.0,
            },
            "style": {
                "formal": True,
                "friendly": True,
                "concise": True,
            },
            "domain_knowledge": [
                "education",
                "teaching",
                "school_administration",
                "student_assessment",
            ],
        }

    async def transcribe(self, audio_data: bytes) -> str:
        """
        Transcribe audio to text.

        Args:
            audio_data: PCM Int16 audio at 24kHz

        Returns:
            Transcribed text; empty string when not connected, on timeout,
            or on any protocol error.
        """
        if not self._connected:
            # Fallback: return empty (audio not processed).
            logger.debug("PersonaPlex not connected, skipping transcription")
            return ""
        try:
            # Send audio, then wait for the transcript message.
            await self._ws.send(audio_data)
            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )
            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "transcript":
                    return data.get("text", "")
            return ""
        except asyncio.TimeoutError:
            logger.warning("Transcription timeout")
            return ""
        except Exception as e:
            logger.error("Transcription failed", error=str(e))
            return ""

    async def synthesize(self, text: str) -> bytes:
        """
        Synthesize text to speech.

        Args:
            text: Text to synthesize

        Returns:
            PCM Int16 audio at 24kHz; empty bytes when not connected or on
            timeout/error.
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping synthesis")
            return b""
        try:
            await self._ws.send(json.dumps({
                "type": "synthesize",
                "text": text,
            }))
            # Collect binary audio chunks until the server signals completion
            # (or reports an error).
            audio_chunks = []
            while True:
                response = await asyncio.wait_for(
                    self._ws.recv(),
                    timeout=settings.personaplex_timeout,
                )
                if isinstance(response, bytes):
                    audio_chunks.append(response)
                elif isinstance(response, str):
                    data = json.loads(response)
                    if data.get("type") == "synthesis_complete":
                        break
                    if data.get("type") == "error":
                        logger.error("Synthesis error", error=data.get("message"))
                        break
            return b"".join(audio_chunks)
        except asyncio.TimeoutError:
            logger.warning("Synthesis timeout")
            return b""
        except Exception as e:
            logger.error("Synthesis failed", error=str(e))
            return b""

    async def stream_conversation(
        self,
        audio_stream: AsyncIterator[bytes],
    ) -> AsyncIterator[dict]:
        """
        Full-duplex conversation streaming.

        Uploads caller audio in a background task while yielding server
        events until an "end_of_turn" message arrives.

        Yields dictionaries with:
        - type: "transcript" | "response_text" | "response_audio" | "intent"
        - content: The actual content
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping stream")
            return
        try:
            async def send_audio():
                # Forward caller audio chunks to the server as they arrive.
                async for chunk in audio_stream:
                    if self._ws:
                        await self._ws.send(chunk)

            send_task = asyncio.create_task(send_audio())
            try:
                while True:
                    response = await asyncio.wait_for(
                        self._ws.recv(),
                        timeout=settings.personaplex_timeout,
                    )
                    if isinstance(response, bytes):
                        yield {
                            "type": "response_audio",
                            "content": response,
                        }
                    elif isinstance(response, str):
                        data = json.loads(response)
                        yield data
                        if data.get("type") == "end_of_turn":
                            break
            finally:
                send_task.cancel()
                # Await the cancelled sender so the cancellation is actually
                # processed (avoids "Task was destroyed but it is pending").
                try:
                    await send_task
                except asyncio.CancelledError:
                    pass
                except Exception as e:
                    # The sender may have failed independently; the original
                    # behavior never surfaced that, so only log it.
                    logger.debug("Audio sender ended with error", error=str(e))
        except asyncio.TimeoutError:
            logger.warning("Stream timeout")
        except Exception as e:
            logger.error("Stream failed", error=str(e))

    async def detect_intent(self, text: str) -> Optional[dict]:
        """
        Detect intent from text using PersonaPlex.

        Returns:
            The intent dict from the server, or None when not connected,
            the reply is not an intent message, or any error occurs.
        """
        if not self._connected:
            return None
        try:
            await self._ws.send(json.dumps({
                "type": "detect_intent",
                "text": text,
            }))
            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )
            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "intent":
                    return data
            return None
        except Exception as e:
            logger.error("Intent detection failed", error=str(e))
            return None

View File

@@ -0,0 +1,382 @@
"""
Task Orchestrator - Task State Machine
Manages task lifecycle and routes to Breakpilot modules
The TaskOrchestrator is the agent orchestration layer that:
1. Receives intents from voice input
2. Creates and manages tasks
3. Routes to appropriate Breakpilot modules
4. Maintains conversation context
5. Handles follow-up queries
Note: This is a safe, internal task router with no shell access,
no email capabilities, and no external API access beyond internal services.
"""
import structlog
import httpx
from typing import Optional, List, Dict, Any
from datetime import datetime, timedelta
from config import settings
from models.task import Task, TaskState, TaskType, is_valid_transition
from models.session import TranscriptMessage
logger = structlog.get_logger(__name__)
class Intent:
    """Detected intent from voice input."""

    def __init__(
        self,
        type: TaskType,
        confidence: float,
        parameters: Dict[str, Any],
        is_actionable: bool = True,
    ):
        # Task type this utterance maps to.
        self.type = type
        # Classifier confidence in [0.0, 1.0].
        self.confidence = confidence
        # Extracted parameters (e.g. student name, duration, content).
        self.parameters = parameters
        # False for pure queries that should not create a task.
        self.is_actionable = is_actionable
class TaskOrchestrator:
"""
Task orchestration and state machine management.
Handles the full lifecycle of voice-initiated tasks:
1. Intent -> Task creation
2. Task queuing and execution
3. Result handling
4. Follow-up context
Security: This orchestrator only routes to internal Breakpilot services
via HTTP. It has NO access to shell commands, emails, calendars, or
external APIs.
"""
def __init__(self):
    # In-memory task store: task_id -> Task.
    self._tasks: Dict[str, Task] = {}
    # session_id -> task_ids created in that session (insertion order).
    self._session_tasks: Dict[str, List[str]] = {}
    # Shared HTTP client, created lazily by _get_client().
    self._http_client: Optional[httpx.AsyncClient] = None
async def _get_client(self) -> httpx.AsyncClient:
    """Return the shared HTTP client, creating it on first use."""
    client = self._http_client
    if client is None:
        client = httpx.AsyncClient(timeout=30.0)
        self._http_client = client
    return client
async def queue_task(self, task: Task):
    """
    Queue a task for processing.

    Transitions the task from DRAFT to QUEUED, registers it globally and
    under its session, and immediately processes simple note-style task
    types that need no user confirmation.
    """
    if task.state != TaskState.DRAFT:
        logger.warning("Task not in DRAFT state", task_id=task.id[:8])
        return

    task.transition_to(TaskState.QUEUED, "queued_for_processing")

    # Register the task globally and under its session.
    self._tasks[task.id] = task
    self._session_tasks.setdefault(task.session_id, []).append(task.id)

    logger.info(
        "Task queued",
        task_id=task.id[:8],
        type=task.type.value,
    )

    # Note-style tasks are processed right away (no approval step).
    if task.type in (
        TaskType.STUDENT_OBSERVATION,
        TaskType.REMINDER,
        TaskType.HOMEWORK_CHECK,
    ):
        await self.process_task(task)
async def process_task(self, task: Task):
    """
    Process a queued task.

    Routes the task to the appropriate Breakpilot module via _route_task
    and stores the result reference on the task. The task always ends in
    READY: on success with a result_ref, on failure with error_message
    set, so the session layer can surface either outcome to the user.
    """
    if task.state != TaskState.QUEUED:
        logger.warning("Task not in QUEUED state", task_id=task.id[:8])
        return
    task.transition_to(TaskState.RUNNING, "processing_started")
    try:
        # Route to appropriate handler.
        result = await self._route_task(task)
        # Store result reference for the approval/result step.
        task.result_ref = result
        task.transition_to(TaskState.READY, "processing_complete")
        logger.info(
            "Task processed",
            task_id=task.id[:8],
            type=task.type.value,
        )
    except Exception as e:
        # Failures still reach READY (reason "processing_failed") with the
        # error recorded, rather than a dedicated failure state.
        logger.error("Task processing failed", task_id=task.id[:8], error=str(e))
        task.error_message = str(e)
        task.transition_to(TaskState.READY, "processing_failed")
async def _route_task(self, task: Task) -> str:
    """
    Route task to the appropriate Breakpilot module.

    Tasks with an HTTP route are POSTed to the klausur service; the
    remaining types are handled locally by the _handle_*/_generate_*
    helpers below. Returns a string result reference/content.

    Raises:
        httpx.HTTPError: when a routed API call fails (propagated so
            process_task can record the failure).
    """
    client = await self._get_client()
    # Task type -> klausur-service endpoint mapping.
    routes = {
        # Worksheet generation
        TaskType.WORKSHEET_GENERATE: f"{settings.klausur_service_url}/api/v1/worksheets/generate",
        TaskType.WORKSHEET_DIFFERENTIATE: f"{settings.klausur_service_url}/api/v1/worksheets/differentiate",
        # Quick activities
        TaskType.QUICK_ACTIVITY: f"{settings.klausur_service_url}/api/v1/activities/generate",
        TaskType.QUIZ_GENERATE: f"{settings.klausur_service_url}/api/v1/quizzes/generate",
        # Korrektur assistance
        TaskType.OPERATOR_CHECKLIST: f"{settings.klausur_service_url}/api/v1/corrections/operators",
        TaskType.EH_PASSAGE: f"{settings.klausur_service_url}/api/v1/corrections/eh-passage",
        TaskType.FEEDBACK_SUGGEST: f"{settings.klausur_service_url}/api/v1/corrections/feedback",
    }
    # HTTP-routed task types go to the klausur service.
    if task.type in routes:
        try:
            response = await client.post(
                routes[task.type],
                json={
                    "task_id": task.id,
                    "namespace_id": task.namespace_id,
                    "parameters": task.parameters,
                },
                # NOTE(review): klausur-service calls reuse the Ollama
                # timeout setting - confirm this is intentional.
                timeout=settings.ollama_timeout,
            )
            response.raise_for_status()
            return response.json().get("result", "")
        except httpx.HTTPError as e:
            logger.error("API call failed", url=routes[task.type], error=str(e))
            raise
    # Locally handled task types (no API call needed).
    if task.type in [TaskType.STUDENT_OBSERVATION, TaskType.REMINDER, TaskType.HOMEWORK_CHECK]:
        return await self._handle_note_task(task)
    if task.type in [TaskType.CONFERENCE_TOPIC, TaskType.CORRECTION_NOTE]:
        return await self._handle_note_task(task)
    if task.type == TaskType.PARENT_LETTER:
        return await self._generate_parent_letter(task)
    if task.type == TaskType.CLASS_MESSAGE:
        return await self._generate_class_message(task)
    if task.type in [TaskType.CANVAS_EDIT, TaskType.CANVAS_LAYOUT]:
        return await self._handle_canvas_command(task)
    if task.type == TaskType.REMINDER_SCHEDULE:
        return await self._schedule_reminder(task)
    if task.type == TaskType.TASK_SUMMARY:
        return await self._generate_task_summary(task)
    logger.warning("Unknown task type", task_type=task.type.value)
    return "Task type not implemented"
async def _handle_note_task(self, task: Task) -> str:
    """Acknowledge a note/observation task.

    Notes are stored encrypted upstream, so no further processing is
    needed here - only a German confirmation string is returned.
    """
    return "Notiz gespeichert"
async def _generate_parent_letter(self, task: Task) -> str:
    """Draft a neutral, professional parent letter via the fallback LLM."""
    from services.fallback_llm_client import FallbackLLMClient

    reason = task.parameters.get('reason', 'Allgemeine Information')
    letter_context = task.parameters.get('context', '')
    prompt = f"""Erstelle einen neutralen, professionellen Elternbrief basierend auf:
Anlass: {reason}
Kontext: {letter_context}
Der Brief soll:
- Sachlich und respektvoll formuliert sein
- Keine Schuldzuweisungen enthalten
- Konstruktiv auf Lösungen ausgerichtet sein
- In der Ich-Form aus Lehrersicht geschrieben sein
Bitte nur den Brieftext ausgeben, ohne Metakommentare."""
    return await FallbackLLMClient().generate(prompt)
async def _generate_class_message(self, task: Task) -> str:
    """Draft a short message to a class via the fallback LLM."""
    from services.fallback_llm_client import FallbackLLMClient

    content = task.parameters.get('content', '')
    class_ref = task.parameters.get('class_ref', 'Klasse')
    prompt = f"""Erstelle eine kurze Klassennachricht:
Inhalt: {content}
Klasse: {class_ref}
Die Nachricht soll:
- Kurz und klar formuliert sein
- Freundlich aber verbindlich klingen
- Alle wichtigen Informationen enthalten
Nur die Nachricht ausgeben."""
    return await FallbackLLMClient().generate(prompt)
async def _handle_canvas_command(self, task: Task) -> str:
    """
    Translate a natural-language Canvas command into editor actions.

    Maps German keywords in the command to a list of action objects
    (resize/move/layout) and returns them as a JSON array string.
    Previously this returned Python repr (single quotes), which is not
    parseable as JSON downstream - json.dumps fixes that.
    """
    import json  # stdlib; local import keeps module-level deps unchanged

    command = task.parameters.get('command', '')
    lowered = command.lower()  # hoisted: every rule below checks it

    canvas_actions = []
    if 'groesser' in lowered or 'größer' in lowered:
        canvas_actions.append({"action": "resize", "target": "headings", "scale": 1.2})
    if 'kleiner' in lowered:
        canvas_actions.append({"action": "resize", "target": "spacing", "scale": 0.8})
    if 'links' in lowered:
        canvas_actions.append({"action": "move", "direction": "left"})
    if 'rechts' in lowered:
        canvas_actions.append({"action": "move", "direction": "right"})
    if 'a4' in lowered or 'drucklayout' in lowered:
        canvas_actions.append({"action": "layout", "format": "A4"})

    # Emit real JSON so the Canvas editor can parse the instruction list.
    return json.dumps(canvas_actions, ensure_ascii=False)
async def _schedule_reminder(self, task: Task) -> str:
    """Confirm a reminder request (placeholder - no real scheduler yet)."""
    # In production, this would hand off to a scheduler service.
    when = task.parameters.get('time', 'tomorrow')
    what = task.parameters.get('content', '')
    return f"Erinnerung geplant für {when}: {what}"
async def _generate_task_summary(self, task: Task) -> str:
    """Summarize the session's tasks that are not yet completed/expired."""
    closed_states = (TaskState.COMPLETED, TaskState.EXPIRED)
    pending = [
        f"- {t.type.value}: {t.state.value}"
        for t in map(self._tasks.get, self._session_tasks.get(task.session_id, []))
        if t and t.state not in closed_states
    ]
    if not pending:
        return "Keine offenen Aufgaben"
    return "Offene Aufgaben:\n" + "\n".join(pending)
async def execute_task(self, task: Task):
    """Execute an approved task by transitioning it to COMPLETED."""
    if task.state == TaskState.APPROVED:
        task.transition_to(TaskState.COMPLETED, "user_approved")
        logger.info("Task completed", task_id=task.id[:8])
    else:
        # Only APPROVED tasks may be executed; anything else is rejected.
        logger.warning("Task not approved", task_id=task.id[:8])
async def get_session_tasks(
    self,
    session_id: str,
    state: Optional[TaskState] = None,
) -> List[Task]:
    """Return the session's tasks, optionally filtered to a single state."""
    result: List[Task] = []
    for tid in self._session_tasks.get(session_id, []):
        candidate = self._tasks.get(tid)
        if candidate and (state is None or candidate.state == state):
            result.append(candidate)
    return result
async def create_task_from_intent(
    self,
    session_id: str,
    namespace_id: str,
    intent: Intent,
    transcript: str,
) -> Task:
    """Build a Task from a detected intent and queue it immediately."""
    new_task = Task(
        session_id=session_id,
        namespace_id=namespace_id,
        type=intent.type,
        intent_text=transcript,
        parameters=intent.parameters,
    )
    await self.queue_task(new_task)
    return new_task
async def generate_response(
    self,
    session_messages: List[TranscriptMessage],
    intent: Optional[Intent],
    namespace_id: str,
) -> str:
    """
    Produce a conversational reply for the current turn.

    Known actionable intents get a fixed German acknowledgement; all other
    turns are answered by the fallback LLM using the last five messages of
    the conversation as context.
    """
    from services.fallback_llm_client import FallbackLLMClient

    llm = FallbackLLMClient()

    # Canned acknowledgements for intents that spawn a task.
    if intent:
        canned = {
            TaskType.STUDENT_OBSERVATION: "Verstanden, ich habe mir das notiert.",
            TaskType.REMINDER: "Verstanden, ich habe mir das notiert.",
            TaskType.WORKSHEET_GENERATE: "Ich erstelle das Arbeitsblatt. Das kann einen Moment dauern.",
            TaskType.PARENT_LETTER: "Ich bereite einen Elternbrief vor.",
            TaskType.QUIZ_GENERATE: "Ich generiere den Quiz. Einen Moment bitte.",
        }
        reply = canned.get(intent.type)
        if reply is not None:
            return reply

    # Default: let the LLM answer with recent history as context.
    context = "\n".join(
        f"{msg.role}: {msg.content}" for msg in session_messages[-5:]
    )
    prompt = f"""Du bist ein hilfreicher Assistent für Lehrer.
Konversation:
{context}
Antworte kurz und hilfreich auf die letzte Nachricht des Nutzers.
Halte die Antwort unter 50 Wörtern."""
    return await llm.generate(prompt)