feat: voice-service von lehrer nach core verschoben, Pipeline erweitert (voice, BQAS, embedding, night-scheduler)
This commit is contained in:
18
voice-service/services/__init__.py
Normal file
18
voice-service/services/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""
|
||||
Voice Service Core Services
|
||||
"""
|
||||
from services.encryption_service import EncryptionService
|
||||
from services.task_orchestrator import TaskOrchestrator
|
||||
from services.personaplex_client import PersonaPlexClient
|
||||
from services.fallback_llm_client import FallbackLLMClient
|
||||
from services.intent_router import IntentRouter
|
||||
from services.audio_processor import AudioProcessor
|
||||
|
||||
__all__ = [
|
||||
"EncryptionService",
|
||||
"TaskOrchestrator",
|
||||
"PersonaPlexClient",
|
||||
"FallbackLLMClient",
|
||||
"IntentRouter",
|
||||
"AudioProcessor",
|
||||
]
|
||||
303
voice-service/services/audio_processor.py
Normal file
303
voice-service/services/audio_processor.py
Normal file
@@ -0,0 +1,303 @@
|
||||
"""
|
||||
Audio Processor - Mimi Codec Compatible
|
||||
Handles audio encoding/decoding for voice streaming
|
||||
|
||||
Mimi Codec specifications:
|
||||
- Sample rate: 24kHz
|
||||
- Frame size: 80ms
|
||||
- Format: Int16 PCM
|
||||
- Channels: Mono
|
||||
|
||||
IMPORTANT: Audio is NEVER persisted to disk.
|
||||
All processing happens in RAM only.
|
||||
"""
|
||||
import structlog
|
||||
import numpy as np
|
||||
from typing import Optional, Iterator, Tuple
|
||||
from dataclasses import dataclass
|
||||
|
||||
from config import settings
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass
class AudioFrame:
    """A single audio frame for processing."""
    # float32 samples in [-1.0, 1.0] for this frame
    samples: np.ndarray
    # frame start time relative to the beginning of the stream
    timestamp_ms: int
    # frame length; Mimi codec uses 80 ms frames
    duration_ms: int = 80


class AudioProcessor:
    """
    Processes audio for the Mimi codec.

    All audio processing is transient - data exists only
    in RAM and is discarded after processing.
    """

    def __init__(self):
        # Codec parameters come from service settings
        # (Mimi: 24 kHz sample rate, 80 ms frames, mono Int16 PCM).
        self.sample_rate = settings.audio_sample_rate
        self.frame_size_ms = settings.audio_frame_size_ms
        self.samples_per_frame = int(self.sample_rate * self.frame_size_ms / 1000)

    def bytes_to_samples(self, audio_bytes: bytes) -> np.ndarray:
        """
        Convert raw bytes to numpy samples.

        Args:
            audio_bytes: Int16 PCM audio data

        Returns:
            numpy array of float32 samples (-1.0 to 1.0)
        """
        samples_int16 = np.frombuffer(audio_bytes, dtype=np.int16)
        # Normalize by 32768 (magnitude of the most negative int16 value)
        # so the result stays within [-1.0, 1.0).
        return samples_int16.astype(np.float32) / 32768.0

    def samples_to_bytes(self, samples: np.ndarray) -> bytes:
        """
        Convert numpy samples to raw bytes.

        Args:
            samples: float32 samples (-1.0 to 1.0)

        Returns:
            Int16 PCM audio data
        """
        # Clip first so out-of-range values cannot wrap around on the cast.
        samples = np.clip(samples, -1.0, 1.0)
        samples_int16 = (samples * 32767).astype(np.int16)
        return samples_int16.tobytes()

    def extract_frames(
        self,
        audio_bytes: bytes,
        start_timestamp_ms: int = 0,
    ) -> Iterator[AudioFrame]:
        """
        Extract fixed-size frames from audio data.

        Args:
            audio_bytes: Raw Int16 PCM audio data
            start_timestamp_ms: Starting timestamp

        Yields:
            AudioFrame objects; the last frame is zero-padded to full size
        """
        samples = self.bytes_to_samples(audio_bytes)
        timestamp = start_timestamp_ms

        for i in range(0, len(samples), self.samples_per_frame):
            frame_samples = samples[i:i + self.samples_per_frame]

            # Pad last frame with silence if needed
            if len(frame_samples) < self.samples_per_frame:
                frame_samples = np.pad(
                    frame_samples,
                    (0, self.samples_per_frame - len(frame_samples)),
                )

            yield AudioFrame(
                samples=frame_samples,
                timestamp_ms=timestamp,
                duration_ms=self.frame_size_ms,
            )

            timestamp += self.frame_size_ms

    def combine_frames(self, frames: list[AudioFrame]) -> bytes:
        """
        Combine multiple frames into continuous audio.

        Frames are ordered by timestamp before concatenation.

        Args:
            frames: List of AudioFrame objects

        Returns:
            Combined Int16 PCM audio bytes (empty bytes for no frames)
        """
        if not frames:
            return b""

        sorted_frames = sorted(frames, key=lambda f: f.timestamp_ms)
        all_samples = np.concatenate([f.samples for f in sorted_frames])
        return self.samples_to_bytes(all_samples)

    def detect_voice_activity(
        self,
        audio_bytes: bytes,
        threshold: float = 0.02,
        min_duration_ms: int = 100,
    ) -> Tuple[bool, float]:
        """
        Simple energy-based voice activity detection.

        Args:
            audio_bytes: Raw audio data
            threshold: RMS energy threshold for speech detection
            min_duration_ms: Minimum duration for valid speech

        Returns:
            (is_speech, energy_level)
        """
        samples = self.bytes_to_samples(audio_bytes)

        # Empty input: no speech, zero energy (np.mean of [] would yield NaN).
        if samples.size == 0:
            return False, 0.0

        # RMS energy
        energy = np.sqrt(np.mean(samples ** 2))

        # Too short to count as speech regardless of energy
        duration_ms = len(samples) / self.sample_rate * 1000
        if duration_ms < min_duration_ms:
            return False, energy

        return energy > threshold, energy

    def resample(
        self,
        audio_bytes: bytes,
        source_rate: int,
        target_rate: Optional[int] = None,
    ) -> bytes:
        """
        Resample audio to target sample rate.

        Args:
            audio_bytes: Raw audio data
            source_rate: Source sample rate
            target_rate: Target sample rate (default: configured rate, 24kHz)

        Returns:
            Resampled audio bytes
        """
        target_rate = target_rate or self.sample_rate

        if source_rate == target_rate:
            return audio_bytes

        samples = self.bytes_to_samples(audio_bytes)
        # np.interp requires a non-empty source; nothing to resample anyway.
        if samples.size == 0:
            return audio_bytes

        # Calculate new length
        new_length = int(len(samples) * target_rate / source_rate)

        # Simple linear interpolation resampling
        # (In production, use scipy.signal.resample or librosa)
        x_old = np.linspace(0, 1, len(samples))
        x_new = np.linspace(0, 1, new_length)
        samples_resampled = np.interp(x_new, x_old, samples)

        return self.samples_to_bytes(samples_resampled)

    def normalize_audio(
        self,
        audio_bytes: bytes,
        target_db: float = -3.0,
    ) -> bytes:
        """
        Normalize audio so its peak hits the target dB level.

        Args:
            audio_bytes: Raw audio data
            target_db: Target peak level in dB

        Returns:
            Normalized audio bytes; empty or near-silent input is
            returned unchanged (avoids amplifying noise to full scale)
        """
        samples = self.bytes_to_samples(audio_bytes)
        # np.max on an empty array raises - treat empty input as silence.
        if samples.size == 0:
            return audio_bytes

        peak = np.max(np.abs(samples))
        if peak < 0.001:  # Silence
            return audio_bytes

        # Gain that moves the current peak to the target level
        target_linear = 10 ** (target_db / 20)
        gain = target_linear / peak

        return self.samples_to_bytes(samples * gain)

    def apply_noise_gate(
        self,
        audio_bytes: bytes,
        threshold_db: float = -40.0,
        attack_ms: float = 5.0,
        release_ms: float = 50.0,
    ) -> bytes:
        """
        Apply a noise gate to reduce background noise.

        Args:
            audio_bytes: Raw audio data
            threshold_db: Gate threshold in dB
            attack_ms: Attack time in ms
            release_ms: Release time in ms

        Returns:
            Gated audio bytes
        """
        samples = self.bytes_to_samples(audio_bytes)
        # np.convolve rejects empty input - nothing to gate anyway.
        if samples.size == 0:
            return audio_bytes

        # Convert threshold to linear amplitude
        threshold = 10 ** (threshold_db / 20)

        # Hard gate derived from the per-sample envelope
        envelope = np.abs(samples)
        gate = np.where(envelope > threshold, 1.0, 0.0)

        # Smooth gate transitions with a moving average whose width is the
        # larger of the attack/release windows (a cheap approximation of a
        # real attack/release envelope follower).
        attack_samples = int(attack_ms * self.sample_rate / 1000)
        release_samples = int(release_ms * self.sample_rate / 1000)
        kernel_size = max(attack_samples, release_samples)
        if kernel_size > 1:
            kernel = np.ones(kernel_size) / kernel_size
            gate = np.convolve(gate, kernel, mode='same')

        return self.samples_to_bytes(samples * gate)

    def get_audio_stats(self, audio_bytes: bytes) -> dict:
        """
        Get statistics about audio data.

        Args:
            audio_bytes: Raw audio data

        Returns:
            Dictionary with duration, sample count, RMS/peak level in dB
            and sample rate. Empty input yields the -200 dB floor.
        """
        samples = self.bytes_to_samples(audio_bytes)
        # np.max on an empty array raises - report a silent, empty clip.
        if samples.size == 0:
            return {
                "duration_ms": 0.0,
                "sample_count": 0,
                "rms_db": -200.0,
                "peak_db": -200.0,
                "sample_rate": self.sample_rate,
            }

        rms = np.sqrt(np.mean(samples ** 2))
        peak = np.max(np.abs(samples))
        duration_ms = len(samples) / self.sample_rate * 1000

        # Epsilon avoids log10(0) for digital silence (floor: -200 dB)
        rms_db = 20 * np.log10(rms + 1e-10)
        peak_db = 20 * np.log10(peak + 1e-10)

        return {
            "duration_ms": duration_ms,
            "sample_count": len(samples),
            "rms_db": round(rms_db, 1),
            "peak_db": round(peak_db, 1),
            "sample_rate": self.sample_rate,
        }
|
||||
231
voice-service/services/encryption_service.py
Normal file
231
voice-service/services/encryption_service.py
Normal file
@@ -0,0 +1,231 @@
|
||||
"""
|
||||
Encryption Service - Namespace Key Management
|
||||
Client-side encryption for DSGVO compliance
|
||||
|
||||
The encryption key NEVER leaves the teacher's device.
|
||||
Server only sees:
|
||||
- Key hash (for verification)
|
||||
- Encrypted blobs
|
||||
- Namespace ID (pseudonym)
|
||||
"""
|
||||
import structlog
|
||||
import hashlib
|
||||
import base64
|
||||
import secrets
|
||||
from typing import Optional
|
||||
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
||||
from cryptography.hazmat.primitives import hashes
|
||||
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
|
||||
|
||||
from config import settings
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class EncryptionService:
    """
    Handles namespace key verification and server-side encryption.

    Important: This service does NOT have access to the actual encryption key.
    The key is stored only on the teacher's device.
    This service only verifies key hashes and manages encrypted blobs.
    """

    def __init__(self):
        self._key_hashes: dict[str, str] = {}  # namespace_id -> key_hash
        # NOTE(review): the server key is generated per process, so data
        # encrypted at rest by this service cannot be decrypted after a
        # restart - confirm this is intended (transit-only protection).
        self._server_key = secrets.token_bytes(32)  # Server-side encryption for transit
        # PBKDF2 with 100k iterations is deliberately slow; cache derived
        # per-namespace keys so each encrypt/decrypt does not re-derive.
        self._derived_keys: dict[str, bytes] = {}

    def verify_key_hash(self, key_hash: str) -> bool:
        """
        Verify that a key hash has a valid format.
        Does NOT verify the actual key - that's client-side only.

        Accepts "disabled" for development over HTTP (where crypto.subtle is unavailable).
        In production, always use HTTPS to enable proper encryption.
        """
        if not key_hash:
            return False

        # Allow "disabled" for development (HTTP context where crypto.subtle is unavailable)
        if key_hash == "disabled":
            logger.warning(
                "Encryption disabled - client running in non-secure context (HTTP). "
                "Use HTTPS in production!"
            )
            return True

        # Expected format: "sha256:base64encodedHash"
        if not key_hash.startswith("sha256:"):
            return False

        try:
            hash_part = key_hash[7:]  # Remove "sha256:" prefix
            decoded = base64.b64decode(hash_part)
            return len(decoded) == 32  # SHA-256 produces 32 bytes
        except Exception:
            # Malformed base64 -> invalid hash
            return False

    def register_namespace_key(self, namespace_id: str, key_hash: str) -> bool:
        """
        Register a namespace's key hash for future verification.

        Returns False when the hash has an invalid format.
        """
        if not self.verify_key_hash(key_hash):
            logger.warning("Invalid key hash format", namespace_id=namespace_id[:8])
            return False

        self._key_hashes[namespace_id] = key_hash
        if key_hash == "disabled":
            logger.info("Namespace registered (encryption disabled)", namespace_id=namespace_id[:8])
        else:
            logger.info("Namespace key registered", namespace_id=namespace_id[:8])
        return True

    def encrypt_content(self, plaintext: str, namespace_id: str) -> str:
        """
        Encrypt content for server-side storage (AES-256-GCM).

        Note: This is transit encryption only.
        The actual client-side encryption happens in the browser/app.
        This adds an additional layer for data at rest on the server.

        Raises:
            Exception: re-raised after logging when encryption fails.
        """
        if not settings.encryption_enabled:
            return plaintext

        try:
            # Derive key from server key + namespace (cached)
            derived_key = self._derive_key(namespace_id)

            # Fresh 96-bit nonce per message, as required by GCM
            nonce = secrets.token_bytes(12)

            # Encrypt
            aesgcm = AESGCM(derived_key)
            ciphertext = aesgcm.encrypt(nonce, plaintext.encode('utf-8'), None)

            # Combine nonce + ciphertext and encode
            encrypted = base64.b64encode(nonce + ciphertext).decode('utf-8')
            return f"encrypted:{encrypted}"

        except Exception as e:
            logger.error("Encryption failed", error=str(e))
            raise

    def decrypt_content(self, encrypted: str, namespace_id: str) -> str:
        """
        Decrypt server-side encrypted content.

        Strings without the "encrypted:" prefix are returned as-is.

        Raises:
            Exception: re-raised after logging when decryption fails
            (wrong key, corrupted data, failed GCM authentication).
        """
        if not settings.encryption_enabled:
            return encrypted

        if not encrypted.startswith("encrypted:"):
            return encrypted  # Not encrypted

        try:
            # Decode
            encoded = encrypted[10:]  # Remove "encrypted:" prefix
            data = base64.b64decode(encoded)

            # Split nonce and ciphertext
            nonce = data[:12]
            ciphertext = data[12:]

            # Derive key from server key + namespace (cached)
            derived_key = self._derive_key(namespace_id)

            # Decrypt
            aesgcm = AESGCM(derived_key)
            plaintext = aesgcm.decrypt(nonce, ciphertext, None)

            return plaintext.decode('utf-8')

        except Exception as e:
            logger.error("Decryption failed", error=str(e))
            raise

    def _derive_key(self, namespace_id: str) -> bytes:
        """
        Derive (and memoize) a per-namespace key from the server key.

        PBKDF2-HMAC-SHA256 with the namespace ID as salt ensures each
        namespace has a unique encryption key. Results are cached because
        the 100k-iteration derivation is intentionally expensive.
        """
        cached = self._derived_keys.get(namespace_id)
        if cached is not None:
            return cached

        kdf = PBKDF2HMAC(
            algorithm=hashes.SHA256(),
            length=32,
            salt=namespace_id.encode('utf-8'),
            iterations=100000,
        )
        derived = kdf.derive(self._server_key)
        self._derived_keys[namespace_id] = derived
        return derived

    @staticmethod
    def generate_key_hash(key: bytes) -> str:
        """
        Generate a key hash for client-side use.
        This is a utility method - actual implementation is in the client.
        """
        hash_bytes = hashlib.sha256(key).digest()
        encoded = base64.b64encode(hash_bytes).decode('utf-8')
        return f"sha256:{encoded}"

    @staticmethod
    def generate_namespace_id() -> str:
        """
        Generate a new namespace ID (pseudonym) for a teacher.
        """
        return f"ns-{secrets.token_hex(16)}"
|
||||
|
||||
|
||||
class ClientSideEncryption:
    """
    Documentation-only helper class describing client-side encryption.
    This code runs in the browser/app, not on the server; the class
    intentionally has no behavior.

    Client-side encryption flow:
    1. Teacher generates a master key on first use
    2. Master key is stored in browser/app secure storage
    3. Key hash is sent to server for session verification
    4. All PII is encrypted with master key before sending to server
    5. Server only sees encrypted blobs

    JavaScript implementation:
    ```javascript
    // Generate master key (one-time)
    const masterKey = await crypto.subtle.generateKey(
        { name: "AES-GCM", length: 256 },
        true,
        ["encrypt", "decrypt"]
    );

    // Store in IndexedDB (encrypted with device key)
    await storeSecurely("masterKey", masterKey);

    // Generate key hash for server
    const keyData = await crypto.subtle.exportKey("raw", masterKey);
    const hashBuffer = await crypto.subtle.digest("SHA-256", keyData);
    const keyHash = "sha256:" + btoa(String.fromCharCode(...new Uint8Array(hashBuffer)));

    // Encrypt content before sending
    async function encryptContent(content) {
        const iv = crypto.getRandomValues(new Uint8Array(12));
        const encoded = new TextEncoder().encode(content);
        const ciphertext = await crypto.subtle.encrypt(
            { name: "AES-GCM", iv },
            masterKey,
            encoded
        );
        return btoa(String.fromCharCode(...iv, ...new Uint8Array(ciphertext)));
    }

    // Decrypt content after receiving
    async function decryptContent(encrypted) {
        const data = Uint8Array.from(atob(encrypted), c => c.charCodeAt(0));
        const iv = data.slice(0, 12);
        const ciphertext = data.slice(12);
        const decrypted = await crypto.subtle.decrypt(
            { name: "AES-GCM", iv },
            masterKey,
            ciphertext
        );
        return new TextDecoder().decode(decrypted);
    }
    ```
    """
    pass
|
||||
519
voice-service/services/enhanced_task_orchestrator.py
Normal file
519
voice-service/services/enhanced_task_orchestrator.py
Normal file
@@ -0,0 +1,519 @@
|
||||
"""
|
||||
Enhanced Task Orchestrator - Multi-Agent Integration
|
||||
|
||||
Extends the existing TaskOrchestrator with Multi-Agent support:
|
||||
- Session management with checkpoints
|
||||
- Message bus integration for inter-agent communication
|
||||
- Quality judge integration via BQAS
|
||||
- Heartbeat-based liveness
|
||||
"""
|
||||
|
||||
import asyncio
from datetime import datetime, timezone
from typing import Optional, Dict, Any

import structlog
|
||||
|
||||
from services.task_orchestrator import TaskOrchestrator, Intent
|
||||
from models.task import Task, TaskState
|
||||
|
||||
# Import agent-core components.
# The agent-core package lives outside this service, so its root must be
# importable. The location can be overridden via the AGENT_CORE_PATH
# environment variable and defaults to the sibling "agent-core" directory
# in the repository root (previously this was a hardcoded, machine-specific
# absolute path, which broke on every other machine and in CI).
import os
import sys
from pathlib import Path

AGENT_CORE_PATH = os.environ.get(
    "AGENT_CORE_PATH",
    str(Path(__file__).resolve().parents[2] / "agent-core"),
)
if AGENT_CORE_PATH not in sys.path:
    sys.path.insert(0, AGENT_CORE_PATH)
|
||||
|
||||
from sessions.session_manager import SessionManager, AgentSession, SessionState
|
||||
from sessions.heartbeat import HeartbeatMonitor, HeartbeatClient
|
||||
from brain.memory_store import MemoryStore
|
||||
from brain.context_manager import ContextManager, MessageRole
|
||||
from orchestrator.message_bus import MessageBus, AgentMessage, MessagePriority
|
||||
from orchestrator.task_router import TaskRouter, RoutingStrategy
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class EnhancedTaskOrchestrator(TaskOrchestrator):
|
||||
"""
|
||||
Enhanced TaskOrchestrator with Multi-Agent support.
|
||||
|
||||
Extends the existing TaskOrchestrator to integrate with:
|
||||
- Session management for persistence and recovery
|
||||
- Message bus for inter-agent communication
|
||||
- Quality judge for response validation
|
||||
- Memory store for long-term learning
|
||||
"""
|
||||
|
||||
    def __init__(
        self,
        redis_client=None,
        db_pool=None,
        namespace: str = "breakpilot"
    ):
        """
        Initialize the enhanced orchestrator.

        Wires up the agent-core components (sessions, memory, context,
        message bus, heartbeat, routing) on top of the base orchestrator.

        Args:
            redis_client: Async Redis/Valkey client
            db_pool: Async PostgreSQL connection pool
            namespace: Namespace for isolation
        """
        super().__init__()

        # Initialize agent-core components; all share the same backing
        # stores and namespace for isolation.
        self.session_manager = SessionManager(
            redis_client=redis_client,
            db_pool=db_pool,
            namespace=namespace
        )

        self.memory_store = MemoryStore(
            redis_client=redis_client,
            db_pool=db_pool,
            namespace=namespace
        )

        self.context_manager = ContextManager(
            redis_client=redis_client,
            db_pool=db_pool,
            namespace=namespace
        )

        self.message_bus = MessageBus(
            redis_client=redis_client,
            db_pool=db_pool,
            namespace=namespace
        )

        # A session is declared dead after 3 missed beats within the
        # 30 s timeout, checked every 5 s.
        self.heartbeat = HeartbeatMonitor(
            timeout_seconds=30,
            check_interval_seconds=5,
            max_missed_beats=3
        )

        self.task_router = TaskRouter()

        # Track active sessions by voice session ID; heartbeat clients are
        # keyed by the agent session ID instead.
        self._voice_sessions: Dict[str, AgentSession] = {}
        self._heartbeat_clients: Dict[str, HeartbeatClient] = {}

        logger.info("Enhanced TaskOrchestrator initialized with agent-core")
|
||||
|
||||
    async def start(self) -> None:
        """Starts the enhanced orchestrator.

        Starts the message bus and heartbeat monitoring, then subscribes
        this orchestrator to its inbound message channel.
        """
        await self.message_bus.start()
        await self.heartbeat.start_monitoring()

        # Subscribe to messages directed at this orchestrator
        await self.message_bus.subscribe(
            "voice-orchestrator",
            self._handle_agent_message
        )

        logger.info("Enhanced TaskOrchestrator started")
|
||||
|
||||
    async def stop(self) -> None:
        """Stops the enhanced orchestrator.

        Per-session heartbeat clients are stopped first, then the shared
        heartbeat monitor, and finally the message bus.
        """
        # Stop all heartbeat clients
        for client in self._heartbeat_clients.values():
            await client.stop()
        self._heartbeat_clients.clear()

        await self.heartbeat.stop_monitoring()
        await self.message_bus.stop()

        logger.info("Enhanced TaskOrchestrator stopped")
|
||||
|
||||
    async def create_session(
        self,
        voice_session_id: str,
        user_id: str = "",
        metadata: Optional[Dict[str, Any]] = None
    ) -> AgentSession:
        """
        Creates a new agent session for a voice session.

        Also creates the conversation context and starts a heartbeat
        client so the session's liveness is monitored.

        Args:
            voice_session_id: The voice session ID
            user_id: Optional user ID
            metadata: Additional metadata

        Returns:
            The created AgentSession
        """
        # Create session via session manager
        session = await self.session_manager.create_session(
            agent_type="voice-orchestrator",
            user_id=user_id,
            context={"voice_session_id": voice_session_id},
            metadata=metadata
        )

        # Create conversation context.
        # NOTE(review): unlike the other manager calls, this one is not
        # awaited - confirm ContextManager.create_context is synchronous.
        self.context_manager.create_context(
            session_id=session.session_id,
            system_prompt=self._get_system_prompt(),
            max_messages=50
        )

        # Start heartbeat for this session (beats every 10 s, well inside
        # the monitor's 30 s timeout)
        heartbeat_client = HeartbeatClient(
            session_id=session.session_id,
            monitor=self.heartbeat,
            interval_seconds=10
        )
        await heartbeat_client.start()

        # Register heartbeat for monitoring
        self.heartbeat.register(session.session_id, "voice-orchestrator")

        # Store references: sessions keyed by voice session ID,
        # heartbeat clients keyed by agent session ID
        self._voice_sessions[voice_session_id] = session
        self._heartbeat_clients[session.session_id] = heartbeat_client

        logger.info(
            "Created agent session",
            session_id=session.session_id[:8],
            voice_session_id=voice_session_id
        )

        return session
|
||||
|
||||
async def get_session(
|
||||
self,
|
||||
voice_session_id: str
|
||||
) -> Optional[AgentSession]:
|
||||
"""Gets the agent session for a voice session"""
|
||||
return self._voice_sessions.get(voice_session_id)
|
||||
|
||||
async def end_session(self, voice_session_id: str) -> None:
|
||||
"""
|
||||
Ends an agent session.
|
||||
|
||||
Args:
|
||||
voice_session_id: The voice session ID
|
||||
"""
|
||||
session = self._voice_sessions.get(voice_session_id)
|
||||
if not session:
|
||||
return
|
||||
|
||||
# Stop heartbeat
|
||||
if session.session_id in self._heartbeat_clients:
|
||||
await self._heartbeat_clients[session.session_id].stop()
|
||||
del self._heartbeat_clients[session.session_id]
|
||||
|
||||
# Unregister from heartbeat monitor
|
||||
self.heartbeat.unregister(session.session_id)
|
||||
|
||||
# Mark session as completed
|
||||
session.complete()
|
||||
await self.session_manager.update_session(session)
|
||||
|
||||
# Clean up
|
||||
del self._voice_sessions[voice_session_id]
|
||||
|
||||
logger.info(
|
||||
"Ended agent session",
|
||||
session_id=session.session_id[:8],
|
||||
duration_seconds=session.get_duration().total_seconds()
|
||||
)
|
||||
|
||||
async def queue_task(self, task: Task) -> None:
|
||||
"""
|
||||
Queue a task with session checkpointing.
|
||||
|
||||
Extends parent to add checkpoint for recovery.
|
||||
"""
|
||||
# Get session for this task
|
||||
session = self._voice_sessions.get(task.session_id)
|
||||
|
||||
if session:
|
||||
# Checkpoint before queueing
|
||||
session.checkpoint("task_queued", {
|
||||
"task_id": task.id,
|
||||
"task_type": task.type.value,
|
||||
"parameters": task.parameters
|
||||
})
|
||||
await self.session_manager.update_session(session)
|
||||
|
||||
# Call parent implementation
|
||||
await super().queue_task(task)
|
||||
|
||||
    async def process_task(self, task: Task) -> None:
        """
        Process a task with enhanced routing and quality checks.

        Extends parent to:
        - Route complex tasks to specialized agents
        - Run quality checks via BQAS
        - Store results in memory for learning

        Checkpoints are written before and after processing so the
        session can be recovered mid-task.
        """
        session = self._voice_sessions.get(task.session_id)

        if session:
            session.checkpoint("task_processing", {
                "task_id": task.id
            })

        # Check if this task should be routed to a specialized agent
        if self._needs_specialized_agent(task):
            await self._route_to_agent(task, session)
        else:
            # Use parent implementation for simple tasks
            await super().process_task(task)

        # Run quality check on result (only for content-generating tasks
        # that actually produced a result)
        if task.result_ref and self._needs_quality_check(task):
            await self._run_quality_check(task, session)

        # Store in memory for learning
        if task.state == TaskState.READY and task.result_ref:
            await self._store_task_result(task)

        if session:
            session.checkpoint("task_completed", {
                "task_id": task.id,
                "state": task.state.value
            })
            await self.session_manager.update_session(session)
|
||||
|
||||
def _needs_specialized_agent(self, task: Task) -> bool:
|
||||
"""Check if task needs routing to a specialized agent"""
|
||||
from models.task import TaskType
|
||||
|
||||
# Tasks that benefit from specialized agents
|
||||
specialized_types = [
|
||||
TaskType.PARENT_LETTER, # Could use grader for tone
|
||||
TaskType.FEEDBACK_SUGGEST, # Quality judge for appropriateness
|
||||
]
|
||||
|
||||
return task.type in specialized_types
|
||||
|
||||
def _needs_quality_check(self, task: Task) -> bool:
|
||||
"""Check if task result needs quality validation"""
|
||||
from models.task import TaskType
|
||||
|
||||
# Tasks that generate content should be checked
|
||||
content_types = [
|
||||
TaskType.PARENT_LETTER,
|
||||
TaskType.CLASS_MESSAGE,
|
||||
TaskType.FEEDBACK_SUGGEST,
|
||||
TaskType.WORKSHEET_GENERATE,
|
||||
]
|
||||
|
||||
return task.type in content_types
|
||||
|
||||
    async def _route_to_agent(
        self,
        task: Task,
        session: Optional[AgentSession]
    ) -> None:
        """Routes a task to a specialized agent.

        Falls back to local processing when no agent is available or the
        agent does not answer within 30 s. On success the agent's result
        is stored on the task and it transitions to READY.
        """
        # Determine target agent from the task type
        intent = f"task_{task.type.value}"
        routing_result = await self.task_router.route(
            intent=intent,
            context={"task": task.parameters},
            strategy=RoutingStrategy.LEAST_LOADED
        )

        if not routing_result.success:
            # Fall back to local processing
            logger.warning(
                "No agent available for task, using local processing",
                task_id=task.id[:8],
                reason=routing_result.reason
            )
            await super().process_task(task)
            return

        # Send to agent via message bus (request/response with timeout)
        try:
            response = await self.message_bus.request(
                AgentMessage(
                    sender="voice-orchestrator",
                    receiver=routing_result.agent_id,
                    message_type=f"process_{task.type.value}",
                    payload={
                        "task_id": task.id,
                        "task_type": task.type.value,
                        "parameters": task.parameters,
                        "session_id": session.session_id if session else None
                    },
                    priority=MessagePriority.NORMAL
                ),
                timeout=30.0
            )

            task.result_ref = response.get("result", "")
            task.transition_to(TaskState.READY, "agent_processed")

        # NOTE(review): only TimeoutError is handled; other bus errors
        # propagate to the caller - confirm that is intended.
        except asyncio.TimeoutError:
            logger.error(
                "Agent timeout, falling back to local",
                task_id=task.id[:8],
                agent=routing_result.agent_id
            )
            await super().process_task(task)
|
||||
|
||||
    async def _run_quality_check(
        self,
        task: Task,
        session: Optional[AgentSession]
    ) -> None:
        """Runs quality check on task result via quality judge.

        A composite score below 60 marks the task for review via its
        error_message; a judge timeout is treated as non-fatal.

        NOTE(review): the `session` parameter is currently unused here.
        """
        try:
            response = await self.message_bus.request(
                AgentMessage(
                    sender="voice-orchestrator",
                    receiver="quality-judge",
                    message_type="evaluate_response",
                    payload={
                        "task_id": task.id,
                        "task_type": task.type.value,
                        "response": task.result_ref,
                        "context": task.parameters
                    },
                    priority=MessagePriority.NORMAL
                ),
                timeout=10.0
            )

            quality_score = response.get("composite_score", 0)

            # Below-threshold results are flagged, not discarded
            if quality_score < 60:
                # Mark for review
                task.error_message = f"Quality check failed: {quality_score}"
                logger.warning(
                    "Task failed quality check",
                    task_id=task.id[:8],
                    score=quality_score
                )

        except asyncio.TimeoutError:
            # Quality check timeout is non-fatal
            logger.warning(
                "Quality check timeout",
                task_id=task.id[:8]
            )
|
||||
|
||||
async def _store_task_result(self, task: Task) -> None:
|
||||
"""Stores task result in memory for learning"""
|
||||
await self.memory_store.remember(
|
||||
key=f"task:{task.type.value}:{task.id}",
|
||||
value={
|
||||
"result": task.result_ref,
|
||||
"parameters": task.parameters,
|
||||
"completed_at": datetime.utcnow().isoformat()
|
||||
},
|
||||
agent_id="voice-orchestrator",
|
||||
ttl_days=30
|
||||
)
|
||||
|
||||
    async def _handle_agent_message(
        self,
        message: AgentMessage
    ) -> Optional[Dict[str, Any]]:
        """Handles incoming messages from other agents.

        Currently only "task_status_update" messages are acted upon;
        everything else is logged and ignored. Always returns None
        (no reply payload).
        """
        logger.debug(
            "Received agent message",
            sender=message.sender,
            type=message.message_type
        )

        if message.message_type == "task_status_update":
            # Handle task status updates.
            # NOTE(review): _tasks is presumably the task registry
            # maintained by the parent TaskOrchestrator - confirm.
            task_id = message.payload.get("task_id")
            if task_id in self._tasks:
                task = self._tasks[task_id]
                new_state = message.payload.get("state")
                if new_state:
                    task.transition_to(TaskState(new_state), "agent_update")

        return None
|
||||
|
||||
    def _get_system_prompt(self) -> str:
        """Returns the system prompt for the voice assistant.

        The prompt text is intentionally German: it is end-user-facing
        configuration for German-speaking teachers, not a comment.
        """
        return """Du bist ein hilfreicher Assistent für Lehrer in der Breakpilot-App.

Deine Aufgaben:
- Hilf beim Erstellen von Arbeitsblättern
- Unterstütze bei der Korrektur
- Erstelle Elternbriefe und Klassennachrichten
- Dokumentiere Beobachtungen und Erinnerungen

Halte dich kurz und präzise. Nutze einfache, klare Sprache.
Bei Unklarheiten frage nach."""
|
||||
|
||||
# Recovery methods
|
||||
|
||||
    async def recover_session(
        self,
        voice_session_id: str,
        session_id: str
    ) -> Optional[AgentSession]:
        """
        Recovers a session from checkpoint.

        Looks the session up in the session manager, verifies it is
        still ACTIVE, restores its heartbeat, re-registers the local
        references, and re-runs any tasks that were queued but never
        processed before the interruption.

        Args:
            voice_session_id: The voice session ID
            session_id: The agent session ID to recover

        Returns:
            The recovered session, or None if the session is unknown
            or no longer active.
        """
        session = await self.session_manager.get_session(session_id)

        if not session:
            logger.warning(
                "Session not found for recovery",
                session_id=session_id
            )
            return None

        if session.state != SessionState.ACTIVE:
            logger.warning(
                "Session not active for recovery",
                session_id=session_id,
                state=session.state.value
            )
            return None

        # Resume session
        session.resume()

        # Restore heartbeat so the monitor sees this session as alive again.
        # NOTE(review): interval_seconds=10 should match the value used on
        # initial session creation — confirm against the start path.
        heartbeat_client = HeartbeatClient(
            session_id=session.session_id,
            monitor=self.heartbeat,
            interval_seconds=10
        )
        await heartbeat_client.start()
        self.heartbeat.register(session.session_id, "voice-orchestrator")

        # Store references so later lookups by either id succeed.
        self._voice_sessions[voice_session_id] = session
        self._heartbeat_clients[session.session_id] = heartbeat_client

        # Recover pending tasks from checkpoints
        await self._recover_pending_tasks(session)

        logger.info(
            "Recovered session",
            session_id=session.session_id[:8],
            checkpoints=len(session.checkpoints)
        )

        return session
|
||||
|
||||
async def _recover_pending_tasks(self, session: AgentSession) -> None:
|
||||
"""Recovers pending tasks from session checkpoints"""
|
||||
for checkpoint in reversed(session.checkpoints):
|
||||
if checkpoint.name == "task_queued":
|
||||
task_id = checkpoint.data.get("task_id")
|
||||
if task_id and task_id in self._tasks:
|
||||
task = self._tasks[task_id]
|
||||
if task.state == TaskState.QUEUED:
|
||||
# Re-process queued task
|
||||
await self.process_task(task)
|
||||
logger.info(
|
||||
"Recovered pending task",
|
||||
task_id=task_id[:8]
|
||||
)
|
||||
248
voice-service/services/fallback_llm_client.py
Normal file
248
voice-service/services/fallback_llm_client.py
Normal file
@@ -0,0 +1,248 @@
|
||||
"""
|
||||
Fallback LLM Client - Ollama Integration
|
||||
Text-only fallback when PersonaPlex is not available
|
||||
|
||||
Used in development on Mac Mini with:
|
||||
- qwen2.5:32b for conversation
|
||||
- Local processing (DSGVO-konform)
|
||||
"""
|
||||
import structlog
|
||||
import httpx
|
||||
from typing import Optional, List, Dict, Any
|
||||
|
||||
from config import settings
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class FallbackLLMClient:
    """
    Ollama LLM client for text-only processing.

    When PersonaPlex is not available (development mode),
    this client provides:
    - Intent detection (text-based)
    - Response generation
    - Task execution assistance

    Note: Audio transcription requires a separate ASR service
    (e.g., Whisper) when using this fallback.
    """

    def __init__(self):
        # Connection parameters come from the shared settings module.
        self._base_url = settings.ollama_base_url
        self._model = settings.ollama_voice_model
        self._timeout = settings.ollama_timeout
        # Created lazily so constructing the client never opens sockets.
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or lazily create the shared HTTP client."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=self._timeout)
        return self._client

    async def close(self) -> None:
        """Release the pooled HTTP client.

        Fix: the AsyncClient was previously never closed, leaking its
        connection pool on shutdown. Call this when the service stops;
        the client is re-created transparently on next use.
        """
        if self._client is not None:
            await self._client.aclose()
            self._client = None

    async def _chat_request(
        self,
        messages: List[Dict[str, str]],
        temperature: float,
        num_predict: int,
    ) -> str:
        """POST to Ollama's /api/chat and return the reply content.

        Shared by generate() and chat(), which previously duplicated
        this request verbatim. Transport/HTTP errors propagate so each
        public caller keeps its own user-facing error message.
        """
        client = await self._get_client()
        response = await client.post(
            f"{self._base_url}/api/chat",
            json={
                "model": self._model,
                "messages": messages,
                "options": {
                    "temperature": temperature,
                    "num_predict": num_predict,
                },
                "stream": False,
            },
        )
        response.raise_for_status()
        data = response.json()
        return data.get("message", {}).get("content", "")

    async def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: int = 500,
    ) -> str:
        """
        Generate text completion.

        Args:
            prompt: User prompt
            system_prompt: Optional system instructions
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate

        Returns:
            Generated text, or a German fallback message when no
            provider is configured or the request fails.
        """
        if settings.fallback_llm_provider == "none":
            logger.warning("No LLM provider configured")
            return "LLM nicht verfügbar"

        # Build messages
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        try:
            return await self._chat_request(messages, temperature, max_tokens)
        except httpx.HTTPError as e:
            logger.error("Ollama request failed", error=str(e))
            return "Fehler bei der Verarbeitung"
        except Exception as e:
            logger.error("Unexpected error", error=str(e))
            return "Unerwarteter Fehler"

    async def detect_intent(self, text: str) -> Dict[str, Any]:
        """
        Detect intent from text using LLM.

        Returns:
            {
                "type": "student_observation" | "reminder" | ...,
                "confidence": 0.0-1.0,
                "parameters": {...},
                "is_actionable": bool
            }

        Falls back to the "unknown" intent when the model output does
        not contain parseable JSON.
        """
        system_prompt = """Du bist ein Intent-Detektor für Lehrer-Sprachbefehle.
Analysiere den Text und bestimme die Absicht.

Mögliche Intents:
- student_observation: Beobachtung zu einem Schüler
- reminder: Erinnerung an etwas
- homework_check: Hausaufgaben kontrollieren
- conference_topic: Thema für Konferenz
- correction_note: Notiz zur Korrektur
- worksheet_generate: Arbeitsblatt erstellen
- worksheet_differentiate: Differenzierung
- quick_activity: Schnelle Aktivität
- quiz_generate: Quiz erstellen
- parent_letter: Elternbrief
- class_message: Nachricht an Klasse
- canvas_edit: Canvas bearbeiten
- canvas_layout: Layout ändern
- operator_checklist: Operatoren-Checkliste
- eh_passage: EH-Passage suchen
- feedback_suggest: Feedback vorschlagen
- reminder_schedule: Erinnerung planen
- task_summary: Aufgaben zusammenfassen
- unknown: Unbekannt

Antworte NUR mit JSON:
{"type": "intent_name", "confidence": 0.0-1.0, "parameters": {...}, "is_actionable": true/false}"""

        # Low temperature: classification should be deterministic.
        result = await self.generate(
            prompt=f"Text: {text}",
            system_prompt=system_prompt,
            temperature=0.1,
            max_tokens=200,
        )

        try:
            import json
            # Extract the outermost JSON object from the raw reply —
            # the model may wrap it in prose despite the instructions.
            start = result.find("{")
            end = result.rfind("}") + 1
            if start >= 0 and end > start:
                return json.loads(result[start:end])
        except Exception as e:
            logger.warning("Intent parsing failed", error=str(e))

        return {
            "type": "unknown",
            "confidence": 0.0,
            "parameters": {},
            "is_actionable": False,
        }

    async def process_audio_description(self, audio_data: bytes) -> str:
        """
        Process audio by describing it (placeholder for ASR).

        In production, this would use Whisper or similar.
        For MVP, this returns a placeholder (the empty string).
        """
        # Duration math assumes 16-bit mono PCM at the configured rate.
        samples = len(audio_data) // 2  # 16-bit = 2 bytes
        duration_sec = samples / settings.audio_sample_rate

        logger.debug(
            "Audio received (no ASR in fallback mode)",
            duration_sec=duration_sec,
            bytes=len(audio_data),
        )

        # Placeholder - in production, integrate with Whisper
        return ""

    async def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
    ) -> str:
        """
        Multi-turn conversation.

        Args:
            messages: List of {"role": "user"|"assistant", "content": "..."}
            temperature: Sampling temperature

        Returns:
            Assistant response, or a German apology message on failure.
        """
        if settings.fallback_llm_provider == "none":
            return "LLM nicht verfügbar"

        # Prepend the fixed assistant persona as the system prompt.
        system_prompt = """Du bist Breakpilot, ein hilfreicher Assistent für Lehrer.
Du hilfst bei:
- Notizen und Beobachtungen
- Unterrichtsvorbereitung
- Elternkommunikation
- Korrekturunterstützung

Antworte kurz und präzise. Halte Antworten unter 100 Wörtern."""

        full_messages = [{"role": "system", "content": system_prompt}] + messages

        try:
            return await self._chat_request(full_messages, temperature, 300)
        except Exception as e:
            logger.error("Chat failed", error=str(e))
            return "Entschuldigung, ein Fehler ist aufgetreten."

    async def health_check(self) -> bool:
        """Check if Ollama is available (GET /api/tags)."""
        if settings.fallback_llm_provider == "none":
            return False

        try:
            client = await self._get_client()
            response = await client.get(f"{self._base_url}/api/tags")
            return response.status_code == 200
        except Exception:
            return False
|
||||
368
voice-service/services/intent_router.py
Normal file
368
voice-service/services/intent_router.py
Normal file
@@ -0,0 +1,368 @@
|
||||
"""
|
||||
Intent Router - Voice Command Classification
|
||||
Routes detected intents to appropriate handlers
|
||||
|
||||
Supports all use case groups:
|
||||
1. Kurze Notizen (Autofahrt)
|
||||
2. Arbeitsblatt-Generierung (Zug)
|
||||
3. Situatives Arbeiten (Schule)
|
||||
4. Canvas-Editor
|
||||
5. Korrektur & RAG-Assistenz
|
||||
6. Follow-up über Tage
|
||||
"""
|
||||
import structlog
|
||||
import re
|
||||
from typing import Optional, List, Dict, Any
|
||||
from dataclasses import dataclass
|
||||
|
||||
from config import settings
|
||||
from models.task import TaskType
|
||||
from models.session import TranscriptMessage
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass
class DetectedIntent:
    """Detected intent with confidence and parameters.

    Produced by IntentRouter via pattern matching, LLM classification,
    or context-based disambiguation.
    """
    type: TaskType  # the task category this utterance maps to
    confidence: float  # detection confidence in [0.0, 1.0]
    parameters: Dict[str, Any]  # extracted slots (names, times, content, ...)
    is_actionable: bool  # False for pure queries that create no task
|
||||
|
||||
|
||||
# Pattern-based intent detection rules.
# Matching happens on text already run through
# IntentRouter._normalize_text (lowercased, umlauts folded to ae/oe/ue,
# ß -> ss, whitespace collapsed). NOTE(review): alternatives below that
# still contain raw umlauts (e.g. "gestört", "einfügen", "größer") can
# therefore never match the normalized input — kept for raw-text use,
# but confirm whether they should be dropped.
INTENT_PATTERNS = {
    # Gruppe 1: Kurze Notizen
    TaskType.STUDENT_OBSERVATION: [
        r"notiz\s+zu\s+(\w+)",
        r"beobachtung\s+(\w+)",
        r"(\w+)\s+hat\s+(gestoert|gestört)",
        r"(\w+)\s+braucht",
    ],
    TaskType.REMINDER: [
        r"erinner\s+mich",
        r"morgen\s+(\d+:\d+)",
        r"reminder",
        r"nicht\s+vergessen",
    ],
    TaskType.HOMEWORK_CHECK: [
        r"hausaufgabe\s+kontrollieren",
        r"(\w+)\s+mathe\s+hausaufgabe",
        r"ha\s+check",
    ],
    TaskType.CONFERENCE_TOPIC: [
        r"thema\s+(lehrerkonferenz|konferenz)",
        r"fuer\s+die\s+konferenz",
        r"konferenzthema",
    ],
    TaskType.CORRECTION_NOTE: [
        r"aufgabe\s+(\d+)",
        r"haeufiger\s+fehler",
        r"naechste\s+stunde\s+erklaeren",
        r"korrekturnotiz",
    ],

    # Gruppe 2: Arbeitsblatt-Generierung
    TaskType.WORKSHEET_GENERATE: [
        r"arbeitsblatt\s+(erstellen|machen|generieren)",
        r"nimm\s+vokabeln",
        r"mach\s+(\d+)\s+lueckentexte",
        r"uebungsblatt",
    ],
    TaskType.WORKSHEET_DIFFERENTIATE: [
        r"differenzierung",
        r"zwei\s+schwierigkeitsstufen",
        r"basis\s+und\s+plus",
        r"leichtere\s+version",
    ],

    # Gruppe 3: Situatives Arbeiten
    TaskType.QUICK_ACTIVITY: [
        r"(\d+)\s+minuten\s+einstieg",
        r"schnelle\s+aktivitaet",
        r"warming\s*up",
        r"einstiegsaufgabe",
    ],
    TaskType.QUIZ_GENERATE: [
        r"vokabeltest",
        r"quiz\s+(erstellen|generieren)",
        r"(\d+)-minuten\s+test",
        r"kurzer\s+test",
    ],
    TaskType.PARENT_LETTER: [
        r"elternbrief\s+wegen",
        r"elternbrief",
        r"brief\s+an\s+eltern",
        r"wegen\s+wiederholter?\s+(stoerungen|störungen)",
        r"wegen\s+(stoerungen|störungen)",
        r"mitteilung\s+an\s+eltern",
    ],
    TaskType.CLASS_MESSAGE: [
        r"nachricht\s+an\s+(\d+\w+)",
        r"klassen\s*nachricht",
        r"info\s+an\s+die\s+klasse",
    ],

    # Gruppe 4: Canvas-Editor
    TaskType.CANVAS_EDIT: [
        r"ueberschriften?\s+(groesser|kleiner|größer)",
        r"bild\s+(\d+)\s+(nach|auf)",
        r"pfeil\s+(von|auf)",
        r"kasten\s+(hinzufuegen|einfügen)",
    ],
    TaskType.CANVAS_LAYOUT: [
        r"auf\s+eine\s+seite",
        r"drucklayout\s+a4",
        r"layout\s+(aendern|ändern)",
        r"alles\s+auf\s+a4",
    ],

    # Gruppe 5: Korrektur & RAG
    TaskType.OPERATOR_CHECKLIST: [
        r"operatoren[-\s]*checkliste",
        r"welche\s+operatoren",
        r"operatoren\s+fuer\s+diese\s+aufgabe",
    ],
    TaskType.EH_PASSAGE: [
        r"erwartungshorizont",
        r"eh\s*passage",
        r"was\s+steht\s+im\s+eh",
    ],
    TaskType.FEEDBACK_SUGGEST: [
        r"feedback\s*(vorschlag|vorschlagen)",
        r"wie\s+formuliere\s+ich",
        r"rueckmeldung\s+geben",
    ],

    # Gruppe 6: Follow-up
    TaskType.REMINDER_SCHEDULE: [
        r"erinner\s+mich\s+morgen",
        r"in\s+(\d+)\s+(stunden|tagen)",
        r"naechste\s+woche",
    ],
    TaskType.TASK_SUMMARY: [
        r"offenen?\s+(aufgaben|tasks)",
        r"was\s+steht\s+noch\s+an",
        r"zusammenfassung",
        r"fasse.+zusammen",
        r"diese[rn]?\s+woche",
    ],
}
|
||||
|
||||
|
||||
class IntentRouter:
    """
    Routes voice commands to appropriate task types.

    Uses a combination of:
    1. Pattern matching for common phrases
    2. LLM-based classification for complex queries
    3. Context from previous messages for disambiguation
    """

    def __init__(self) -> None:
        # TaskType -> compiled regexes, built once at construction time.
        self._compiled_patterns: Dict[TaskType, List[re.Pattern]] = {}
        self._compile_patterns()

    def _compile_patterns(self) -> None:
        """Pre-compile regex patterns for performance."""
        for task_type, patterns in INTENT_PATTERNS.items():
            self._compiled_patterns[task_type] = [
                re.compile(pattern, re.IGNORECASE | re.UNICODE)
                for pattern in patterns
            ]

    async def detect_intent(
        self,
        text: str,
        context: Optional[List[TranscriptMessage]] = None,
    ) -> Optional[DetectedIntent]:
        """
        Detect intent from text with optional context.

        Strategy, in order of preference:
        1. regex patterns (accepted at confidence > 0.6),
        2. LLM classification when a fallback provider is configured
           (accepted at confidence > 0.5),
        3. context-based disambiguation of short confirmations.

        Args:
            text: Input text (transcript)
            context: Previous messages for disambiguation

        Returns:
            DetectedIntent or None if no clear intent
        """
        # Normalize text (lowercase, umlauts folded, whitespace collapsed)
        normalized = self._normalize_text(text)

        # Try pattern matching first
        pattern_result = self._pattern_match(normalized)
        if pattern_result and pattern_result.confidence > 0.6:
            logger.info(
                "Intent detected via pattern",
                type=pattern_result.type.value,
                confidence=pattern_result.confidence,
            )
            return pattern_result

        # Fall back to LLM classification
        if settings.fallback_llm_provider != "none":
            llm_result = await self._llm_classify(normalized, context)
            if llm_result and llm_result.confidence > 0.5:
                logger.info(
                    "Intent detected via LLM",
                    type=llm_result.type.value,
                    confidence=llm_result.confidence,
                )
                return llm_result

        # Check for context-based disambiguation
        if context:
            context_result = self._context_disambiguate(normalized, context)
            if context_result:
                logger.info(
                    "Intent detected via context",
                    type=context_result.type.value,
                )
                return context_result

        logger.debug("No intent detected", text=text[:50])
        return None

    def _normalize_text(self, text: str) -> str:
        """Normalize text for matching."""
        # Convert umlauts so patterns can use plain ASCII spellings.
        # NOTE(review): patterns in INTENT_PATTERNS that still contain
        # raw umlauts can no longer match after this folding.
        text = text.lower()
        text = text.replace("ä", "ae").replace("ö", "oe").replace("ü", "ue")
        text = text.replace("ß", "ss")
        # Remove extra whitespace
        text = " ".join(text.split())
        return text

    def _pattern_match(self, text: str) -> Optional[DetectedIntent]:
        """Match text against known patterns.

        Returns the highest-confidence match across all task types.
        Confidence grows with the fraction of the input covered by the
        match: 0.6 base, +0.4 * coverage, capped at 0.95.
        """
        best_match = None
        best_confidence = 0.0

        for task_type, patterns in self._compiled_patterns.items():
            for pattern in patterns:
                match = pattern.search(text)
                if match:
                    # Calculate confidence based on match quality
                    match_ratio = len(match.group()) / len(text)
                    confidence = min(0.95, 0.6 + match_ratio * 0.4)

                    if confidence > best_confidence:
                        # Extract parameters from groups
                        parameters = self._extract_parameters(task_type, match, text)

                        best_match = DetectedIntent(
                            type=task_type,
                            confidence=confidence,
                            parameters=parameters,
                            is_actionable=self._is_actionable(task_type),
                        )
                        best_confidence = confidence

        return best_match

    def _extract_parameters(
        self,
        task_type: TaskType,
        match: re.Match,
        full_text: str,
    ) -> Dict[str, Any]:
        """Extract parameters from regex match.

        Capture-group slots are task-type specific; time references and
        post-colon content are extracted from the full text regardless
        of task type.
        """
        params = {}

        # Extract named groups or positional groups
        if match.groups():
            groups = match.groups()

            # Task-specific parameter extraction
            if task_type == TaskType.STUDENT_OBSERVATION:
                params["student_name"] = groups[0] if groups else None

            elif task_type == TaskType.HOMEWORK_CHECK:
                params["subject"] = "mathe" if "mathe" in full_text else None

            elif task_type == TaskType.QUICK_ACTIVITY:
                params["duration_minutes"] = int(groups[0]) if groups else 10

            elif task_type == TaskType.QUIZ_GENERATE:
                params["duration_minutes"] = int(groups[0]) if groups and groups[0].isdigit() else 10

            elif task_type == TaskType.CLASS_MESSAGE:
                params["class_name"] = groups[0] if groups else None

        # Extract time references (e.g. "14:30", or a bare 1-2 digit number)
        time_match = re.search(r"(\d{1,2}):?(\d{2})?", full_text)
        if time_match:
            params["time"] = time_match.group()

        # Extract content after colon
        colon_match = re.search(r":\s*(.+)$", full_text)
        if colon_match:
            params["content"] = colon_match.group(1).strip()

        return params

    def _is_actionable(self, task_type: TaskType) -> bool:
        """Check if intent type creates an actionable task."""
        # All task types are actionable except queries
        query_types = [
            TaskType.OPERATOR_CHECKLIST,
            TaskType.EH_PASSAGE,
            TaskType.TASK_SUMMARY,
        ]
        return task_type not in query_types

    async def _llm_classify(
        self,
        text: str,
        context: Optional[List[TranscriptMessage]] = None,
    ) -> Optional[DetectedIntent]:
        """Use LLM for intent classification.

        NOTE(review): a fresh FallbackLLMClient (and HTTP client) is
        created per call; consider caching a single instance. The
        ``context`` argument is currently unused here.
        """
        from services.fallback_llm_client import FallbackLLMClient

        llm = FallbackLLMClient()
        result = await llm.detect_intent(text)

        if result.get("type") == "unknown":
            return None

        try:
            task_type = TaskType(result["type"])
            return DetectedIntent(
                type=task_type,
                confidence=result.get("confidence", 0.5),
                parameters=result.get("parameters", {}),
                is_actionable=result.get("is_actionable", True),
            )
        except ValueError:
            logger.warning("Unknown task type from LLM", type=result.get("type"))
            return None

    def _context_disambiguate(
        self,
        text: str,
        context: List[TranscriptMessage],
    ) -> Optional[DetectedIntent]:
        """Disambiguate intent using conversation context.

        Short confirmations ("ja", "genau", ...) re-use the intent of
        the most recent assistant message that carried one.
        """
        if not context:
            return None

        # Look for continuation patterns
        continuation_words = ["ja", "genau", "richtig", "okay", "mach das", "bitte"]

        if any(word in text.lower() for word in continuation_words):
            # Find the last assistant message with a suggestion
            for msg in reversed(context):
                if msg.role == "assistant" and msg.intent:
                    try:
                        return DetectedIntent(
                            type=TaskType(msg.intent),
                            confidence=0.6,
                            parameters={},
                            is_actionable=True,
                        )
                    except ValueError:
                        # Stored intent no longer maps to a TaskType;
                        # keep scanning older messages.
                        pass

        return None
|
||||
286
voice-service/services/personaplex_client.py
Normal file
286
voice-service/services/personaplex_client.py
Normal file
@@ -0,0 +1,286 @@
|
||||
"""
|
||||
PersonaPlex-7B Client
|
||||
Full-Duplex Speech-to-Speech with NVIDIA's PersonaPlex model
|
||||
|
||||
Features:
|
||||
- Full-duplex audio streaming
|
||||
- 80ms latency target
|
||||
- 24kHz audio (Mimi codec compatible)
|
||||
- German language support
|
||||
- Teacher persona customization
|
||||
"""
|
||||
import structlog
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Optional, AsyncIterator
|
||||
import websockets
|
||||
from websockets.client import WebSocketClientProtocol
|
||||
|
||||
from config import settings
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class PersonaPlexClient:
    """
    WebSocket client for PersonaPlex-7B Full-Duplex model.

    PersonaPlex is NVIDIA's speech-to-speech model that provides:
    - Real-time transcription
    - Intent understanding
    - Natural language responses
    - Voice synthesis

    In development mode, this falls back to text-only processing:
    every method degrades to a no-op result when not connected.
    """

    def __init__(self):
        self._ws: Optional[WebSocketClientProtocol] = None
        self._connected = False
        self._persona_config: Optional[dict] = None

    async def connect(self) -> bool:
        """
        Connect to PersonaPlex WebSocket server.

        Returns True if connected, False if in fallback mode
        (disabled in settings, or the connection attempt failed).
        """
        if not settings.use_personaplex:
            logger.info("PersonaPlex disabled, using fallback mode")
            return False

        try:
            self._ws = await websockets.connect(
                settings.personaplex_ws_url,
                ping_interval=20,
                ping_timeout=10,
            )
            self._connected = True

            # Send persona configuration (only if one was loaded).
            if self._persona_config:
                await self._ws.send(json.dumps({
                    "type": "config",
                    "persona": self._persona_config,
                }))

            logger.info("Connected to PersonaPlex")
            return True

        except Exception as e:
            # Connection problems are non-fatal: callers fall back to text.
            logger.warning("PersonaPlex connection failed, using fallback", error=str(e))
            self._connected = False
            return False

    async def disconnect(self):
        """Disconnect from PersonaPlex and reset connection state."""
        if self._ws:
            await self._ws.close()
            self._ws = None
        self._connected = False

    def load_persona(self, persona_path: str = "personas/lehrer_persona.json"):
        """
        Load persona configuration for voice customization.

        Falls back to the built-in default persona when the file is
        missing or contains invalid JSON (fix: a corrupt persona file
        previously crashed the caller; the file is now also read as
        UTF-8 explicitly instead of the platform default encoding).
        """
        try:
            with open(persona_path, 'r', encoding='utf-8') as f:
                self._persona_config = json.load(f)
            logger.info("Loaded persona", path=persona_path)
        except FileNotFoundError:
            logger.warning("Persona file not found, using defaults", path=persona_path)
            self._persona_config = self._default_persona()
        except json.JSONDecodeError as e:
            logger.warning("Persona file invalid, using defaults", path=persona_path, error=str(e))
            self._persona_config = self._default_persona()

    def _default_persona(self) -> dict:
        """Default teacher persona configuration."""
        return {
            "name": "Breakpilot Assistant",
            "language": "de-DE",
            "voice": {
                "gender": "neutral",
                "pitch": "medium",
                "speed": 1.0,
            },
            "style": {
                "formal": True,
                "friendly": True,
                "concise": True,
            },
            "domain_knowledge": [
                "education",
                "teaching",
                "school_administration",
                "student_assessment",
            ],
        }

    async def transcribe(self, audio_data: bytes) -> str:
        """
        Transcribe audio to text.

        Args:
            audio_data: PCM Int16 audio at 24kHz

        Returns:
            Transcribed text; empty string in fallback mode, on
            timeout, or on error.
        """
        if not self._connected:
            # Fallback: return empty (audio not processed)
            logger.debug("PersonaPlex not connected, skipping transcription")
            return ""

        try:
            # Send audio for transcription
            await self._ws.send(audio_data)

            # Wait for transcription response
            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )

            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "transcript":
                    return data.get("text", "")

            return ""

        except asyncio.TimeoutError:
            logger.warning("Transcription timeout")
            return ""
        except Exception as e:
            logger.error("Transcription failed", error=str(e))
            return ""

    async def synthesize(self, text: str) -> bytes:
        """
        Synthesize text to speech.

        Args:
            text: Text to synthesize

        Returns:
            PCM Int16 audio at 24kHz; empty bytes in fallback mode,
            on timeout, or on error.
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping synthesis")
            return b""

        try:
            # Request synthesis
            await self._ws.send(json.dumps({
                "type": "synthesize",
                "text": text,
            }))

            # Collect binary audio chunks until the server signals
            # completion (or reports an error).
            audio_chunks = []

            while True:
                response = await asyncio.wait_for(
                    self._ws.recv(),
                    timeout=settings.personaplex_timeout,
                )

                if isinstance(response, bytes):
                    audio_chunks.append(response)
                elif isinstance(response, str):
                    data = json.loads(response)
                    if data.get("type") == "synthesis_complete":
                        break
                    if data.get("type") == "error":
                        logger.error("Synthesis error", error=data.get("message"))
                        break

            return b"".join(audio_chunks)

        except asyncio.TimeoutError:
            logger.warning("Synthesis timeout")
            return b""
        except Exception as e:
            logger.error("Synthesis failed", error=str(e))
            return b""

    async def stream_conversation(
        self,
        audio_stream: AsyncIterator[bytes],
    ) -> AsyncIterator[dict]:
        """
        Full-duplex conversation streaming.

        Yields dictionaries with:
        - type: "transcript" | "response_text" | "response_audio" | "intent"
        - content: The actual content

        The generator ends when the model signals "end_of_turn",
        on timeout, or on error.
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping stream")
            return

        try:
            # Uplink: forward caller audio to PersonaPlex concurrently
            # with receiving model events below.
            async def send_audio():
                async for chunk in audio_stream:
                    if self._ws:
                        await self._ws.send(chunk)

            send_task = asyncio.create_task(send_audio())

            try:
                while True:
                    response = await asyncio.wait_for(
                        self._ws.recv(),
                        timeout=settings.personaplex_timeout,
                    )

                    if isinstance(response, bytes):
                        yield {
                            "type": "response_audio",
                            "content": response,
                        }
                    elif isinstance(response, str):
                        data = json.loads(response)
                        yield data

                        if data.get("type") == "end_of_turn":
                            break

            finally:
                # Fix: awaiting the cancelled task prevents "Task was
                # destroyed but it is pending" warnings and lets real
                # send errors surface into the error log below.
                send_task.cancel()
                try:
                    await send_task
                except asyncio.CancelledError:
                    pass

        except asyncio.TimeoutError:
            logger.warning("Stream timeout")
        except Exception as e:
            logger.error("Stream failed", error=str(e))

    async def detect_intent(self, text: str) -> Optional[dict]:
        """
        Detect intent from text using PersonaPlex.

        Returns intent dict or None (fallback mode, unexpected reply,
        or error).
        """
        if not self._connected:
            return None

        try:
            await self._ws.send(json.dumps({
                "type": "detect_intent",
                "text": text,
            }))

            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )

            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "intent":
                    return data

            return None

        except Exception as e:
            logger.error("Intent detection failed", error=str(e))
            return None
|
||||
382
voice-service/services/task_orchestrator.py
Normal file
382
voice-service/services/task_orchestrator.py
Normal file
@@ -0,0 +1,382 @@
|
||||
"""
|
||||
Task Orchestrator - Task State Machine
|
||||
Manages task lifecycle and routes to Breakpilot modules
|
||||
|
||||
The TaskOrchestrator is the agent orchestration layer that:
|
||||
1. Receives intents from voice input
|
||||
2. Creates and manages tasks
|
||||
3. Routes to appropriate Breakpilot modules
|
||||
4. Maintains conversation context
|
||||
5. Handles follow-up queries
|
||||
|
||||
Note: This is a safe, internal task router with no shell access,
|
||||
no email capabilities, and no external API access beyond internal services.
|
||||
"""
|
||||
import structlog
|
||||
import httpx
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from config import settings
|
||||
from models.task import Task, TaskState, TaskType, is_valid_transition
|
||||
from models.session import TranscriptMessage
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class Intent:
    """Detected intent from voice input.

    NOTE(review): structurally duplicates DetectedIntent in
    services/intent_router.py — consider unifying the two types.
    """

    def __init__(
        self,
        type: TaskType,
        confidence: float,
        parameters: Dict[str, Any],
        is_actionable: bool = True,
    ):
        self.type = type  # task category for the utterance
        self.confidence = confidence  # detection confidence in [0.0, 1.0]
        self.parameters = parameters  # extracted slots for task creation
        self.is_actionable = is_actionable  # False for pure queries
|
||||
|
||||
|
||||
class TaskOrchestrator:
|
||||
"""
|
||||
Task orchestration and state machine management.
|
||||
|
||||
Handles the full lifecycle of voice-initiated tasks:
|
||||
1. Intent -> Task creation
|
||||
2. Task queuing and execution
|
||||
3. Result handling
|
||||
4. Follow-up context
|
||||
|
||||
Security: This orchestrator only routes to internal Breakpilot services
|
||||
via HTTP. It has NO access to shell commands, emails, calendars, or
|
||||
external APIs.
|
||||
"""
|
||||
|
||||
    def __init__(self):
        # All known tasks, keyed by task id.
        self._tasks: Dict[str, Task] = {}
        self._session_tasks: Dict[str, List[str]] = {}  # session_id -> task_ids
        # Lazily-created HTTP client shared across module calls.
        self._http_client: Optional[httpx.AsyncClient] = None
|
||||
|
||||
async def _get_client(self) -> httpx.AsyncClient:
|
||||
"""Get or create HTTP client."""
|
||||
if self._http_client is None:
|
||||
self._http_client = httpx.AsyncClient(timeout=30.0)
|
||||
return self._http_client
|
||||
|
||||
async def queue_task(self, task: Task):
|
||||
"""
|
||||
Queue a task for processing.
|
||||
Transitions from DRAFT to QUEUED.
|
||||
"""
|
||||
if task.state != TaskState.DRAFT:
|
||||
logger.warning("Task not in DRAFT state", task_id=task.id[:8])
|
||||
return
|
||||
|
||||
task.transition_to(TaskState.QUEUED, "queued_for_processing")
|
||||
|
||||
# Store task
|
||||
self._tasks[task.id] = task
|
||||
|
||||
# Add to session tasks
|
||||
if task.session_id not in self._session_tasks:
|
||||
self._session_tasks[task.session_id] = []
|
||||
self._session_tasks[task.session_id].append(task.id)
|
||||
|
||||
logger.info(
|
||||
"Task queued",
|
||||
task_id=task.id[:8],
|
||||
type=task.type.value,
|
||||
)
|
||||
|
||||
# Auto-process certain task types
|
||||
auto_process_types = [
|
||||
TaskType.STUDENT_OBSERVATION,
|
||||
TaskType.REMINDER,
|
||||
TaskType.HOMEWORK_CHECK,
|
||||
]
|
||||
|
||||
if task.type in auto_process_types:
|
||||
await self.process_task(task)
|
||||
|
||||
async def process_task(self, task: Task):
|
||||
"""
|
||||
Process a queued task.
|
||||
Routes to appropriate Breakpilot module.
|
||||
"""
|
||||
if task.state != TaskState.QUEUED:
|
||||
logger.warning("Task not in QUEUED state", task_id=task.id[:8])
|
||||
return
|
||||
|
||||
task.transition_to(TaskState.RUNNING, "processing_started")
|
||||
|
||||
try:
|
||||
# Route to appropriate handler
|
||||
result = await self._route_task(task)
|
||||
|
||||
# Store result
|
||||
task.result_ref = result
|
||||
|
||||
# Transition to READY
|
||||
task.transition_to(TaskState.READY, "processing_complete")
|
||||
|
||||
logger.info(
|
||||
"Task processed",
|
||||
task_id=task.id[:8],
|
||||
type=task.type.value,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Task processing failed", task_id=task.id[:8], error=str(e))
|
||||
task.error_message = str(e)
|
||||
task.transition_to(TaskState.READY, "processing_failed")
|
||||
|
||||
async def _route_task(self, task: Task) -> str:
|
||||
"""
|
||||
Route task to appropriate Breakpilot module.
|
||||
"""
|
||||
client = await self._get_client()
|
||||
|
||||
# Task type to endpoint mapping
|
||||
routes = {
|
||||
# Worksheet generation
|
||||
TaskType.WORKSHEET_GENERATE: f"{settings.klausur_service_url}/api/v1/worksheets/generate",
|
||||
TaskType.WORKSHEET_DIFFERENTIATE: f"{settings.klausur_service_url}/api/v1/worksheets/differentiate",
|
||||
|
||||
# Quick activities
|
||||
TaskType.QUICK_ACTIVITY: f"{settings.klausur_service_url}/api/v1/activities/generate",
|
||||
TaskType.QUIZ_GENERATE: f"{settings.klausur_service_url}/api/v1/quizzes/generate",
|
||||
|
||||
# Korrektur assistance
|
||||
TaskType.OPERATOR_CHECKLIST: f"{settings.klausur_service_url}/api/v1/corrections/operators",
|
||||
TaskType.EH_PASSAGE: f"{settings.klausur_service_url}/api/v1/corrections/eh-passage",
|
||||
TaskType.FEEDBACK_SUGGEST: f"{settings.klausur_service_url}/api/v1/corrections/feedback",
|
||||
}
|
||||
|
||||
# Check if this task type needs API routing
|
||||
if task.type in routes:
|
||||
try:
|
||||
response = await client.post(
|
||||
routes[task.type],
|
||||
json={
|
||||
"task_id": task.id,
|
||||
"namespace_id": task.namespace_id,
|
||||
"parameters": task.parameters,
|
||||
},
|
||||
timeout=settings.ollama_timeout,
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json().get("result", "")
|
||||
except httpx.HTTPError as e:
|
||||
logger.error("API call failed", url=routes[task.type], error=str(e))
|
||||
raise
|
||||
|
||||
# Handle local tasks (no API call needed)
|
||||
if task.type in [TaskType.STUDENT_OBSERVATION, TaskType.REMINDER, TaskType.HOMEWORK_CHECK]:
|
||||
return await self._handle_note_task(task)
|
||||
|
||||
if task.type in [TaskType.CONFERENCE_TOPIC, TaskType.CORRECTION_NOTE]:
|
||||
return await self._handle_note_task(task)
|
||||
|
||||
if task.type == TaskType.PARENT_LETTER:
|
||||
return await self._generate_parent_letter(task)
|
||||
|
||||
if task.type == TaskType.CLASS_MESSAGE:
|
||||
return await self._generate_class_message(task)
|
||||
|
||||
if task.type in [TaskType.CANVAS_EDIT, TaskType.CANVAS_LAYOUT]:
|
||||
return await self._handle_canvas_command(task)
|
||||
|
||||
if task.type == TaskType.REMINDER_SCHEDULE:
|
||||
return await self._schedule_reminder(task)
|
||||
|
||||
if task.type == TaskType.TASK_SUMMARY:
|
||||
return await self._generate_task_summary(task)
|
||||
|
||||
logger.warning("Unknown task type", task_type=task.type.value)
|
||||
return "Task type not implemented"
|
||||
|
||||
async def _handle_note_task(self, task: Task) -> str:
|
||||
"""Handle simple note/observation tasks."""
|
||||
# These are stored encrypted, no further processing needed
|
||||
return "Notiz gespeichert"
|
||||
|
||||
async def _generate_parent_letter(self, task: Task) -> str:
|
||||
"""Generate a parent letter using LLM."""
|
||||
from services.fallback_llm_client import FallbackLLMClient
|
||||
|
||||
llm = FallbackLLMClient()
|
||||
|
||||
prompt = f"""Erstelle einen neutralen, professionellen Elternbrief basierend auf:
|
||||
Anlass: {task.parameters.get('reason', 'Allgemeine Information')}
|
||||
Kontext: {task.parameters.get('context', '')}
|
||||
|
||||
Der Brief soll:
|
||||
- Sachlich und respektvoll formuliert sein
|
||||
- Keine Schuldzuweisungen enthalten
|
||||
- Konstruktiv auf Lösungen ausgerichtet sein
|
||||
- In der Ich-Form aus Lehrersicht geschrieben sein
|
||||
|
||||
Bitte nur den Brieftext ausgeben, ohne Metakommentare."""
|
||||
|
||||
result = await llm.generate(prompt)
|
||||
return result
|
||||
|
||||
async def _generate_class_message(self, task: Task) -> str:
|
||||
"""Generate a class message."""
|
||||
from services.fallback_llm_client import FallbackLLMClient
|
||||
|
||||
llm = FallbackLLMClient()
|
||||
|
||||
prompt = f"""Erstelle eine kurze Klassennachricht:
|
||||
Inhalt: {task.parameters.get('content', '')}
|
||||
Klasse: {task.parameters.get('class_ref', 'Klasse')}
|
||||
|
||||
Die Nachricht soll:
|
||||
- Kurz und klar formuliert sein
|
||||
- Freundlich aber verbindlich klingen
|
||||
- Alle wichtigen Informationen enthalten
|
||||
|
||||
Nur die Nachricht ausgeben."""
|
||||
|
||||
result = await llm.generate(prompt)
|
||||
return result
|
||||
|
||||
async def _handle_canvas_command(self, task: Task) -> str:
|
||||
"""Handle Canvas editor commands."""
|
||||
# Parse canvas commands and generate JSON instructions
|
||||
command = task.parameters.get('command', '')
|
||||
|
||||
# Map natural language to Canvas actions
|
||||
canvas_actions = []
|
||||
|
||||
if 'groesser' in command.lower() or 'größer' in command.lower():
|
||||
canvas_actions.append({"action": "resize", "target": "headings", "scale": 1.2})
|
||||
|
||||
if 'kleiner' in command.lower():
|
||||
canvas_actions.append({"action": "resize", "target": "spacing", "scale": 0.8})
|
||||
|
||||
if 'links' in command.lower():
|
||||
canvas_actions.append({"action": "move", "direction": "left"})
|
||||
|
||||
if 'rechts' in command.lower():
|
||||
canvas_actions.append({"action": "move", "direction": "right"})
|
||||
|
||||
if 'a4' in command.lower() or 'drucklayout' in command.lower():
|
||||
canvas_actions.append({"action": "layout", "format": "A4"})
|
||||
|
||||
return str(canvas_actions)
|
||||
|
||||
async def _schedule_reminder(self, task: Task) -> str:
|
||||
"""Schedule a reminder for later."""
|
||||
# In production, this would use a scheduler service
|
||||
reminder_time = task.parameters.get('time', 'tomorrow')
|
||||
reminder_content = task.parameters.get('content', '')
|
||||
|
||||
return f"Erinnerung geplant für {reminder_time}: {reminder_content}"
|
||||
|
||||
async def _generate_task_summary(self, task: Task) -> str:
|
||||
"""Generate a summary of pending tasks."""
|
||||
session_tasks = self._session_tasks.get(task.session_id, [])
|
||||
|
||||
pending = []
|
||||
for task_id in session_tasks:
|
||||
t = self._tasks.get(task_id)
|
||||
if t and t.state not in [TaskState.COMPLETED, TaskState.EXPIRED]:
|
||||
pending.append(f"- {t.type.value}: {t.state.value}")
|
||||
|
||||
if not pending:
|
||||
return "Keine offenen Aufgaben"
|
||||
|
||||
return "Offene Aufgaben:\n" + "\n".join(pending)
|
||||
|
||||
async def execute_task(self, task: Task):
|
||||
"""Execute an approved task."""
|
||||
if task.state != TaskState.APPROVED:
|
||||
logger.warning("Task not approved", task_id=task.id[:8])
|
||||
return
|
||||
|
||||
# Mark as completed
|
||||
task.transition_to(TaskState.COMPLETED, "user_approved")
|
||||
|
||||
logger.info("Task completed", task_id=task.id[:8])
|
||||
|
||||
async def get_session_tasks(
|
||||
self,
|
||||
session_id: str,
|
||||
state: Optional[TaskState] = None,
|
||||
) -> List[Task]:
|
||||
"""Get tasks for a session, optionally filtered by state."""
|
||||
task_ids = self._session_tasks.get(session_id, [])
|
||||
tasks = []
|
||||
|
||||
for task_id in task_ids:
|
||||
task = self._tasks.get(task_id)
|
||||
if task:
|
||||
if state is None or task.state == state:
|
||||
tasks.append(task)
|
||||
|
||||
return tasks
|
||||
|
||||
async def create_task_from_intent(
|
||||
self,
|
||||
session_id: str,
|
||||
namespace_id: str,
|
||||
intent: Intent,
|
||||
transcript: str,
|
||||
) -> Task:
|
||||
"""Create a task from a detected intent."""
|
||||
task = Task(
|
||||
session_id=session_id,
|
||||
namespace_id=namespace_id,
|
||||
type=intent.type,
|
||||
intent_text=transcript,
|
||||
parameters=intent.parameters,
|
||||
)
|
||||
|
||||
await self.queue_task(task)
|
||||
return task
|
||||
|
||||
async def generate_response(
|
||||
self,
|
||||
session_messages: List[TranscriptMessage],
|
||||
intent: Optional[Intent],
|
||||
namespace_id: str,
|
||||
) -> str:
|
||||
"""Generate a conversational response."""
|
||||
from services.fallback_llm_client import FallbackLLMClient
|
||||
|
||||
llm = FallbackLLMClient()
|
||||
|
||||
# Build conversation context
|
||||
context = "\n".join([
|
||||
f"{msg.role}: {msg.content}"
|
||||
for msg in session_messages[-5:] # Last 5 messages
|
||||
])
|
||||
|
||||
# Generate response based on intent
|
||||
if intent:
|
||||
if intent.type in [TaskType.STUDENT_OBSERVATION, TaskType.REMINDER]:
|
||||
return "Verstanden, ich habe mir das notiert."
|
||||
|
||||
if intent.type == TaskType.WORKSHEET_GENERATE:
|
||||
return "Ich erstelle das Arbeitsblatt. Das kann einen Moment dauern."
|
||||
|
||||
if intent.type == TaskType.PARENT_LETTER:
|
||||
return "Ich bereite einen Elternbrief vor."
|
||||
|
||||
if intent.type == TaskType.QUIZ_GENERATE:
|
||||
return "Ich generiere den Quiz. Einen Moment bitte."
|
||||
|
||||
# Default: use LLM for conversational response
|
||||
prompt = f"""Du bist ein hilfreicher Assistent für Lehrer.
|
||||
Konversation:
|
||||
{context}
|
||||
|
||||
Antworte kurz und hilfreich auf die letzte Nachricht des Nutzers.
|
||||
Halte die Antwort unter 50 Wörtern."""
|
||||
|
||||
response = await llm.generate(prompt)
|
||||
return response
|
||||
Reference in New Issue
Block a user