"""
PersonaPlex-7B Client
Full-Duplex Speech-to-Speech with NVIDIA's PersonaPlex model

Features:
- Full-duplex audio streaming
- 80ms latency target
- 24kHz audio (Mimi codec compatible)
- German language support
- Teacher persona customization
"""
import asyncio
import json
from typing import Optional, AsyncIterator

import structlog
import websockets
from websockets.client import WebSocketClientProtocol

from config import settings

logger = structlog.get_logger(__name__)


class PersonaPlexClient:
    """
    WebSocket client for PersonaPlex-7B Full-Duplex model.

    PersonaPlex is NVIDIA's speech-to-speech model that provides:
    - Real-time transcription
    - Intent understanding
    - Natural language responses
    - Voice synthesis

    In development mode, this falls back to text-only processing: every
    request method returns an "empty" result (``""``, ``b""``, ``None`` or an
    empty stream) while the client is not connected.
    """

    def __init__(self) -> None:
        # Active WebSocket, or None while disconnected / in fallback mode.
        self._ws: Optional[WebSocketClientProtocol] = None
        # True only between a successful connect() and disconnect()/failure.
        self._connected = False
        # Persona sent to the server during connect(); set by load_persona().
        self._persona_config: Optional[dict] = None

    async def _discard_socket(self) -> None:
        """Reset connection state and close the current socket, ignoring close errors."""
        ws, self._ws = self._ws, None
        self._connected = False
        if ws is not None:
            try:
                await ws.close()
            except Exception as e:
                # Best effort: the socket is being thrown away anyway.
                logger.debug("Error while closing PersonaPlex socket", error=str(e))

    async def connect(self) -> bool:
        """
        Connect to PersonaPlex WebSocket server.

        If a persona has been loaded (see ``load_persona``) it is sent as a
        ``config`` message right after the connection is established, so the
        persona must be loaded *before* calling this method to take effect.

        Returns True if connected, False if in fallback mode (PersonaPlex
        disabled in settings, or the connection attempt failed).
        """
        if not settings.use_personaplex:
            logger.info("PersonaPlex disabled, using fallback mode")
            return False

        # Reconnecting: close the previous socket instead of leaking it.
        if self._ws is not None:
            await self._discard_socket()

        try:
            self._ws = await websockets.connect(
                settings.personaplex_ws_url,
                ping_interval=20,
                ping_timeout=10,
            )
            self._connected = True

            # Send persona configuration
            if self._persona_config:
                await self._ws.send(json.dumps({
                    "type": "config",
                    "persona": self._persona_config,
                }))

            logger.info("Connected to PersonaPlex")
            return True

        except Exception as e:
            logger.warning("PersonaPlex connection failed, using fallback", error=str(e))
            # The handshake may have succeeded before the config send failed;
            # close whatever was opened so the socket is not leaked.
            await self._discard_socket()
            return False

    async def disconnect(self):
        """
        Disconnect from PersonaPlex.

        The client state is reset *before* the socket is closed, so the client
        is reported as disconnected even if ``close()`` raises (the exception
        still propagates to the caller).
        """
        ws, self._ws = self._ws, None
        self._connected = False
        if ws is not None:
            await ws.close()

    def load_persona(self, persona_path: str = "personas/lehrer_persona.json"):
        """
        Load persona configuration for voice customization.

        Args:
            persona_path: Path to a JSON persona file (read as UTF-8).

        If the file is missing or cannot be decoded as UTF-8 JSON, the
        built-in default persona is used instead.
        """
        try:
            # Explicit UTF-8: persona files may contain non-ASCII (German) text
            # and must not depend on the platform's default encoding.
            with open(persona_path, 'r', encoding="utf-8") as f:
                self._persona_config = json.load(f)
            logger.info("Loaded persona", path=persona_path)
        except FileNotFoundError:
            logger.warning("Persona file not found, using defaults", path=persona_path)
            self._persona_config = self._default_persona()
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            logger.error(
                "Persona file invalid, using defaults",
                path=persona_path,
                error=str(e),
            )
            self._persona_config = self._default_persona()

    def _default_persona(self) -> dict:
        """Default teacher persona configuration."""
        return {
            "name": "Breakpilot Assistant",
            "language": "de-DE",
            "voice": {
                "gender": "neutral",
                "pitch": "medium",
                "speed": 1.0,
            },
            "style": {
                "formal": True,
                "friendly": True,
                "concise": True,
            },
            "domain_knowledge": [
                "education",
                "teaching",
                "school_administration",
                "student_assessment",
            ],
        }

    async def transcribe(self, audio_data: bytes) -> str:
        """
        Transcribe audio to text.

        Sends the audio as one binary frame and reads exactly one reply.

        Args:
            audio_data: PCM Int16 audio at 24kHz

        Returns:
            Transcribed text, or "" when not connected, on timeout, on error,
            or when the reply is not a ``transcript`` message.
        """
        if not self._connected:
            # Fallback: return empty (audio not processed)
            logger.debug("PersonaPlex not connected, skipping transcription")
            return ""

        try:
            # Send audio for transcription
            await self._ws.send(audio_data)

            # Wait for transcription response
            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )

            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "transcript":
                    return data.get("text", "")

            return ""

        except asyncio.TimeoutError:
            logger.warning("Transcription timeout")
            return ""
        except Exception as e:
            logger.error("Transcription failed", error=str(e))
            return ""

    async def synthesize(self, text: str) -> bytes:
        """
        Synthesize text to speech.

        Binary frames are collected until the server sends a
        ``synthesis_complete`` or ``error`` message.

        Args:
            text: Text to synthesize

        Returns:
            PCM Int16 audio at 24kHz. On a server ``error`` message the audio
            received so far is returned; on timeout or any exception the
            result is b"".
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping synthesis")
            return b""

        try:
            # Request synthesis
            await self._ws.send(json.dumps({
                "type": "synthesize",
                "text": text,
            }))

            # Collect audio chunks
            audio_chunks = []

            while True:
                response = await asyncio.wait_for(
                    self._ws.recv(),
                    timeout=settings.personaplex_timeout,
                )

                if isinstance(response, bytes):
                    audio_chunks.append(response)
                elif isinstance(response, str):
                    data = json.loads(response)
                    if data.get("type") == "synthesis_complete":
                        break
                    if data.get("type") == "error":
                        logger.error("Synthesis error", error=data.get("message"))
                        break

            return b"".join(audio_chunks)

        except asyncio.TimeoutError:
            logger.warning("Synthesis timeout")
            return b""
        except Exception as e:
            logger.error("Synthesis failed", error=str(e))
            return b""

    async def stream_conversation(
        self,
        audio_stream: AsyncIterator[bytes],
    ) -> AsyncIterator[dict]:
        """
        Full-duplex conversation streaming.

        Audio chunks from ``audio_stream`` are forwarded to the server by a
        background task while server messages are yielded to the caller. The
        stream ends after an ``end_of_turn`` message, on timeout, or on error.

        Yields dictionaries with:
        - type: "transcript" | "response_text" | "response_audio" | "intent"
        - content: The actual content

        Binary frames are wrapped as ``{"type": "response_audio", ...}``; text
        frames are yielded as the decoded JSON object sent by the server.
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping stream")
            return

        async def send_audio():
            # Forward microphone audio for as long as a socket is available.
            async for chunk in audio_stream:
                if self._ws:
                    await self._ws.send(chunk)

        try:
            # Start streaming task
            send_task = asyncio.create_task(send_audio())

            # Start receiving
            try:
                while True:
                    response = await asyncio.wait_for(
                        self._ws.recv(),
                        timeout=settings.personaplex_timeout,
                    )

                    if isinstance(response, bytes):
                        yield {
                            "type": "response_audio",
                            "content": response,
                        }
                    elif isinstance(response, str):
                        data = json.loads(response)
                        yield data

                        if data.get("type") == "end_of_turn":
                            break

            finally:
                # Cancel AND await the sender so it is fully finished before
                # the generator exits and its exception (if any) is retrieved.
                # gather(return_exceptions=True) hands the sender's outcome
                # back as a value, while a cancellation of *this* coroutine
                # still propagates normally.
                send_task.cancel()
                (outcome,) = await asyncio.gather(send_task, return_exceptions=True)
                if isinstance(outcome, Exception) and not isinstance(
                    outcome, asyncio.CancelledError
                ):
                    logger.warning("Audio send task failed", error=str(outcome))

        except asyncio.TimeoutError:
            logger.warning("Stream timeout")
        except Exception as e:
            logger.error("Stream failed", error=str(e))

    async def detect_intent(self, text: str) -> Optional[dict]:
        """
        Detect intent from text using PersonaPlex.

        Sends a ``detect_intent`` request and reads exactly one reply.

        Returns intent dict or None (not connected, timeout, error, or the
        reply is not an ``intent`` message).
        """
        if not self._connected:
            return None

        try:
            await self._ws.send(json.dumps({
                "type": "detect_intent",
                "text": text,
            }))

            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )

            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "intent":
                    return data

            return None

        except asyncio.TimeoutError:
            # Same severity as the other request methods' timeout handling.
            logger.warning("Intent detection timeout")
            return None
        except Exception as e:
            logger.error("Intent detection failed", error=str(e))
            return None