Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s
- Voice-Service von Core nach Lehrer verschoben (bp-lehrer-voice-service) - 4 Jitsi-Services + 2 Synapse-Services in docker-compose.yml aufgenommen - Camunda komplett gelöscht: workflow pages, workflow-config.ts, bpmn-js deps - CAMUNDA_URL aus backend-lehrer environment entfernt - Sidebar: Kategorie "Compliance SDK" + "Katalogverwaltung" entfernt - Sidebar: Neue Kategorie "Kommunikation" mit Video & Chat, Voice Service, Alerts Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
287 lines
8.4 KiB
Python
"""
|
|
PersonaPlex-7B Client
|
|
Full-Duplex Speech-to-Speech with NVIDIA's PersonaPlex model
|
|
|
|
Features:
|
|
- Full-duplex audio streaming
|
|
- 80ms latency target
|
|
- 24kHz audio (Mimi codec compatible)
|
|
- German language support
|
|
- Teacher persona customization
|
|
"""
|
|
import structlog
|
|
import asyncio
|
|
import json
|
|
from typing import Optional, AsyncIterator
|
|
import websockets
|
|
from websockets.client import WebSocketClientProtocol
|
|
|
|
from config import settings
|
|
|
|
logger = structlog.get_logger(__name__)
|
|
|
|
|
|
class PersonaPlexClient:
    """
    WebSocket client for PersonaPlex-7B Full-Duplex model.

    PersonaPlex is NVIDIA's speech-to-speech model that provides:
    - Real-time transcription
    - Intent understanding
    - Natural language responses
    - Voice synthesis

    In development mode, this falls back to text-only processing:
    every public method degrades gracefully (empty result / None)
    instead of raising when no connection is established.
    """

    def __init__(self):
        # Active WebSocket connection; None while disconnected or in fallback mode.
        self._ws: Optional[WebSocketClientProtocol] = None
        # True only between a successful connect() and disconnect().
        self._connected = False
        # Persona dict sent to the server right after connecting
        # (populated by load_persona()).
        self._persona_config: Optional[dict] = None

    async def connect(self) -> bool:
        """
        Connect to the PersonaPlex WebSocket server.

        Sends the persona configuration (if one was loaded) immediately
        after the connection is established.

        Returns:
            True if connected, False if in fallback mode (disabled via
            settings, or the connection attempt failed).
        """
        if not settings.use_personaplex:
            logger.info("PersonaPlex disabled, using fallback mode")
            return False

        try:
            self._ws = await websockets.connect(
                settings.personaplex_ws_url,
                ping_interval=20,
                ping_timeout=10,
            )
            self._connected = True

            # Send persona configuration
            if self._persona_config:
                await self._ws.send(json.dumps({
                    "type": "config",
                    "persona": self._persona_config,
                }))

            logger.info("Connected to PersonaPlex")
            return True

        except Exception as e:
            logger.warning("PersonaPlex connection failed, using fallback", error=str(e))
            # The config send above can fail AFTER the socket was opened;
            # close it so we do not leak a half-open connection.
            if self._ws is not None:
                try:
                    await self._ws.close()
                except Exception:
                    pass  # best-effort cleanup
                self._ws = None
            self._connected = False
            return False

    async def disconnect(self):
        """Disconnect from PersonaPlex and reset the connection state."""
        if self._ws:
            try:
                await self._ws.close()
            finally:
                # Reset state even if close() raises, so a subsequent
                # connect() starts from a clean slate.
                self._ws = None
                self._connected = False

    def load_persona(self, persona_path: str = "personas/lehrer_persona.json"):
        """
        Load persona configuration for voice customization.

        Falls back to the built-in default persona when the file is
        missing or contains invalid JSON.
        """
        try:
            with open(persona_path, 'r') as f:
                self._persona_config = json.load(f)
            logger.info("Loaded persona", path=persona_path)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            # A corrupt persona file should degrade to defaults, not crash.
            logger.warning("Persona unavailable, using defaults", path=persona_path, error=str(e))
            self._persona_config = self._default_persona()

    def _default_persona(self) -> dict:
        """Default teacher persona configuration."""
        return {
            "name": "Breakpilot Assistant",
            "language": "de-DE",
            "voice": {
                "gender": "neutral",
                "pitch": "medium",
                "speed": 1.0,
            },
            "style": {
                "formal": True,
                "friendly": True,
                "concise": True,
            },
            "domain_knowledge": [
                "education",
                "teaching",
                "school_administration",
                "student_assessment",
            ],
        }

    async def transcribe(self, audio_data: bytes) -> str:
        """
        Transcribe audio to text.

        Args:
            audio_data: PCM Int16 audio at 24kHz

        Returns:
            Transcribed text, or "" when not connected, on timeout, or
            when the server reply is not a transcript message.
        """
        if not self._connected:
            # Fallback: return empty (audio not processed)
            logger.debug("PersonaPlex not connected, skipping transcription")
            return ""

        try:
            # Send audio for transcription
            await self._ws.send(audio_data)

            # Wait for transcription response
            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )

            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "transcript":
                    return data.get("text", "")

            return ""

        except asyncio.TimeoutError:
            logger.warning("Transcription timeout")
            return ""
        except Exception as e:
            logger.error("Transcription failed", error=str(e))
            return ""

    async def synthesize(self, text: str) -> bytes:
        """
        Synthesize text to speech.

        Args:
            text: Text to synthesize

        Returns:
            PCM Int16 audio at 24kHz, or b"" when not connected, on
            timeout, or when the server reports a synthesis error.
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping synthesis")
            return b""

        try:
            # Request synthesis
            await self._ws.send(json.dumps({
                "type": "synthesize",
                "text": text,
            }))

            # Collect binary audio chunks until the server signals completion.
            audio_chunks = []

            while True:
                response = await asyncio.wait_for(
                    self._ws.recv(),
                    timeout=settings.personaplex_timeout,
                )

                if isinstance(response, bytes):
                    audio_chunks.append(response)
                elif isinstance(response, str):
                    data = json.loads(response)
                    if data.get("type") == "synthesis_complete":
                        break
                    if data.get("type") == "error":
                        # Discard partial chunks: audio from a failed
                        # synthesis is unusable.
                        logger.error("Synthesis error", error=data.get("message"))
                        return b""

            return b"".join(audio_chunks)

        except asyncio.TimeoutError:
            logger.warning("Synthesis timeout")
            return b""
        except Exception as e:
            logger.error("Synthesis failed", error=str(e))
            return b""

    async def stream_conversation(
        self,
        audio_stream: AsyncIterator[bytes],
    ) -> AsyncIterator[dict]:
        """
        Full-duplex conversation streaming.

        Forwards the caller's audio chunks to the server in the
        background while concurrently yielding server events until
        an "end_of_turn" message arrives.

        Yields dictionaries with:
        - type: "transcript" | "response_text" | "response_audio" | "intent"
        - content: The actual content
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping stream")
            return

        try:
            # Uplink: forward caller audio to the server in the background.
            async def send_audio():
                async for chunk in audio_stream:
                    if self._ws:
                        await self._ws.send(chunk)

            send_task = asyncio.create_task(send_audio())

            try:
                # Downlink: yield server events until the turn ends.
                while True:
                    response = await asyncio.wait_for(
                        self._ws.recv(),
                        timeout=settings.personaplex_timeout,
                    )

                    if isinstance(response, bytes):
                        yield {
                            "type": "response_audio",
                            "content": response,
                        }
                    elif isinstance(response, str):
                        data = json.loads(response)
                        yield data

                        if data.get("type") == "end_of_turn":
                            break

            finally:
                send_task.cancel()
                # Await the cancelled task so the cancellation is actually
                # delivered (no "Task was destroyed but it is pending"
                # warning) and real uplink failures are surfaced.
                try:
                    await send_task
                except asyncio.CancelledError:
                    pass
                except Exception as send_err:
                    logger.error("Audio upstream failed", error=str(send_err))

        except asyncio.TimeoutError:
            logger.warning("Stream timeout")
        except Exception as e:
            logger.error("Stream failed", error=str(e))

    async def detect_intent(self, text: str) -> Optional[dict]:
        """
        Detect intent from text using PersonaPlex.

        Args:
            text: Utterance to analyze.

        Returns:
            Intent dict, or None when not connected, on timeout, or
            when the reply is not an intent message.
        """
        if not self._connected:
            return None

        try:
            await self._ws.send(json.dumps({
                "type": "detect_intent",
                "text": text,
            }))

            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )

            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "intent":
                    return data

            return None

        except asyncio.TimeoutError:
            # Consistent with transcribe()/synthesize(): a timeout is an
            # expected operational event, logged at warning level.
            logger.warning("Intent detection timeout")
            return None
        except Exception as e:
            logger.error("Intent detection failed", error=str(e))
            return None