Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
286
voice-service/services/personaplex_client.py
Normal file
286
voice-service/services/personaplex_client.py
Normal file
@@ -0,0 +1,286 @@
|
||||
"""
|
||||
PersonaPlex-7B Client
|
||||
Full-Duplex Speech-to-Speech with NVIDIA's PersonaPlex model
|
||||
|
||||
Features:
|
||||
- Full-duplex audio streaming
|
||||
- 80ms latency target
|
||||
- 24kHz audio (Mimi codec compatible)
|
||||
- German language support
|
||||
- Teacher persona customization
|
||||
"""
|
||||
import structlog
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Optional, AsyncIterator
|
||||
import websockets
|
||||
from websockets.client import WebSocketClientProtocol
|
||||
|
||||
from config import settings
|
||||
|
||||
# Module-level structlog logger, named after this module; call sites in this
# file attach context as keyword arguments (e.g. path=..., error=...).
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class PersonaPlexClient:
    """
    WebSocket client for the PersonaPlex-7B full-duplex model.

    PersonaPlex is NVIDIA's speech-to-speech model that provides:
    - Real-time transcription
    - Intent understanding
    - Natural language responses
    - Voice synthesis

    When PersonaPlex is disabled in the settings or the connection cannot be
    established, the client stays in fallback mode: every request method
    returns an empty result ("" / b"" / None / no items) instead of raising.

    The request methods share a single WebSocket and each read the next
    incoming message(s) themselves, so they are not meant to be run
    concurrently on the same client instance.
    """

    def __init__(self) -> None:
        # Active socket; only set once a connection is fully established.
        self._ws: Optional[WebSocketClientProtocol] = None
        # True only between a successful connect() and disconnect().
        self._connected = False
        # Persona payload sent to the server right after connecting, if set.
        self._persona_config: Optional[dict] = None

    async def connect(self) -> bool:
        """
        Connect to the PersonaPlex WebSocket server.

        If a persona has been loaded, it is sent as a ``config`` message
        immediately after the socket opens. The client is only marked as
        connected once that handshake has succeeded.

        Returns:
            True if connected, False if in fallback mode (disabled in the
            settings, or the connection/handshake failed).
        """
        if not settings.use_personaplex:
            logger.info("PersonaPlex disabled, using fallback mode")
            return False

        ws = None
        try:
            ws = await websockets.connect(
                settings.personaplex_ws_url,
                ping_interval=20,
                ping_timeout=10,
            )

            # Send persona configuration
            if self._persona_config:
                await ws.send(json.dumps({
                    "type": "config",
                    "persona": self._persona_config,
                }))

        except Exception as e:
            logger.warning("PersonaPlex connection failed, using fallback", error=str(e))
            # The socket may have opened before the config send failed;
            # close it so a failed handshake does not leak a connection.
            if ws is not None:
                try:
                    await ws.close()
                except Exception:
                    pass  # best-effort cleanup of a half-open socket
            self._connected = False
            return False

        self._ws = ws
        self._connected = True
        logger.info("Connected to PersonaPlex")
        return True

    async def disconnect(self) -> None:
        """
        Disconnect from PersonaPlex.

        The client state is reset even if closing the socket raises, so a
        failed close never leaves the client claiming to be connected. Any
        exception from ``close()`` still propagates to the caller.
        """
        ws = self._ws
        try:
            if ws:
                await ws.close()
        finally:
            self._ws = None
            self._connected = False

    def load_persona(self, persona_path: str = "personas/lehrer_persona.json") -> None:
        """
        Load the persona configuration used for voice customization.

        Falls back to the built-in default persona when the file is missing
        or does not contain valid UTF-8 encoded JSON.

        Args:
            persona_path: Path to the persona JSON file.
        """
        try:
            # Explicit UTF-8: persona files may contain non-ASCII text and the
            # platform default encoding is not reliable.
            with open(persona_path, "r", encoding="utf-8") as f:
                self._persona_config = json.load(f)
            logger.info("Loaded persona", path=persona_path)
        except FileNotFoundError:
            logger.warning("Persona file not found, using defaults", path=persona_path)
            self._persona_config = self._default_persona()
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            logger.warning(
                "Persona file invalid, using defaults",
                path=persona_path,
                error=str(e),
            )
            self._persona_config = self._default_persona()

    def _default_persona(self) -> dict:
        """Return a fresh copy of the default teacher persona configuration."""
        return {
            "name": "Breakpilot Assistant",
            "language": "de-DE",
            "voice": {
                "gender": "neutral",
                "pitch": "medium",
                "speed": 1.0,
            },
            "style": {
                "formal": True,
                "friendly": True,
                "concise": True,
            },
            "domain_knowledge": [
                "education",
                "teaching",
                "school_administration",
                "student_assessment",
            ],
        }

    async def transcribe(self, audio_data: bytes) -> str:
        """
        Transcribe audio to text.

        Sends the audio as one binary message and reads exactly one reply.

        Args:
            audio_data: PCM Int16 audio at 24kHz

        Returns:
            Transcribed text, or "" when not connected, on timeout, on any
            error, or when the reply is not a ``transcript`` message.
        """
        if not self._connected:
            # Fallback: return empty (audio not processed)
            logger.debug("PersonaPlex not connected, skipping transcription")
            return ""

        try:
            # Send audio for transcription
            await self._ws.send(audio_data)

            # Wait for transcription response
            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )

            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "transcript":
                    return data.get("text", "")

            return ""

        except asyncio.TimeoutError:
            logger.warning("Transcription timeout")
            return ""
        except Exception as e:
            logger.error("Transcription failed", error=str(e))
            return ""

    async def synthesize(self, text: str) -> bytes:
        """
        Synthesize text to speech.

        Binary replies are collected as audio chunks until the server sends
        ``synthesis_complete`` or an ``error`` message.

        Args:
            text: Text to synthesize

        Returns:
            PCM Int16 audio at 24kHz. Audio received before a server
            ``error`` message is still returned; b"" is returned when not
            connected, on timeout, or on any other failure.
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping synthesis")
            return b""

        try:
            # Request synthesis
            await self._ws.send(json.dumps({
                "type": "synthesize",
                "text": text,
            }))

            # Collect audio chunks
            audio_chunks = []

            while True:
                response = await asyncio.wait_for(
                    self._ws.recv(),
                    timeout=settings.personaplex_timeout,
                )

                if isinstance(response, bytes):
                    audio_chunks.append(response)
                elif isinstance(response, str):
                    data = json.loads(response)
                    if data.get("type") == "synthesis_complete":
                        break
                    if data.get("type") == "error":
                        logger.error("Synthesis error", error=data.get("message"))
                        break

            return b"".join(audio_chunks)

        except asyncio.TimeoutError:
            logger.warning("Synthesis timeout")
            return b""
        except Exception as e:
            logger.error("Synthesis failed", error=str(e))
            return b""

    async def stream_conversation(
        self,
        audio_stream: AsyncIterator[bytes],
    ) -> AsyncIterator[dict]:
        """
        Full-duplex conversation streaming.

        Incoming audio is forwarded to the server by a background task while
        this generator yields whatever the server sends back. The stream
        ends after an ``end_of_turn`` message, on timeout, or on error.

        Args:
            audio_stream: Async iterator of audio chunks to send.

        Yields dictionaries with:
        - type: "transcript" | "response_text" | "response_audio" | "intent"
        - content: The actual content

        Binary frames are wrapped as ``response_audio``; text frames are
        yielded as the decoded JSON object sent by the server.
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping stream")
            return

        try:
            # Sender: forwards caller audio to the server
            async def send_audio():
                async for chunk in audio_stream:
                    if self._ws:
                        await self._ws.send(chunk)

            # Run the sender in the background while we receive below
            send_task = asyncio.create_task(send_audio())

            try:
                while True:
                    response = await asyncio.wait_for(
                        self._ws.recv(),
                        timeout=settings.personaplex_timeout,
                    )

                    if isinstance(response, bytes):
                        yield {
                            "type": "response_audio",
                            "content": response,
                        }
                    elif isinstance(response, str):
                        data = json.loads(response)
                        yield data

                        if data.get("type") == "end_of_turn":
                            break

            finally:
                # Cancel AND await the sender so it has really stopped and
                # any exception it raised is retrieved instead of surfacing
                # later as "Task exception was never retrieved".
                send_task.cancel()
                try:
                    await send_task
                except asyncio.CancelledError:
                    pass
                except Exception as e:
                    logger.warning("Audio send task failed", error=str(e))

        except asyncio.TimeoutError:
            logger.warning("Stream timeout")
        except Exception as e:
            logger.error("Stream failed", error=str(e))

    async def detect_intent(self, text: str) -> Optional[dict]:
        """
        Detect intent from text using PersonaPlex.

        Sends a ``detect_intent`` request and reads exactly one reply.

        Args:
            text: Text to analyse.

        Returns:
            The server's ``intent`` message as a dict, or None when not
            connected, on timeout, on error, or when the reply is not an
            ``intent`` message.
        """
        if not self._connected:
            return None

        try:
            await self._ws.send(json.dumps({
                "type": "detect_intent",
                "text": text,
            }))

            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )

            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "intent":
                    return data

            return None

        except asyncio.TimeoutError:
            # Handled separately (as in the other request methods) so a slow
            # server is not reported as an error with an empty message.
            logger.warning("Intent detection timeout")
            return None
        except Exception as e:
            logger.error("Intent detection failed", error=str(e))
            return None
|
||||
Reference in New Issue
Block a user