Some checks failed
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-school (push) Successful in 25s
CI / test-go-edu-search (push) Successful in 26s
CI / test-python-klausur (push) Failing after 1m55s
CI / test-python-agent-core (push) Successful in 16s
CI / test-nodejs-website (push) Successful in 18s
- Voice-Service von Core nach Lehrer verschoben (bp-lehrer-voice-service) - 4 Jitsi-Services + 2 Synapse-Services in docker-compose.yml aufgenommen - Camunda komplett gelöscht: workflow pages, workflow-config.ts, bpmn-js deps - CAMUNDA_URL aus backend-lehrer environment entfernt - Sidebar: Kategorie "Compliance SDK" + "Katalogverwaltung" entfernt - Sidebar: Neue Kategorie "Kommunikation" mit Video & Chat, Voice Service, Alerts Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
287 lines
8.4 KiB
Python
"""
|
|
PersonaPlex-7B Client
|
|
Full-Duplex Speech-to-Speech with NVIDIA's PersonaPlex model
|
|
|
|
Features:
|
|
- Full-duplex audio streaming
|
|
- 80ms latency target
|
|
- 24kHz audio (Mimi codec compatible)
|
|
- German language support
|
|
- Teacher persona customization
|
|
"""
|
|
import structlog
|
|
import asyncio
|
|
import json
|
|
from typing import Optional, AsyncIterator
|
|
import websockets
|
|
from websockets.client import WebSocketClientProtocol
|
|
|
|
from config import settings
|
|
|
|
logger = structlog.get_logger(__name__)
|
|
|
|
|
|
class PersonaPlexClient:
    """
    WebSocket client for PersonaPlex-7B Full-Duplex model.

    PersonaPlex is NVIDIA's speech-to-speech model that provides:
    - Real-time transcription
    - Intent understanding
    - Natural language responses
    - Voice synthesis

    In development mode, this falls back to text-only processing:
    every public method degrades gracefully (empty result / None)
    instead of raising when no connection is established.
    """

    def __init__(self):
        # Active WebSocket connection; None while disconnected or in fallback mode.
        self._ws: Optional[WebSocketClientProtocol] = None
        # True only between a successful connect() and disconnect().
        self._connected = False
        # Persona dict sent to the server right after connecting
        # (populated by load_persona()).
        self._persona_config: Optional[dict] = None

    async def connect(self) -> bool:
        """
        Connect to the PersonaPlex WebSocket server.

        Sends the persona configuration (if one was loaded) immediately
        after the connection is established.

        Returns:
            True if connected, False if in fallback mode (disabled via
            settings, or the connection attempt failed).
        """
        if not settings.use_personaplex:
            logger.info("PersonaPlex disabled, using fallback mode")
            return False

        try:
            self._ws = await websockets.connect(
                settings.personaplex_ws_url,
                ping_interval=20,
                ping_timeout=10,
            )
            self._connected = True

            # Send persona configuration
            if self._persona_config:
                await self._ws.send(json.dumps({
                    "type": "config",
                    "persona": self._persona_config,
                }))

            logger.info("Connected to PersonaPlex")
            return True

        except Exception as e:
            logger.warning("PersonaPlex connection failed, using fallback", error=str(e))
            # The config send above can fail AFTER the socket was opened;
            # close it so we do not leak a half-open connection.
            if self._ws is not None:
                try:
                    await self._ws.close()
                except Exception:
                    pass  # best-effort cleanup
                self._ws = None
            self._connected = False
            return False

    async def disconnect(self):
        """Disconnect from PersonaPlex and reset the connection state."""
        if self._ws:
            try:
                await self._ws.close()
            finally:
                # Reset state even if close() raises, so a subsequent
                # connect() starts from a clean slate.
                self._ws = None
                self._connected = False

    def load_persona(self, persona_path: str = "personas/lehrer_persona.json"):
        """
        Load persona configuration for voice customization.

        Falls back to the built-in default persona when the file is
        missing or contains invalid JSON.
        """
        try:
            with open(persona_path, 'r') as f:
                self._persona_config = json.load(f)
            logger.info("Loaded persona", path=persona_path)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            # A corrupt persona file should degrade to defaults, not crash.
            logger.warning("Persona unavailable, using defaults", path=persona_path, error=str(e))
            self._persona_config = self._default_persona()

    def _default_persona(self) -> dict:
        """Default teacher persona configuration."""
        return {
            "name": "Breakpilot Assistant",
            "language": "de-DE",
            "voice": {
                "gender": "neutral",
                "pitch": "medium",
                "speed": 1.0,
            },
            "style": {
                "formal": True,
                "friendly": True,
                "concise": True,
            },
            "domain_knowledge": [
                "education",
                "teaching",
                "school_administration",
                "student_assessment",
            ],
        }

    async def transcribe(self, audio_data: bytes) -> str:
        """
        Transcribe audio to text.

        Args:
            audio_data: PCM Int16 audio at 24kHz

        Returns:
            Transcribed text, or "" when not connected, on timeout, or
            when the server reply is not a transcript message.
        """
        if not self._connected:
            # Fallback: return empty (audio not processed)
            logger.debug("PersonaPlex not connected, skipping transcription")
            return ""

        try:
            # Send audio for transcription
            await self._ws.send(audio_data)

            # Wait for transcription response
            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )

            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "transcript":
                    return data.get("text", "")

            return ""

        except asyncio.TimeoutError:
            logger.warning("Transcription timeout")
            return ""
        except Exception as e:
            logger.error("Transcription failed", error=str(e))
            return ""

    async def synthesize(self, text: str) -> bytes:
        """
        Synthesize text to speech.

        Args:
            text: Text to synthesize

        Returns:
            PCM Int16 audio at 24kHz, or b"" when not connected, on
            timeout, or when the server reports a synthesis error.
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping synthesis")
            return b""

        try:
            # Request synthesis
            await self._ws.send(json.dumps({
                "type": "synthesize",
                "text": text,
            }))

            # Collect binary audio chunks until the server signals completion.
            audio_chunks = []

            while True:
                response = await asyncio.wait_for(
                    self._ws.recv(),
                    timeout=settings.personaplex_timeout,
                )

                if isinstance(response, bytes):
                    audio_chunks.append(response)
                elif isinstance(response, str):
                    data = json.loads(response)
                    if data.get("type") == "synthesis_complete":
                        break
                    if data.get("type") == "error":
                        # Discard partial chunks: audio from a failed
                        # synthesis is unusable.
                        logger.error("Synthesis error", error=data.get("message"))
                        return b""

            return b"".join(audio_chunks)

        except asyncio.TimeoutError:
            logger.warning("Synthesis timeout")
            return b""
        except Exception as e:
            logger.error("Synthesis failed", error=str(e))
            return b""

    async def stream_conversation(
        self,
        audio_stream: AsyncIterator[bytes],
    ) -> AsyncIterator[dict]:
        """
        Full-duplex conversation streaming.

        Forwards the caller's audio chunks to the server in the
        background while concurrently yielding server events until
        an "end_of_turn" message arrives.

        Yields dictionaries with:
        - type: "transcript" | "response_text" | "response_audio" | "intent"
        - content: The actual content
        """
        if not self._connected:
            logger.debug("PersonaPlex not connected, skipping stream")
            return

        try:
            # Uplink: forward caller audio to the server in the background.
            async def send_audio():
                async for chunk in audio_stream:
                    if self._ws:
                        await self._ws.send(chunk)

            send_task = asyncio.create_task(send_audio())

            try:
                # Downlink: yield server events until the turn ends.
                while True:
                    response = await asyncio.wait_for(
                        self._ws.recv(),
                        timeout=settings.personaplex_timeout,
                    )

                    if isinstance(response, bytes):
                        yield {
                            "type": "response_audio",
                            "content": response,
                        }
                    elif isinstance(response, str):
                        data = json.loads(response)
                        yield data

                        if data.get("type") == "end_of_turn":
                            break

            finally:
                send_task.cancel()
                # Await the cancelled task so the cancellation is actually
                # delivered (no "Task was destroyed but it is pending"
                # warning) and real uplink failures are surfaced.
                try:
                    await send_task
                except asyncio.CancelledError:
                    pass
                except Exception as send_err:
                    logger.error("Audio upstream failed", error=str(send_err))

        except asyncio.TimeoutError:
            logger.warning("Stream timeout")
        except Exception as e:
            logger.error("Stream failed", error=str(e))

    async def detect_intent(self, text: str) -> Optional[dict]:
        """
        Detect intent from text using PersonaPlex.

        Args:
            text: Utterance to analyze.

        Returns:
            Intent dict, or None when not connected, on timeout, or
            when the reply is not an intent message.
        """
        if not self._connected:
            return None

        try:
            await self._ws.send(json.dumps({
                "type": "detect_intent",
                "text": text,
            }))

            response = await asyncio.wait_for(
                self._ws.recv(),
                timeout=settings.personaplex_timeout,
            )

            if isinstance(response, str):
                data = json.loads(response)
                if data.get("type") == "intent":
                    return data

            return None

        except asyncio.TimeoutError:
            # Consistent with transcribe()/synthesize(): a timeout is an
            # expected operational event, logged at warning level.
            logger.warning("Intent detection timeout")
            return None
        except Exception as e:
            logger.error("Intent detection failed", error=str(e))
            return None