Initial commit: breakpilot-lehrer - Lehrer KI Platform

Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website, Klausur-Service, School-Service, Voice-Service, Geo-Service, BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00
commit 5a31f52310
1224 changed files with 425430 additions and 0 deletions
@@ -0,0 +1,286 @@
+"""
+PersonaPlex-7B Client
+Full-Duplex Speech-to-Speech with NVIDIA's PersonaPlex model
+
+Features:
+- Full-duplex audio streaming
+- 80ms latency target
+- 24kHz audio (Mimi codec compatible)
+- German language support
+- Teacher persona customization
+"""
+import structlog
+import asyncio
+import json
+from typing import Optional, AsyncIterator
+import websockets
+from websockets.client import WebSocketClientProtocol
+
+from config import settings
+
+logger = structlog.get_logger(__name__)
+
+
+class PersonaPlexClient:
+    """
+    WebSocket client for PersonaPlex-7B Full-Duplex model.
+
+    PersonaPlex is NVIDIA's speech-to-speech model that provides:
+    - Real-time transcription
+    - Intent understanding
+    - Natural language responses
+    - Voice synthesis
+
+    In development mode, this falls back to text-only processing.
+    """
+
+    def __init__(self):
+        self._ws: Optional[WebSocketClientProtocol] = None
+        self._connected = False
+        self._persona_config: Optional[dict] = None
+
+    async def connect(self) -> bool:
+        """
+        Connect to PersonaPlex WebSocket server.
+
+        Returns True if connected, False if in fallback mode.
+        """
+        if not settings.use_personaplex:
+            logger.info("PersonaPlex disabled, using fallback mode")
+            return False
+
+        try:
+            self._ws = await websockets.connect(
+                settings.personaplex_ws_url,
+                ping_interval=20,
+                ping_timeout=10,
+            )
+            self._connected = True
+
+            # Send persona configuration
+            if self._persona_config:
+                await self._ws.send(json.dumps({
+                    "type": "config",
+                    "persona": self._persona_config,
+                }))
+
+            logger.info("Connected to PersonaPlex")
+            return True
+
+        except Exception as e:
+            logger.warning("PersonaPlex connection failed, using fallback", error=str(e))
+            self._connected = False
+            return False
+
+    async def disconnect(self):
+        """Disconnect from PersonaPlex."""
+        if self._ws:
+            await self._ws.close()
+            self._ws = None
+            self._connected = False
+
+    def load_persona(self, persona_path: str = "personas/lehrer_persona.json"):
+        """
+        Load persona configuration for voice customization.
+        """
+        try:
+            with open(persona_path, 'r') as f:
+                self._persona_config = json.load(f)
+            logger.info("Loaded persona", path=persona_path)
+        except FileNotFoundError:
+            logger.warning("Persona file not found, using defaults", path=persona_path)
+            self._persona_config = self._default_persona()
+
+    def _default_persona(self) -> dict:
+        """Default teacher persona configuration."""
+        return {
+            "name": "Breakpilot Assistant",
+            "language": "de-DE",
+            "voice": {
+                "gender": "neutral",
+                "pitch": "medium",
+                "speed": 1.0,
+            },
+            "style": {
+                "formal": True,
+                "friendly": True,
+                "concise": True,
+            },
+            "domain_knowledge": [
+                "education",
+                "teaching",
+                "school_administration",
+                "student_assessment",
+            ],
+        }
+
+    async def transcribe(self, audio_data: bytes) -> str:
+        """
+        Transcribe audio to text.
+
+        Args:
+            audio_data: PCM Int16 audio at 24kHz
+
+        Returns:
+            Transcribed text
+        """
+        if not self._connected:
+            # Fallback: return empty (audio not processed)
+            logger.debug("PersonaPlex not connected, skipping transcription")
+            return ""
+
+        try:
+            # Send audio for transcription
+            await self._ws.send(audio_data)
+
+            # Wait for transcription response
+            response = await asyncio.wait_for(
+                self._ws.recv(),
+                timeout=settings.personaplex_timeout,
+            )
+
+            if isinstance(response, str):
+                data = json.loads(response)
+                if data.get("type") == "transcript":
+                    return data.get("text", "")
+
+            return ""
+
+        except asyncio.TimeoutError:
+            logger.warning("Transcription timeout")
+            return ""
+        except Exception as e:
+            logger.error("Transcription failed", error=str(e))
+            return ""
+
+    async def synthesize(self, text: str) -> bytes:
+        """
+        Synthesize text to speech.
+
+        Args:
+            text: Text to synthesize
+
+        Returns:
+            PCM Int16 audio at 24kHz
+        """
+        if not self._connected:
+            logger.debug("PersonaPlex not connected, skipping synthesis")
+            return b""
+
+        try:
+            # Request synthesis
+            await self._ws.send(json.dumps({
+                "type": "synthesize",
+                "text": text,
+            }))
+
+            # Collect audio chunks
+            audio_chunks = []
+
+            while True:
+                response = await asyncio.wait_for(
+                    self._ws.recv(),
+                    timeout=settings.personaplex_timeout,
+                )
+
+                if isinstance(response, bytes):
+                    audio_chunks.append(response)
+                elif isinstance(response, str):
+                    data = json.loads(response)
+                    if data.get("type") == "synthesis_complete":
+                        break
+                    if data.get("type") == "error":
+                        logger.error("Synthesis error", error=data.get("message"))
+                        break
+
+            return b"".join(audio_chunks)
+
+        except asyncio.TimeoutError:
+            logger.warning("Synthesis timeout")
+            return b""
+        except Exception as e:
+            logger.error("Synthesis failed", error=str(e))
+            return b""
+
+    async def stream_conversation(
+        self,
+        audio_stream: AsyncIterator[bytes],
+    ) -> AsyncIterator[dict]:
+        """
+        Full-duplex conversation streaming.
+
+        Yields dictionaries with:
+        - type: "transcript" | "response_text" | "response_audio" | "intent"
+        - content: The actual content
+        """
+        if not self._connected:
+            logger.debug("PersonaPlex not connected, skipping stream")
+            return
+
+        try:
+            # Start streaming task
+            async def send_audio():
+                async for chunk in audio_stream:
+                    if self._ws:
+                        await self._ws.send(chunk)
+
+            # Start receiving task
+            send_task = asyncio.create_task(send_audio())
+
+            try:
+                while True:
+                    response = await asyncio.wait_for(
+                        self._ws.recv(),
+                        timeout=settings.personaplex_timeout,
+                    )
+
+                    if isinstance(response, bytes):
+                        yield {
+                            "type": "response_audio",
+                            "content": response,
+                        }
+                    elif isinstance(response, str):
+                        data = json.loads(response)
+                        yield data
+
+                        if data.get("type") == "end_of_turn":
+                            break
+
+            finally:
+                send_task.cancel()
+
+        except asyncio.TimeoutError:
+            logger.warning("Stream timeout")
+        except Exception as e:
+            logger.error("Stream failed", error=str(e))
+
+    async def detect_intent(self, text: str) -> Optional[dict]:
+        """
+        Detect intent from text using PersonaPlex.
+
+        Returns intent dict or None.
+        """
+        if not self._connected:
+            return None
+
+        try:
+            await self._ws.send(json.dumps({
+                "type": "detect_intent",
+                "text": text,
+            }))
+
+            response = await asyncio.wait_for(
+                self._ws.recv(),
+                timeout=settings.personaplex_timeout,
+            )
+
+            if isinstance(response, str):
+                data = json.loads(response)
+                if data.get("type") == "intent":
+                    return data
+
+            return None
+
+        except Exception as e:
+            logger.error("Intent detection failed", error=str(e))
+            return None