""" Fallback LLM Client - Ollama Integration Text-only fallback when PersonaPlex is not available Used in development on Mac Mini with: - qwen2.5:32b for conversation - Local processing (DSGVO-konform) """ import structlog import httpx from typing import Optional, List, Dict, Any from config import settings logger = structlog.get_logger(__name__) class FallbackLLMClient: """ Ollama LLM client for text-only processing. When PersonaPlex is not available (development mode), this client provides: - Intent detection (text-based) - Response generation - Task execution assistance Note: Audio transcription requires a separate ASR service (e.g., Whisper) when using this fallback. """ def __init__(self): self._base_url = settings.ollama_base_url self._model = settings.ollama_voice_model self._timeout = settings.ollama_timeout self._client: Optional[httpx.AsyncClient] = None async def _get_client(self) -> httpx.AsyncClient: """Get or create HTTP client.""" if self._client is None: self._client = httpx.AsyncClient(timeout=self._timeout) return self._client async def generate( self, prompt: str, system_prompt: Optional[str] = None, temperature: float = 0.7, max_tokens: int = 500, ) -> str: """ Generate text completion. Args: prompt: User prompt system_prompt: Optional system instructions temperature: Sampling temperature max_tokens: Maximum tokens to generate Returns: Generated text """ if settings.fallback_llm_provider == "none": logger.warning("No LLM provider configured") return "LLM nicht verfügbar" client = await self._get_client() # Build messages messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) try: response = await client.post( f"{self._base_url}/api/chat", json={ "model": self._model, "messages": messages, "options": { "temperature": temperature, "num_predict": max_tokens, }, "stream": False, }, ) response.raise_for_status() data = response.json() return data.get("message", {}).get("content", "") except httpx.HTTPError as e: logger.error("Ollama request failed", error=str(e)) return "Fehler bei der Verarbeitung" except Exception as e: logger.error("Unexpected error", error=str(e)) return "Unerwarteter Fehler" async def detect_intent(self, text: str) -> Dict[str, Any]: """ Detect intent from text using LLM. Returns: { "type": "student_observation" | "reminder" | ..., "confidence": 0.0-1.0, "parameters": {...}, "is_actionable": bool } """ system_prompt = """Du bist ein Intent-Detektor für Lehrer-Sprachbefehle. Analysiere den Text und bestimme die Absicht. 

    async def detect_intent(self, text: str) -> Dict[str, Any]:
        """
        Detect intent from text using the LLM.

        Returns:
            {
                "type": "student_observation" | "reminder" | ...,
                "confidence": 0.0-1.0,
                "parameters": {...},
                "is_actionable": bool
            }
        """
        system_prompt = """Du bist ein Intent-Detektor für Lehrer-Sprachbefehle.
Analysiere den Text und bestimme die Absicht.

Mögliche Intents:
- student_observation: Beobachtung zu einem Schüler
- reminder: Erinnerung an etwas
- homework_check: Hausaufgaben kontrollieren
- conference_topic: Thema für Konferenz
- correction_note: Notiz zur Korrektur
- worksheet_generate: Arbeitsblatt erstellen
- worksheet_differentiate: Differenzierung
- quick_activity: Schnelle Aktivität
- quiz_generate: Quiz erstellen
- parent_letter: Elternbrief
- class_message: Nachricht an Klasse
- canvas_edit: Canvas bearbeiten
- canvas_layout: Layout ändern
- operator_checklist: Operatoren-Checkliste
- eh_passage: EH-Passage suchen
- feedback_suggest: Feedback vorschlagen
- reminder_schedule: Erinnerung planen
- task_summary: Aufgaben zusammenfassen
- unknown: Unbekannt

Antworte NUR mit JSON:
{"type": "intent_name", "confidence": 0.0-1.0, "parameters": {...}, "is_actionable": true/false}"""

        result = await self.generate(
            prompt=f"Text: {text}",
            system_prompt=system_prompt,
            temperature=0.1,
            max_tokens=200,
        )

        try:
            # Extract the first JSON object from the response
            start = result.find("{")
            end = result.rfind("}") + 1
            if start >= 0 and end > start:
                return json.loads(result[start:end])
        except Exception as e:
            logger.warning("Intent parsing failed", error=str(e))

        return {
            "type": "unknown",
            "confidence": 0.0,
            "parameters": {},
            "is_actionable": False,
        }

    async def process_audio_description(self, audio_data: bytes) -> str:
        """
        Process audio by describing it (placeholder for ASR).

        In production, this would use Whisper or similar.
        For the MVP, this returns a placeholder.
        """
        # Calculate audio duration (16-bit PCM = 2 bytes per sample)
        samples = len(audio_data) // 2
        duration_sec = samples / settings.audio_sample_rate

        logger.debug(
            "Audio received (no ASR in fallback mode)",
            duration_sec=duration_sec,
            bytes=len(audio_data),
        )

        # Placeholder - in production, integrate with Whisper
        return ""

    async def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
    ) -> str:
        """
        Multi-turn conversation.

        Args:
            messages: List of {"role": "user"|"assistant", "content": "..."}
            temperature: Sampling temperature

        Returns:
            Assistant response
        """
        if settings.fallback_llm_provider == "none":
            return "LLM nicht verfügbar"

        client = await self._get_client()

        # Prepend the assistant persona as a system prompt
        system_prompt = """Du bist Breakpilot, ein hilfreicher Assistent für Lehrer.
Du hilfst bei:
- Notizen und Beobachtungen
- Unterrichtsvorbereitung
- Elternkommunikation
- Korrekturunterstützung

Antworte kurz und präzise. Halte Antworten unter 100 Wörtern."""

        full_messages = [{"role": "system", "content": system_prompt}] + messages

        try:
            response = await client.post(
                f"{self._base_url}/api/chat",
                json={
                    "model": self._model,
                    "messages": full_messages,
                    "options": {
                        "temperature": temperature,
                        "num_predict": 300,
                    },
                    "stream": False,
                },
            )
            response.raise_for_status()
            data = response.json()
            return data.get("message", {}).get("content", "")
        except Exception as e:
            logger.error("Chat failed", error=str(e))
            return "Entschuldigung, ein Fehler ist aufgetreten."

    async def health_check(self) -> bool:
        """Check if Ollama is reachable."""
        if settings.fallback_llm_provider == "none":
            return False

        try:
            client = await self._get_client()
            response = await client.get(f"{self._base_url}/api/tags")
            return response.status_code == 200
        except Exception:
            return False
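

# Minimal smoke-test sketch (assumption: a local Ollama instance is running
# and reachable at settings.ollama_base_url; the prompts below are
# illustrative only and not part of the client).
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        llm = FallbackLLMClient()
        if not await llm.health_check():
            print("Ollama not reachable - check settings.ollama_base_url")
            return
        intent = await llm.detect_intent(
            "Erinnere mich daran, morgen die Mathe-Hefte einzusammeln"
        )
        print("intent:", intent)
        reply = await llm.chat(
            [{"role": "user", "content": "Fasse meine offenen Aufgaben zusammen."}]
        )
        print("reply:", reply)

    asyncio.run(_demo())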