# breakpilot-core/voice-service/services/fallback_llm_client.py
"""
Fallback LLM Client - Ollama Integration
Text-only fallback when PersonaPlex is not available
Used in development on Mac Mini with:
- qwen2.5:32b for conversation
- Local processing (DSGVO-konform)
"""
import json
from typing import Any, Dict, List, Optional

import httpx
import structlog

from config import settings
logger = structlog.get_logger(__name__)
class FallbackLLMClient:
    """
    Ollama LLM client for text-only processing.

    When PersonaPlex is not available (development mode), this client
    provides:
    - Intent detection (text-based)
    - Response generation
    - Task execution assistance

    Note: Audio transcription requires a separate ASR service
    (e.g., Whisper) when using this fallback.
    """

    def __init__(self):
        # Connection parameters come from the application settings module.
        self._base_url = settings.ollama_base_url
        self._model = settings.ollama_voice_model
        self._timeout = settings.ollama_timeout
        # Created lazily on first request; released via close().
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or lazily create the shared async HTTP client."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=self._timeout)
        return self._client

    async def close(self) -> None:
        """Release the underlying HTTP client.

        Fixes a resource leak: the original class created an
        ``httpx.AsyncClient`` but never closed it. Safe to call multiple
        times; a later request transparently re-creates the client.
        """
        if self._client is not None:
            await self._client.aclose()
            self._client = None

    async def _post_chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float,
        num_predict: int,
    ) -> str:
        """POST a non-streaming ``/api/chat`` request and return the reply text.

        Shared by :meth:`generate` and :meth:`chat` (previously duplicated).
        HTTP and transport errors propagate to the caller.

        Args:
            messages: Full message list, including any system message.
            temperature: Sampling temperature.
            num_predict: Maximum number of tokens Ollama may generate.

        Returns:
            The assistant message content, or "" if the response payload
            carries no message.
        """
        client = await self._get_client()
        response = await client.post(
            f"{self._base_url}/api/chat",
            json={
                "model": self._model,
                "messages": messages,
                "options": {
                    "temperature": temperature,
                    "num_predict": num_predict,
                },
                "stream": False,
            },
        )
        response.raise_for_status()
        data = response.json()
        return data.get("message", {}).get("content", "")

    async def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: int = 500,
    ) -> str:
        """
        Generate a text completion.

        Args:
            prompt: User prompt
            system_prompt: Optional system instructions
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate

        Returns:
            Generated text, or a (German) user-facing error string when no
            provider is configured or the request fails.
        """
        if settings.fallback_llm_provider == "none":
            logger.warning("No LLM provider configured")
            return "LLM nicht verfügbar"

        # Build the chat message list; the system prompt is optional.
        messages: List[Dict[str, str]] = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        try:
            return await self._post_chat(messages, temperature, max_tokens)
        except httpx.HTTPError as e:
            logger.error("Ollama request failed", error=str(e))
            return "Fehler bei der Verarbeitung"
        except Exception as e:
            # Last-resort guard so callers always receive a string.
            logger.error("Unexpected error", error=str(e))
            return "Unerwarteter Fehler"

    async def detect_intent(self, text: str) -> Dict[str, Any]:
        """
        Detect intent from text using the LLM.

        Args:
            text: Transcribed user utterance to classify.

        Returns:
            {
                "type": "student_observation" | "reminder" | ...,
                "confidence": 0.0-1.0,
                "parameters": {...},
                "is_actionable": bool
            }
            Falls back to an "unknown" intent when the model output
            contains no parseable JSON object.
        """
        system_prompt = """Du bist ein Intent-Detektor für Lehrer-Sprachbefehle.
Analysiere den Text und bestimme die Absicht.
Mögliche Intents:
- student_observation: Beobachtung zu einem Schüler
- reminder: Erinnerung an etwas
- homework_check: Hausaufgaben kontrollieren
- conference_topic: Thema für Konferenz
- correction_note: Notiz zur Korrektur
- worksheet_generate: Arbeitsblatt erstellen
- worksheet_differentiate: Differenzierung
- quick_activity: Schnelle Aktivität
- quiz_generate: Quiz erstellen
- parent_letter: Elternbrief
- class_message: Nachricht an Klasse
- canvas_edit: Canvas bearbeiten
- canvas_layout: Layout ändern
- operator_checklist: Operatoren-Checkliste
- eh_passage: EH-Passage suchen
- feedback_suggest: Feedback vorschlagen
- reminder_schedule: Erinnerung planen
- task_summary: Aufgaben zusammenfassen
- unknown: Unbekannt
Antworte NUR mit JSON:
{"type": "intent_name", "confidence": 0.0-1.0, "parameters": {...}, "is_actionable": true/false}"""

        result = await self.generate(
            prompt=f"Text: {text}",
            system_prompt=system_prompt,
            temperature=0.1,  # low temperature: classification should be stable
            max_tokens=200,
        )

        try:
            # Models often wrap JSON in prose; extract the outermost {...} span.
            start = result.find("{")
            end = result.rfind("}") + 1
            if start >= 0 and end > start:
                parsed = json.loads(result[start:end])
                # Guard against non-object JSON (bare list/string/number),
                # which would break callers that index the result as a dict.
                if isinstance(parsed, dict):
                    return parsed
        except ValueError as e:  # json.JSONDecodeError subclasses ValueError
            logger.warning("Intent parsing failed", error=str(e))

        # Safe default when no valid JSON object could be extracted.
        return {
            "type": "unknown",
            "confidence": 0.0,
            "parameters": {},
            "is_actionable": False,
        }

    async def process_audio_description(self, audio_data: bytes) -> str:
        """
        Process audio by describing it (placeholder for ASR).

        In production, this would use Whisper or similar.
        For MVP, this logs the clip metadata and returns an empty string.

        Args:
            audio_data: Raw PCM audio; assumed 16-bit mono at
                ``settings.audio_sample_rate`` — TODO confirm with caller.
        """
        # Derive the clip duration for logging only.
        samples = len(audio_data) // 2  # 16-bit = 2 bytes per sample
        duration_sec = samples / settings.audio_sample_rate
        logger.debug(
            "Audio received (no ASR in fallback mode)",
            duration_sec=duration_sec,
            bytes=len(audio_data),
        )
        # Placeholder - in production, integrate with Whisper.
        return ""

    async def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
    ) -> str:
        """
        Multi-turn conversation.

        Args:
            messages: List of {"role": "user"|"assistant", "content": "..."}
            temperature: Sampling temperature

        Returns:
            Assistant response, or a (German) user-facing error string on
            failure.
        """
        if settings.fallback_llm_provider == "none":
            return "LLM nicht verfügbar"

        # Prepend the fixed assistant persona as the system message.
        system_prompt = """Du bist Breakpilot, ein hilfreicher Assistent für Lehrer.
Du hilfst bei:
- Notizen und Beobachtungen
- Unterrichtsvorbereitung
- Elternkommunikation
- Korrekturunterstützung
Antworte kurz und präzise. Halte Antworten unter 100 Wörtern."""
        full_messages = [{"role": "system", "content": system_prompt}] + messages

        try:
            return await self._post_chat(full_messages, temperature, 300)
        except Exception as e:
            logger.error("Chat failed", error=str(e))
            return "Entschuldigung, ein Fehler ist aufgetreten."

    async def health_check(self) -> bool:
        """Check if Ollama is available.

        Returns False when no provider is configured or the ``/api/tags``
        endpoint is unreachable; never raises.
        """
        if settings.fallback_llm_provider == "none":
            return False
        try:
            client = await self._get_client()
            response = await client.get(f"{self._base_url}/api/tags")
            return response.status_code == 200
        except Exception:
            # Any transport/HTTP error simply means "not healthy".
            return False