/**
 * Compliance advisor LLM cascade.
 *
 * Order:
 *   1. OVH / LiteLLM (OpenAI-compatible, SSE streaming) — production LLM, used
 *      when OVH_LLM_URL and OVH_LLM_MODEL are both set.
 *   2. Ollama chat (NDJSON streaming) — local development / fallback.
 *
 * In production OLLAMA_URL points at the embedding-only service (no chat
 * model), so OVH is the only working path there. Locally (without the OVH
 * env vars) the advisor keeps running through Ollama. Both sources are
 * normalized into one uniform plain-text stream.
 */

// `||` (not `??`) is deliberate: an env var set to the empty string must also
// fall back to the default.
const OLLAMA_URL = process.env.OLLAMA_URL || 'http://host.docker.internal:11434'
const OLLAMA_MODEL = process.env.COMPLIANCE_LLM_MODEL || 'qwen2.5vl:32b'
// Trailing slashes are stripped so the endpoint path can be appended verbatim.
const OVH_URL = (process.env.OVH_LLM_URL || '').replace(/\/+$/, '')
const OVH_MODEL = process.env.OVH_LLM_MODEL || ''
const OVH_KEY = process.env.OVH_LLM_KEY || ''

/** Timeout in milliseconds handed to `AbortSignal.timeout` for each LLM request. */
const REQUEST_TIMEOUT_MS = 120000

/** One chat turn as sent to the LLM backends. */
export interface ChatMessage {
  role: string
  content: string
}

const encoder = new TextEncoder()

/** True for any non-null object (arrays included). */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null
}

/** Returns `value` when it is a non-empty string, otherwise null. */
function nonEmptyString(value: unknown): string | null {
  return typeof value === 'string' && value !== '' ? value : null
}

/** Parses JSON without throwing; malformed input yields undefined. */
function parseJson(text: string): unknown {
  try {
    return JSON.parse(text)
  } catch {
    return undefined
  }
}

/**
 * Extracts the text delta from one Ollama NDJSON line (`message.content`).
 *
 * @param line - A single line of the NDJSON stream.
 * @returns The delta text, or null for blank lines, malformed JSON, or lines
 *   whose `message.content` is missing, empty, or not a string.
 */
export function parseOllamaLine(line: string): string | null {
  const trimmed = line.trim()
  if (!trimmed) return null
  const parsed = parseJson(trimmed)
  if (!isRecord(parsed) || !isRecord(parsed.message)) return null
  return nonEmptyString(parsed.message.content)
}

/**
 * Extracts the text delta from one OpenAI/OVH SSE line
 * (`choices[0].delta.content`).
 *
 * @param line - A single line of the SSE stream.
 * @returns The delta text, or null for lines that are not `data:` lines, the
 *   `[DONE]` sentinel, malformed JSON, or payloads whose delta content is
 *   missing, empty, or not a string.
 */
export function parseSSELine(line: string): string | null {
  const trimmed = line.trim()
  if (!trimmed.startsWith('data:')) return null
  const payload = trimmed.slice(5).trim()
  if (!payload || payload === '[DONE]') return null
  const parsed = parseJson(payload)
  if (!isRecord(parsed) || !Array.isArray(parsed.choices)) return null
  const first: unknown = parsed.choices[0]
  if (!isRecord(first) || !isRecord(first.delta)) return null
  return nonEmptyString(first.delta.content)
}

/**
 * Converts a line-oriented upstream response body into a stream of UTF-8
 * encoded text deltas.
 *
 * The upstream body is split on `\n`; every complete line goes through
 * `parseLine`, and each non-null result is enqueued. A trailing line without a
 * final newline is parsed once the upstream ends.
 *
 * @param upstream - Response whose body carries the line-oriented stream.
 * @param parseLine - Maps one line to its text delta, or null to skip it.
 * @returns A stream of encoded deltas. If reading the upstream fails, the
 *   failure is logged and the stream is closed after whatever was already
 *   delivered. Cancelling the returned stream cancels the upstream reader.
 */
function textStream(
  upstream: Response,
  parseLine: (line: string) => string | null,
): ReadableStream<Uint8Array> {
  const reader = upstream.body?.getReader()
  // Set by cancel(); after that the controller must no longer be touched,
  // because enqueue()/close() throw on a cancelled stream.
  let cancelled = false

  return new ReadableStream<Uint8Array>({
    async start(controller) {
      if (!reader) {
        // No body to read: hand the consumer an empty, closed stream.
        controller.close()
        return
      }
      const decoder = new TextDecoder()
      let pending = ''
      const emit = (line: string): void => {
        const delta = parseLine(line)
        if (delta && !cancelled) controller.enqueue(encoder.encode(delta))
      }
      try {
        for (;;) {
          const { done, value } = await reader.read()
          if (done || cancelled) break
          pending += decoder.decode(value, { stream: true })
          const lines = pending.split('\n')
          // The last element is an incomplete line (or ''); keep it for the next chunk.
          pending = lines.pop() ?? ''
          for (const line of lines) emit(line)
        }
        // Flush bytes the decoder may still hold, then handle the unterminated tail.
        pending += decoder.decode()
        emit(pending)
      } catch (err) {
        if (!cancelled) console.warn('[advisor-llm] upstream stream aborted', err)
      } finally {
        if (!cancelled) controller.close()
      }
    },
    async cancel(reason) {
      cancelled = true
      try {
        // Stop the upstream transfer instead of letting it run on unread.
        await reader?.cancel(reason)
      } catch {
        // Upstream already failed; there is nothing left to release.
      }
    },
  })
}

/**
 * POSTs a JSON payload and returns the response only when it can be streamed.
 *
 * @param label - Backend name used in log messages.
 * @param url - Full endpoint URL.
 * @param headers - Request headers.
 * @param payload - Value serialized with `JSON.stringify` as the request body.
 * @returns The response when it is OK and has a body; otherwise null (non-OK
 *   status, missing body, serialization failure, network error, or timeout).
 */
async function postForStream(
  label: string,
  url: string,
  headers: Record<string, string>,
  payload: unknown,
): Promise<Response | null> {
  try {
    const res = await fetch(url, {
      method: 'POST',
      headers,
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
    })
    if (res.ok && res.body) return res
    console.warn(`[advisor-llm] ${label} responded with HTTP ${res.status}`)
    // Discard the unused body rather than leaving it unread.
    await res.body?.cancel()
    return null
  } catch (err) {
    console.warn(`[advisor-llm] ${label} request failed`, err)
    return null
  }
}

/**
 * Requests a streamed chat completion from the OVH / LiteLLM endpoint.
 *
 * @returns The streaming response, or null when OVH is not configured
 *   (OVH_LLM_URL or OVH_LLM_MODEL empty) or the request did not succeed.
 */
async function tryOVH(messages: ChatMessage[]): Promise<Response | null> {
  if (!OVH_URL || !OVH_MODEL) return null
  const headers: Record<string, string> = { 'Content-Type': 'application/json' }
  if (OVH_KEY) headers['Authorization'] = `Bearer ${OVH_KEY}`
  return postForStream('OVH', `${OVH_URL}/v1/chat/completions`, headers, {
    model: OVH_MODEL,
    messages,
    stream: true,
    temperature: 0.3,
    max_tokens: 4096,
  })
}

/**
 * Requests a streamed chat answer from the Ollama `/api/chat` endpoint.
 *
 * @returns The streaming response, or null when the request did not succeed.
 */
async function tryOllama(messages: ChatMessage[]): Promise<Response | null> {
  return postForStream(
    'Ollama',
    `${OLLAMA_URL}/api/chat`,
    { 'Content-Type': 'application/json' },
    {
      model: OLLAMA_MODEL,
      messages,
      stream: true,
      think: false,
      keep_alive: '30m',
      options: { temperature: 0.3, num_predict: 4096, num_ctx: 8192 },
    },
  )
}

/**
 * Returns a plain-text stream of the LLM answer: OVH first (production), then
 * Ollama (development / fallback).
 *
 * @param messages - Chat history forwarded unchanged to the backend.
 * @returns A stream of UTF-8 encoded text deltas, or null when no LLM is
 *   reachable (the caller is expected to answer with HTTP 502).
 */
export async function streamAdvisorAnswer(
  messages: ChatMessage[],
): Promise<ReadableStream<Uint8Array> | null> {
  const ovh = await tryOVH(messages)
  if (ovh) return textStream(ovh, parseSSELine)
  const ollama = await tryOllama(messages)
  if (ollama) return textStream(ollama, parseOllamaLine)
  return null
}