From 2f68646c2da94a2306715323f566aef80b5de9c8 Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Fri, 12 Jun 2026 13:20:13 +0200
Subject: [PATCH] fix(advisor): keep_alive 30m gegen Modell-Kaltstart ("Load failed")
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ollama entlädt das 35b-Modell nach 5 Min Leerlauf → jede Frage danach
startet es kalt (Modell-Load) und läuft in den Frontend-Timeout
("Load failed"). keep_alive='30m' im Chat-Request hält es warm.

Co-Authored-By: Claude Opus 4.7
---
 admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts b/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts
index 695ea00b..37b5c5ed 100644
--- a/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts
+++ b/admin-compliance/app/api/sdk/compliance-advisor/chat/route.ts
@@ -179,6 +179,9 @@ Der Nutzer hat "${countryLabel} (${validCountry})" gewaehlt.
         messages,
         stream: true,
         think: false,
+        // Modell im VRAM halten → kein Kaltstart bei der naechsten Frage
+        // (Kaltstart eines 35b-Modells war die Ursache fuer "Load failed").
+        keep_alive: '30m',
         options: {
           temperature: 0.3,
           num_predict: 8192,