fix(llm): qwen3.5 think:false + num_ctx 8192 in all chat/draft routes
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 35s
CI / test-python-backend-compliance (push) Successful in 31s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 18s
Compliance Advisor, Drafting Agent, and Validator were not responding because qwen3.5 runs in thinking mode by default (the internal chain-of-thought exceeded the 2-minute timeout). None of the agents needs thinking mode: all of their tasks are chat, text generation, or JSON validation with no deep reasoning. think:false yields direct, fast answers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
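For illustration, a minimal sketch of the request shape these routes now send to Ollama's /api/chat endpoint. OLLAMA_HOST, the fallback model name, and the helper itself are assumptions made for this sketch; the think, num_predict, and num_ctx fields mirror the hunks below.

// Hypothetical helper illustrating the request shape after this commit.
// OLLAMA_HOST and the fallback model name are assumptions; the body
// fields mirror the first chat route in the diff below.
const OLLAMA_HOST = process.env.OLLAMA_HOST ?? 'http://localhost:11434';
const LLM_MODEL = process.env.LLM_MODEL ?? 'qwen3.5';

async function chatRequest(messages: { role: string; content: string }[]): Promise<Response> {
  return fetch(`${OLLAMA_HOST}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: LLM_MODEL,
      messages,
      stream: true,
      think: false, // disable qwen3.5's default thinking mode for direct answers
      options: {
        temperature: 0.3,
        num_predict: 8192, // cap on generated tokens
        num_ctx: 8192,     // explicit context window for this request
      },
    }),
    signal: AbortSignal.timeout(120000), // hard 2-minute cutoff per request
  });
}

With think: false the model starts emitting the answer immediately instead of spending the entire 2-minute budget on internal chain-of-thought.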
@@ -178,9 +178,11 @@ Der Nutzer hat "${countryLabel} (${validCountry})" gewaehlt.
         model: LLM_MODEL,
         messages,
         stream: true,
+        think: false,
         options: {
           temperature: 0.3,
           num_predict: 8192,
+          num_ctx: 8192,
         },
       }),
       signal: AbortSignal.timeout(120000),
@@ -88,9 +88,11 @@ export async function POST(request: NextRequest)
         model: LLM_MODEL,
         messages,
         stream: true,
+        think: false,
         options: {
           temperature: mode === 'draft' ? 0.2 : 0.3,
           num_predict: mode === 'draft' ? 16384 : 8192,
+          num_ctx: 8192,
         },
       }),
       signal: AbortSignal.timeout(120000),
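These two streaming routes share the failure mode described in the commit message: AbortSignal.timeout aborts the fetch once the 2-minute budget is spent, regardless of whether the model is still thinking. As a sketch of the consuming side (the reader loop below is an assumption, not code from this repo; the chunk shape follows Ollama's documented streaming /api/chat format):

// Hypothetical reader for the streamed response returned by the routes
// above. Ollama streams newline-delimited JSON chunks; each one carries
// a fragment of the answer in message.content, with done: true at the
// end. If the AbortSignal fires mid-stream, reader.read() rejects with
// an AbortError, which is the failure the commit message describes.
async function collectChat(res: Response): Promise<string> {
  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let buffered = '';
  let answer = '';
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffered += decoder.decode(value, { stream: true });
    const lines = buffered.split('\n');
    buffered = lines.pop() ?? ''; // keep the trailing partial line
    for (const line of lines) {
      if (!line.trim()) continue;
      const chunk = JSON.parse(line); // { message: { content }, done, ... }
      answer += chunk.message?.content ?? '';
    }
  }
  return answer;
}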
@@ -131,7 +131,8 @@ async function handleV1Draft(body: Record<string, unknown>): Promise<NextRespons
         model: LLM_MODEL,
         messages,
         stream: false,
-        options: { temperature: 0.15, num_predict: 16384 },
+        think: false,
+        options: { temperature: 0.15, num_predict: 16384, num_ctx: 8192 },
         format: 'json',
       }),
       signal: AbortSignal.timeout(180000),
@@ -327,7 +328,8 @@ async function callOllama(systemPrompt: string, userPrompt: string): Promise<str
           { role: 'user', content: userPrompt },
         ],
         stream: false,
-        options: { temperature: 0.15, num_predict: 4096 },
+        think: false,
+        options: { temperature: 0.15, num_predict: 4096, num_ctx: 8192 },
         format: 'json',
       }),
       signal: AbortSignal.timeout(120000),
@@ -187,7 +187,8 @@ export async function POST(request: NextRequest)
           { role: 'user', content: crossCheckPrompt },
         ],
         stream: false,
-        options: { temperature: 0.1, num_predict: 8192 },
+        think: false,
+        options: { temperature: 0.1, num_predict: 8192, num_ctx: 8192 },
         format: 'json',
       }),
       signal: AbortSignal.timeout(120000),
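The last three hunks cover the non-streaming JSON routes (draft generation, callOllama, cross-check). With stream: false, Ollama returns one JSON object, and with format: 'json' the message content itself should parse as JSON. A minimal sketch of that path, assuming the documented response shape (the helper name is hypothetical):

// Hypothetical helper for the non-streaming routes above. With
// stream: false Ollama answers with a single object whose
// message.content holds the generated text; format: 'json' constrains
// that text to valid JSON, so it can be parsed directly.
async function parseJsonAnswer(res: Response): Promise<unknown> {
  const data = await res.json(); // { message: { role, content }, done: true, ... }
  return JSON.parse(data.message.content);
}

Setting num_ctx: 8192 explicitly here also keeps the context window consistent across all five routes instead of relying on the model's default.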