fix(llm): qwen3.5 think:false + num_ctx 8192 in allen Chat/Draft-Routen
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 35s
CI / test-python-backend-compliance (push) Successful in 31s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 18s
All checks were successful
CI / go-lint (push) Has been skipped
CI / python-lint (push) Has been skipped
CI / nodejs-lint (push) Has been skipped
CI / test-go-ai-compliance (push) Successful in 35s
CI / test-python-backend-compliance (push) Successful in 31s
CI / test-python-document-crawler (push) Successful in 22s
CI / test-python-dsms-gateway (push) Successful in 18s
Compliance Advisor, Drafting Agent und Validator haben nicht geantwortet, weil qwen3.5 standardmaessig im Thinking-Mode laeuft (interne Chain-of-Thought > 2min Timeout). Keiner der Agenten benoetigt Thinking-Mode — alle Aufgaben sind Chat/Textgenerierung/JSON-Validierung ohne tiefes Reasoning. think:false sorgt fuer direkte schnelle Antworten. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -178,9 +178,11 @@ Der Nutzer hat "${countryLabel} (${validCountry})" gewaehlt.
|
|||||||
model: LLM_MODEL,
|
model: LLM_MODEL,
|
||||||
messages,
|
messages,
|
||||||
stream: true,
|
stream: true,
|
||||||
|
think: false,
|
||||||
options: {
|
options: {
|
||||||
temperature: 0.3,
|
temperature: 0.3,
|
||||||
num_predict: 8192,
|
num_predict: 8192,
|
||||||
|
num_ctx: 8192,
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
signal: AbortSignal.timeout(120000),
|
signal: AbortSignal.timeout(120000),
|
||||||
|
|||||||
@@ -88,9 +88,11 @@ export async function POST(request: NextRequest) {
|
|||||||
model: LLM_MODEL,
|
model: LLM_MODEL,
|
||||||
messages,
|
messages,
|
||||||
stream: true,
|
stream: true,
|
||||||
|
think: false,
|
||||||
options: {
|
options: {
|
||||||
temperature: mode === 'draft' ? 0.2 : 0.3,
|
temperature: mode === 'draft' ? 0.2 : 0.3,
|
||||||
num_predict: mode === 'draft' ? 16384 : 8192,
|
num_predict: mode === 'draft' ? 16384 : 8192,
|
||||||
|
num_ctx: 8192,
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
signal: AbortSignal.timeout(120000),
|
signal: AbortSignal.timeout(120000),
|
||||||
|
|||||||
@@ -131,7 +131,8 @@ async function handleV1Draft(body: Record<string, unknown>): Promise<NextRespons
|
|||||||
model: LLM_MODEL,
|
model: LLM_MODEL,
|
||||||
messages,
|
messages,
|
||||||
stream: false,
|
stream: false,
|
||||||
options: { temperature: 0.15, num_predict: 16384 },
|
think: false,
|
||||||
|
options: { temperature: 0.15, num_predict: 16384, num_ctx: 8192 },
|
||||||
format: 'json',
|
format: 'json',
|
||||||
}),
|
}),
|
||||||
signal: AbortSignal.timeout(180000),
|
signal: AbortSignal.timeout(180000),
|
||||||
@@ -327,7 +328,8 @@ async function callOllama(systemPrompt: string, userPrompt: string): Promise<str
|
|||||||
{ role: 'user', content: userPrompt },
|
{ role: 'user', content: userPrompt },
|
||||||
],
|
],
|
||||||
stream: false,
|
stream: false,
|
||||||
options: { temperature: 0.15, num_predict: 4096 },
|
think: false,
|
||||||
|
options: { temperature: 0.15, num_predict: 4096, num_ctx: 8192 },
|
||||||
format: 'json',
|
format: 'json',
|
||||||
}),
|
}),
|
||||||
signal: AbortSignal.timeout(120000),
|
signal: AbortSignal.timeout(120000),
|
||||||
|
|||||||
@@ -187,7 +187,8 @@ export async function POST(request: NextRequest) {
|
|||||||
{ role: 'user', content: crossCheckPrompt },
|
{ role: 'user', content: crossCheckPrompt },
|
||||||
],
|
],
|
||||||
stream: false,
|
stream: false,
|
||||||
options: { temperature: 0.1, num_predict: 8192 },
|
think: false,
|
||||||
|
options: { temperature: 0.1, num_predict: 8192, num_ctx: 8192 },
|
||||||
format: 'json',
|
format: 'json',
|
||||||
}),
|
}),
|
||||||
signal: AbortSignal.timeout(120000),
|
signal: AbortSignal.timeout(120000),
|
||||||
|
|||||||
Reference in New Issue
Block a user