fix: Disable Qwen thinking mode for RAG checks (/no_think prefix)
Qwen 3.5 spends its entire token budget on thinking, which leaves the response empty. Prefix the prompt with /no_think to get direct JSON output.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -197,9 +197,9 @@ async def _verify_control_with_llm(
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
resp = await client.post(f"{OLLAMA_URL}/api/generate", json={
|
||||
"model": OLLAMA_MODEL,
|
||||
"prompt": prompt,
|
||||
"prompt": "/no_think\n" + prompt, # Disable thinking mode
|
||||
"stream": False,
|
||||
"options": {"num_predict": 200}, # Limit response length
|
||||
"options": {"num_predict": 300},
|
||||
})
|
||||
|
||||
if resp.status_code != 200:
|
||||
|
||||
Reference in New Issue
Block a user