fix: Disable Qwen thinking mode for RAG checks (/no_think prefix)
Qwen 3.5 uses all of its tokens for thinking, leaving the response empty. Use the /no_think prefix to get direct JSON output.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -197,9 +197,9 @@ async def _verify_control_with_llm(
|
|||||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||||
resp = await client.post(f"{OLLAMA_URL}/api/generate", json={
|
resp = await client.post(f"{OLLAMA_URL}/api/generate", json={
|
||||||
"model": OLLAMA_MODEL,
|
"model": OLLAMA_MODEL,
|
||||||
"prompt": prompt,
|
"prompt": "/no_think\n" + prompt, # Disable thinking mode
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"options": {"num_predict": 200}, # Limit response length
|
"options": {"num_predict": 300},
|
||||||
})
|
})
|
||||||
|
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
|
|||||||
Reference in New Issue
Block a user