fix: Disable Qwen thinking mode for RAG checks (/no_think prefix)

Qwen 3.5 spends its entire token budget (num_predict) on thinking, leaving the response empty.
Prefix the prompt with /no_think to get direct JSON output, and raise num_predict from 200 to 300.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-05-06 15:12:51 +02:00
parent 9f16e6d535
commit e50c4d659e
@@ -197,9 +197,9 @@ async def _verify_control_with_llm(
async with httpx.AsyncClient(timeout=120.0) as client: async with httpx.AsyncClient(timeout=120.0) as client:
resp = await client.post(f"{OLLAMA_URL}/api/generate", json={ resp = await client.post(f"{OLLAMA_URL}/api/generate", json={
"model": OLLAMA_MODEL, "model": OLLAMA_MODEL,
"prompt": prompt, "prompt": "/no_think\n" + prompt, # Disable thinking mode
"stream": False, "stream": False,
"options": {"num_predict": 200}, # Limit response length "options": {"num_predict": 300},
}) })
if resp.status_code != 200: if resp.status_code != 200: