fix: Disable Qwen thinking mode for RAG checks (/no_think prefix)

Qwen 3.5 spends its entire token budget on thinking, which leaves the response empty. Prefix the prompt with /no_think to get direct JSON output, and raise num_predict from 200 to 300. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-06 15:12:51 +02:00
parent 9f16e6d535
commit e50c4d659e
1 changed file with 2 additions and 2 deletions
@@ -197,9 +197,9 @@ async def _verify_control_with_llm(
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.post(f"{OLLAMA_URL}/api/generate", json={
                "model": OLLAMA_MODEL,
-                "prompt": prompt,
+                "prompt": "/no_think\n" + prompt,  # Disable thinking mode
                "stream": False,
-                "options": {"num_predict": 200},  # Limit response length
+                "options": {"num_predict": 300},
            })

        if resp.status_code != 200: