From e50c4d659ed071681085292692257340e7f5b06e Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Wed, 6 May 2026 15:12:51 +0200 Subject: [PATCH] fix: Disable Qwen thinking mode for RAG checks (/no_think prefix) Qwen 3.5 uses all of its tokens for thinking, leaving the response empty. Prefix the prompt with /no_think to get direct JSON output, and raise num_predict from 200 to 300. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../compliance/services/rag_document_checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend-compliance/compliance/services/rag_document_checker.py b/backend-compliance/compliance/services/rag_document_checker.py index db3a547..cd88843 100644 --- a/backend-compliance/compliance/services/rag_document_checker.py +++ b/backend-compliance/compliance/services/rag_document_checker.py @@ -197,9 +197,9 @@ async def _verify_control_with_llm( async with httpx.AsyncClient(timeout=120.0) as client: resp = await client.post(f"{OLLAMA_URL}/api/generate", json={ "model": OLLAMA_MODEL, - "prompt": prompt, + "prompt": "/no_think\n" + prompt, # Disable thinking mode "stream": False, - "options": {"num_predict": 200}, # Limit response length + "options": {"num_predict": 300}, }) if resp.status_code != 200: