From e50c4d659ed071681085292692257340e7f5b06e Mon Sep 17 00:00:00 2001
From: Benjamin Admin <benjaminadmin@MacBook-Pro.local>
Date: Wed, 6 May 2026 15:12:51 +0200
Subject: [PATCH] fix: Disable Qwen thinking mode for RAG checks (/no_think
 prefix)

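Qwen 3.5 spends its entire num_predict token budget on thinking, which
leaves the actual response empty. Prefix the prompt with /no_think so the
model returns the JSON output directly, and raise num_predict from 200 to
300.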

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../compliance/services/rag_document_checker.py               | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend-compliance/compliance/services/rag_document_checker.py b/backend-compliance/compliance/services/rag_document_checker.py
index db3a547..cd88843 100644
--- a/backend-compliance/compliance/services/rag_document_checker.py
+++ b/backend-compliance/compliance/services/rag_document_checker.py
@@ -197,9 +197,9 @@ async def _verify_control_with_llm(
         async with httpx.AsyncClient(timeout=120.0) as client:
             resp = await client.post(f"{OLLAMA_URL}/api/generate", json={
                 "model": OLLAMA_MODEL,
-                "prompt": prompt,
+                "prompt": "/no_think\n" + prompt,  # Disable thinking mode
                 "stream": False,
-                "options": {"num_predict": 200},  # Limit response length
+                "options": {"num_predict": 300},
             })
 
         if resp.status_code != 200: