From 2188d6645e3fc8a81c43138f0435bd190590b079 Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Wed, 22 Apr 2026 20:27:58 +0200
Subject: [PATCH] fix(llm-dedup): increase timeout to 120s, add /no_think, limit output to 200 tokens

qwen3.5 uses extended thinking by default which causes 95s+ responses
and 30s timeouts. Add /no_think to system prompt and num_predict=200
to keep responses short.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 control-pipeline/api/control_generator_routes.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/control-pipeline/api/control_generator_routes.py b/control-pipeline/api/control_generator_routes.py
index 9d67bfb..b91fb3f 100644
--- a/control-pipeline/api/control_generator_routes.py
+++ b/control-pipeline/api/control_generator_routes.py
@@ -1475,14 +1475,15 @@ async def _run_llm_dedup(req: LLMDedupRequest, job_id: str):

             prompt = f"Control A ({row.candidate_control_id}):\n{candidate_ctx}\n\nControl B ({row.matched_control_id}):\n{matched_ctx}\n\nSind diese Controls Duplikate?"

-            async with httpx.AsyncClient(timeout=30.0) as client:
+            async with httpx.AsyncClient(timeout=120.0) as client:
                 resp = await client.post(
                     f"{OLLAMA_URL}/api/chat",
                     json={
                         "model": req.model,
                         "stream": False,
+                        "options": {"num_predict": 200},
                         "messages": [
-                            {"role": "system", "content": "Du bist ein Compliance-Experte. Vergleiche zwei Controls und entscheide: DUPLIKAT (gleiche Anforderung, nur anders formuliert) oder VERSCHIEDEN (unterschiedlicher Scope/Inhalt). Antworte NUR mit einem JSON: {\"verdict\": \"DUPLIKAT\" oder \"VERSCHIEDEN\", \"reason\": \"kurze Begruendung\"}"},
+                            {"role": "system", "content": "Du bist ein Compliance-Experte. Vergleiche zwei Controls und entscheide: DUPLIKAT (gleiche Anforderung, nur anders formuliert) oder VERSCHIEDEN (unterschiedlicher Scope/Inhalt). Antworte NUR mit einem JSON: {\"verdict\": \"DUPLIKAT\" oder \"VERSCHIEDEN\", \"reason\": \"kurze Begruendung\"} /no_think"},
                             {"role": "user", "content": prompt},
                         ],
                     },