From 0ccc6c4047015543f2c9f4a3b2e581b7002d1d8c Mon Sep 17 00:00:00 2001 From: Benjamin Admin Date: Tue, 28 Apr 2026 00:51:06 +0200 Subject: [PATCH] fix: handle Qwen think mode in classification, add German term matching Co-Authored-By: Claude Opus 4.6 (1M context) --- .../compliance/api/agent_analyze_routes.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/backend-compliance/compliance/api/agent_analyze_routes.py b/backend-compliance/compliance/api/agent_analyze_routes.py index 246d27d..b4cb5da 100644 --- a/backend-compliance/compliance/api/agent_analyze_routes.py +++ b/backend-compliance/compliance/api/agent_analyze_routes.py @@ -121,18 +121,35 @@ async def _classify(client: httpx.AsyncClient, text: str) -> str: resp = await client.post(f"{SDK_URL}/sdk/v1/llm/chat", headers=SDK_HEADERS, json={ "messages": [ {"role": "system", "content": ( + "/no_think\n" "Klassifiziere das Dokument in GENAU EINE Kategorie: " "privacy_policy, cookie_banner, terms_of_service, imprint, dpa, other. " - "Antworte NUR mit dem Kategorienamen, nichts anderes." + "Antworte NUR mit dem Kategorienamen, nichts anderes. Kein Denken, keine Erklaerung." 
)}, {"role": "user", "content": text[:2000]}, ], }) data = resp.json() - raw = data.get("response", data.get("content", "other")).strip().lower() + # Qwen 3.5 may use think mode — content can be in message.content or response + raw = ( + data.get("response", "") + or data.get("content", "") + or (data.get("message", {}) or {}).get("content", "") + or "" + ).strip().lower() + # Strip Qwen think tags if present + raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip() + logger.info("Classification raw response: %s", raw[:200]) for cat in ["privacy_policy", "cookie_banner", "terms_of_service", "imprint", "dpa"]: if cat in raw: return cat + # Also check German terms + if "datenschutz" in raw: + return "privacy_policy" + if "cookie" in raw: + return "cookie_banner" + if "impressum" in raw: + return "imprint" return "other" except Exception as e: logger.warning("Classification failed: %s", e)