fix: handle Qwen think mode in classification, add German term matching

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-28 00:51:06 +02:00
parent 5ff65b3402
commit 0ccc6c4047

View File

@@ -121,18 +121,35 @@ async def _classify(client: httpx.AsyncClient, text: str) -> str:
resp = await client.post(f"{SDK_URL}/sdk/v1/llm/chat", headers=SDK_HEADERS, json={ resp = await client.post(f"{SDK_URL}/sdk/v1/llm/chat", headers=SDK_HEADERS, json={
"messages": [ "messages": [
{"role": "system", "content": ( {"role": "system", "content": (
"/no_think\n"
"Klassifiziere das Dokument in GENAU EINE Kategorie: " "Klassifiziere das Dokument in GENAU EINE Kategorie: "
"privacy_policy, cookie_banner, terms_of_service, imprint, dpa, other. " "privacy_policy, cookie_banner, terms_of_service, imprint, dpa, other. "
"Antworte NUR mit dem Kategorienamen, nichts anderes." "Antworte NUR mit dem Kategorienamen, nichts anderes. Kein Denken, keine Erklaerung."
)}, )},
{"role": "user", "content": text[:2000]}, {"role": "user", "content": text[:2000]},
], ],
}) })
data = resp.json() data = resp.json()
raw = data.get("response", data.get("content", "other")).strip().lower() # Qwen 3.5 may use think mode — content can be in message.content or response
raw = (
data.get("response", "")
or data.get("content", "")
or (data.get("message", {}) or {}).get("content", "")
or ""
).strip().lower()
# Strip Qwen think tags if present
raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
logger.info("Classification raw response: %s", raw[:200])
for cat in ["privacy_policy", "cookie_banner", "terms_of_service", "imprint", "dpa"]: for cat in ["privacy_policy", "cookie_banner", "terms_of_service", "imprint", "dpa"]:
if cat in raw: if cat in raw:
return cat return cat
# Also check German terms
if "datenschutz" in raw:
return "privacy_policy"
if "cookie" in raw:
return "cookie_banner"
if "impressum" in raw:
return "imprint"
return "other" return "other"
except Exception as e: except Exception as e:
logger.warning("Classification failed: %s", e) logger.warning("Classification failed: %s", e)