fix: handle Qwen think mode in classification, add German term matching
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -121,18 +121,35 @@ async def _classify(client: httpx.AsyncClient, text: str) -> str:
|
|||||||
resp = await client.post(f"{SDK_URL}/sdk/v1/llm/chat", headers=SDK_HEADERS, json={
|
resp = await client.post(f"{SDK_URL}/sdk/v1/llm/chat", headers=SDK_HEADERS, json={
|
||||||
"messages": [
|
"messages": [
|
||||||
{"role": "system", "content": (
|
{"role": "system", "content": (
|
||||||
|
"/no_think\n"
|
||||||
"Klassifiziere das Dokument in GENAU EINE Kategorie: "
|
"Klassifiziere das Dokument in GENAU EINE Kategorie: "
|
||||||
"privacy_policy, cookie_banner, terms_of_service, imprint, dpa, other. "
|
"privacy_policy, cookie_banner, terms_of_service, imprint, dpa, other. "
|
||||||
"Antworte NUR mit dem Kategorienamen, nichts anderes."
|
"Antworte NUR mit dem Kategorienamen, nichts anderes. Kein Denken, keine Erklaerung."
|
||||||
)},
|
)},
|
||||||
{"role": "user", "content": text[:2000]},
|
{"role": "user", "content": text[:2000]},
|
||||||
],
|
],
|
||||||
})
|
})
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
raw = data.get("response", data.get("content", "other")).strip().lower()
|
# Qwen 3.5 may use think mode — content can be in message.content or response
|
||||||
|
raw = (
|
||||||
|
data.get("response", "")
|
||||||
|
or data.get("content", "")
|
||||||
|
or (data.get("message", {}) or {}).get("content", "")
|
||||||
|
or ""
|
||||||
|
).strip().lower()
|
||||||
|
# Strip Qwen think tags if present
|
||||||
|
raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
|
||||||
|
logger.info("Classification raw response: %s", raw[:200])
|
||||||
for cat in ["privacy_policy", "cookie_banner", "terms_of_service", "imprint", "dpa"]:
|
for cat in ["privacy_policy", "cookie_banner", "terms_of_service", "imprint", "dpa"]:
|
||||||
if cat in raw:
|
if cat in raw:
|
||||||
return cat
|
return cat
|
||||||
|
# Also check German terms
|
||||||
|
if "datenschutz" in raw:
|
||||||
|
return "privacy_policy"
|
||||||
|
if "cookie" in raw:
|
||||||
|
return "cookie_banner"
|
||||||
|
if "impressum" in raw:
|
||||||
|
return "imprint"
|
||||||
return "other"
|
return "other"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Classification failed: %s", e)
|
logger.warning("Classification failed: %s", e)
|
||||||
|
|||||||
Reference in New Issue
Block a user