fix: use Ollama directly for correction generation (bypass SDK think-mode)
The SDK LLM chat endpoint returns empty content due to Qwen's think-mode. A direct Ollama /api/generate call with stream:false gets the full response, including the think tags, which we strip out. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -233,30 +233,27 @@ async def _add_corrections(findings: list[ScanFinding], dse_text: str) -> None:
|
|||||||
if finding.severity in ("HIGH", "MEDIUM") and "MISSING" in finding.code:
|
if finding.severity in ("HIGH", "MEDIUM") and "MISSING" in finding.code:
|
||||||
service_name = finding.code.replace("DSE-MISSING-", "").replace("_", " ").title()
|
service_name = finding.code.replace("DSE-MISSING-", "").replace("_", " ").title()
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=90.0) as client:
|
# Call Ollama directly (bypasses SDK RBAC + Think-mode issues)
|
||||||
resp = await client.post(f"{SDK_URL}/sdk/v1/llm/chat", headers=SDK_HEADERS, json={
|
ollama_url = os.environ.get("OLLAMA_URL", "http://host.docker.internal:11434")
|
||||||
"messages": [
|
ollama_model = os.environ.get("OLLAMA_MODEL", "qwen3.5:35b-a3b")
|
||||||
{"role": "system", "content": (
|
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||||
"/no_think\n"
|
resp = await client.post(f"{ollama_url}/api/generate", json={
|
||||||
"Du bist Datenschutzexperte. Erstelle einen einbaufertigen "
|
"model": ollama_model,
|
||||||
"Textbaustein fuer eine deutsche Datenschutzerklaerung fuer "
|
"prompt": (
|
||||||
f"den Dienst '{service_name}'. Enthalte: Ueberschrift, "
|
f"Erstelle einen einbaufertigen Textbaustein fuer eine deutsche "
|
||||||
"Anbietername, Zweck, Rechtsgrundlage nach DSGVO, "
|
f"Datenschutzerklaerung fuer den Dienst '{service_name}'. "
|
||||||
"Drittlandtransfer-Hinweis wenn noetig, "
|
f"Enthalte: Ueberschrift, Anbietername mit Sitz, Zweck der Verarbeitung, "
|
||||||
"Widerspruchsmoeglichkeit. Max 150 Woerter."
|
f"Rechtsgrundlage nach DSGVO, Drittlandtransfer-Hinweis wenn noetig, "
|
||||||
)},
|
f"Widerspruchsmoeglichkeit. Max 150 Woerter. "
|
||||||
{"role": "user", "content": f"Erstelle DSE-Textbaustein fuer: {service_name}"},
|
f"Antworte NUR mit dem fertigen Textbaustein."
|
||||||
],
|
),
|
||||||
|
"stream": False,
|
||||||
})
|
})
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
import re
|
import re
|
||||||
raw = (
|
raw = data.get("response", "").strip()
|
||||||
data.get("response", "")
|
|
||||||
or (data.get("message", {}) or {}).get("content", "")
|
|
||||||
or ""
|
|
||||||
).strip()
|
|
||||||
raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
|
raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
|
||||||
if raw:
|
if raw and len(raw) > 50:
|
||||||
finding.correction = raw
|
finding.correction = raw
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Correction generation failed for %s: %s", service_name, e)
|
logger.warning("Correction generation failed for %s: %s", service_name, e)
|
||||||
|
|||||||
Reference in New Issue
Block a user