From dc5d76ecf5ef698d6db61f324fb63dec49796597 Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Tue, 3 Mar 2026 14:43:42 +0100
Subject: [PATCH] fix(llm-review): think=false and logging were missing in the
 streaming version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The UI uses llm_review_entries_streaming, not llm_review_entries. The
streaming version had no think:false → qwen3:0.6b spent 9 seconds in
its thinking phase, leaving no token budget for the actual answer.

- Added think: false to the streaming version
- num_predict: 4096 → 8192 (consistent with the non-streaming version)
- Added logging for batch progress, response length, and parsed entries

Co-Authored-By: Claude Sonnet 4.6
---
 klausur-service/backend/cv_vocab_pipeline.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py
index 6f65d67..0fd82f1 100644
--- a/klausur-service/backend/cv_vocab_pipeline.py
+++ b/klausur-service/backend/cv_vocab_pipeline.py
@@ -5657,6 +5657,9 @@ async def llm_review_entries_streaming(
 
     prompt = _build_llm_prompt(table_lines)
 
+    logger.info("LLM review streaming: batch %d — sending %d entries to %s",
+                batch_start // batch_size, len(batch_entries), model)
+
     t0 = time.time()
     async with httpx.AsyncClient(timeout=300.0) as client:
         resp = await client.post(
@@ -5665,7 +5668,8 @@
                 "model": model,
                 "messages": [{"role": "user", "content": prompt}],
                 "stream": False,
-                "options": {"temperature": 0.1, "num_predict": 4096},
+                "think": False,  # qwen3: disable chain-of-thought
+                "options": {"temperature": 0.1, "num_predict": 8192},
             },
         )
         resp.raise_for_status()
@@ -5673,7 +5677,11 @@
         batch_ms = int((time.time() - t0) * 1000)
         total_duration_ms += batch_ms
 
+        logger.info("LLM review streaming: response %dms, length=%d chars", batch_ms, len(content))
+        logger.debug("LLM review streaming raw (first 500): %.500s", content)
+
         corrected = _parse_llm_json_array(content)
+        logger.info("LLM review streaming: parsed %d entries, applying diff...", len(corrected))
        batch_changes, batch_corrected = _diff_batch(batch_entries, corrected)
 
         # Merge back
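
Note for reviewers: the snippet below is one way to confirm the think=false
behavior against a local Ollama before/after this patch. It is a standalone
sketch, not part of the patch. Assumptions: Ollama listens on the default
http://localhost:11434, qwen3:0.6b is already pulled, the Ollama version is
recent enough to support the top-level "think" field, and the toy prompt and
num_predict value are arbitrary; adjust for your setup.

    import httpx

    OLLAMA_URL = "http://localhost:11434"  # assumption: default local Ollama
    MODEL = "qwen3:0.6b"                   # model named in the commit message

    def chat(think: bool) -> dict:
        # Same request shape as the patched code, minus streaming/batching.
        resp = httpx.post(
            f"{OLLAMA_URL}/api/chat",
            json={
                "model": MODEL,
                "messages": [{"role": "user", "content": "Reply with the word OK."}],
                "stream": False,
                "think": think,
                "options": {"temperature": 0.1, "num_predict": 64},
            },
            timeout=300.0,
        )
        resp.raise_for_status()
        return resp.json()

    if __name__ == "__main__":
        for think in (True, False):
            data = chat(think)
            # With think=True, qwen3 spends generated tokens on the model's
            # thinking output; with think=False the whole num_predict budget
            # is left for "content". eval_count is Ollama's generated-token
            # count for the response.
            print(f"think={think}: eval_count={data.get('eval_count')}, "
                  f"content={data['message'].get('content')!r}")

With think=True and a small num_predict, the answer tends to come back empty
because the budget is consumed before any visible content is produced, which
is the 9-second failure mode described in the commit message.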