diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py index 6f65d67..0fd82f1 100644 --- a/klausur-service/backend/cv_vocab_pipeline.py +++ b/klausur-service/backend/cv_vocab_pipeline.py @@ -5657,6 +5657,9 @@ async def llm_review_entries_streaming( prompt = _build_llm_prompt(table_lines) + logger.info("LLM review streaming: batch %d — sending %d entries to %s", + batch_start // batch_size, len(batch_entries), model) + t0 = time.time() async with httpx.AsyncClient(timeout=300.0) as client: resp = await client.post( @@ -5665,7 +5668,8 @@ async def llm_review_entries_streaming( "model": model, "messages": [{"role": "user", "content": prompt}], "stream": False, - "options": {"temperature": 0.1, "num_predict": 4096}, + "think": False, # qwen3: disable chain-of-thought + "options": {"temperature": 0.1, "num_predict": 8192}, }, ) resp.raise_for_status() @@ -5673,7 +5677,11 @@ async def llm_review_entries_streaming( batch_ms = int((time.time() - t0) * 1000) total_duration_ms += batch_ms + logger.info("LLM review streaming: response %dms, length=%d chars", batch_ms, len(content)) + logger.debug("LLM review streaming raw (first 500): %.500s", content) + corrected = _parse_llm_json_array(content) + logger.info("LLM review streaming: parsed %d entries, applying diff...", len(corrected)) batch_changes, batch_corrected = _diff_batch(batch_entries, corrected) # Merge back