From f1b6246838a92b89b9a6549fc53d3906e8a39040 Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Tue, 3 Mar 2026 14:13:08 +0100
Subject: [PATCH] fix(llm-review): Diagnose-Logging + think=false +
 <think>-Tag-Stripping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- think: false in Ollama API Request (qwen3 disables CoT nativ)
- <think>...</think> Stripping in _parse_llm_json_array (Fallback falls think:false nicht greift)
- INFO-Logging: wie viele Einträge gesendet werden, Response-Länge, Anzahl geparster Einträge
- DEBUG-Logging: erste 3 Eingabe-Einträge, ersten 500 Zeichen der Antwort
- Bessere Fehlermeldung wenn JSON-Parsing fehlschlägt

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 klausur-service/backend/cv_vocab_pipeline.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/klausur-service/backend/cv_vocab_pipeline.py b/klausur-service/backend/cv_vocab_pipeline.py
index 7c7a53e..90b1caa 100644
--- a/klausur-service/backend/cv_vocab_pipeline.py
+++ b/klausur-service/backend/cv_vocab_pipeline.py
@@ -5562,6 +5562,10 @@ async def llm_review_entries(
         for e in review_entries
     ]
 
+    logger.info("LLM review: sending %d/%d entries to %s (skipped %d without digit-pattern)",
+                len(review_entries), len(entries), model, len(entries) - len(reviewable))
+    logger.debug("LLM review input: %s", _json.dumps(table_lines[:3], ensure_ascii=False))
+
     prompt = _build_llm_prompt(table_lines)
 
     t0 = time.time()
@@ -5572,6 +5576,7 @@
             "model": model,
             "messages": [{"role": "user", "content": prompt}],
             "stream": False,
+            "think": False,  # qwen3: disable chain-of-thought (Ollama >=0.6)
             "options": {"temperature": 0.1, "num_predict": 8192},
         },
     )
@@ -5579,7 +5584,11 @@
     content = resp.json().get("message", {}).get("content", "")
     duration_ms = int((time.time() - t0) * 1000)
 
+    logger.info("LLM review: response in %dms, raw length=%d chars", duration_ms, len(content))
+    logger.debug("LLM review raw response (first 500): %.500s", content)
+
     corrected = _parse_llm_json_array(content)
+    logger.info("LLM review: parsed %d corrected entries, applying diff...", len(corrected))
     changes, corrected_entries = _diff_batch(review_entries, corrected)
 
     # Merge corrected entries back into the full list
@@ -5696,15 +5705,19 @@ async def llm_review_entries_streaming(
 
 
 def _parse_llm_json_array(text: str) -> List[Dict]:
-    """Extract JSON array from LLM response (may contain markdown fences)."""
+    """Extract JSON array from LLM response (handles markdown fences and qwen3 think-tags)."""
+    # Strip qwen3 <think>...</think> blocks (present even with think=False on some builds)
+    text = _re.sub(r'<think>.*?</think>', '', text, flags=_re.DOTALL)
     # Strip markdown code fences
     text = _re.sub(r'```json\s*', '', text)
     text = _re.sub(r'```\s*', '', text)
-    # Find array
+    # Find first [ ... last ] (non-greedy would miss nested structures, greedy is correct here)
     match = _re.search(r'\[.*\]', text, _re.DOTALL)
     if match:
         try:
             return _json.loads(match.group())
-        except (ValueError, _json.JSONDecodeError):
-            pass
+        except (ValueError, _json.JSONDecodeError) as e:
+            logger.warning("LLM review: JSON parse failed: %s | raw snippet: %.200s", e, match.group()[:200])
+    else:
+        logger.warning("LLM review: no JSON array found in response (%.200s)", text[:200])
     return []