""" LLM verification for regex check results. When a regex check FAILs, the LLM re-checks the original text to confirm or overturn the finding. This eliminates false positives caused by regex limitations (unusual formatting, synonyms, etc.). Uses the self-hosted Ollama endpoint (Qwen) for fast local inference. """ import logging import os import httpx logger = logging.getLogger(__name__) OLLAMA_URL = os.getenv("OLLAMA_URL", "http://host.docker.internal:11434") # P13: qwen3:30b-a3b liefert zuverlaessige JSON-Antworten im Batch-Modus. # qwen3.5:35b-a3b lieferte mit format='json' + langem Prompt leere Strings. OLLAMA_MODEL = os.getenv("OLLAMA_VERIFY_MODEL", "qwen3:30b-a3b") TIMEOUT = 30.0 async def verify_failed_checks( text: str, failed_checks: list[dict], doc_title: str, ) -> dict[str, dict]: """Verify regex FAIL results using LLM — single batched call. Sends ALL failed checks in one LLM prompt instead of one call per check. Returns a dict mapping check_id -> {"overturned": bool, "evidence": str}. """ results: dict[str, dict] = {} checks_with_hints = [c for c in failed_checks if c.get("hint")] if not checks_with_hints: return results # Truncate text to fit context window text_excerpt = text[:8000] try: batch_results = await _ask_llm_batch( text_excerpt, checks_with_hints, doc_title, ) for check_id, answer in batch_results.items(): overturned = answer.get("found", False) results[check_id] = { "overturned": overturned, "evidence": answer.get("evidence", ""), } if overturned: logger.info( "LLM overturned regex FAIL for '%s' in '%s': %s", check_id, doc_title, answer.get("evidence", "")[:80], ) except Exception as e: logger.warning("LLM batch verify failed for '%s': %s", doc_title, e) return results async def _ask_llm_batch( text: str, checks: list[dict], doc_title: str, ) -> dict[str, dict]: """Ask the LLM to verify ALL failed checks in a single call. Uses /api/chat with format='json' so Ollama enforces a valid JSON response object — much more reliable than the previous /api/generate + free-text approach which qwen3 often wrapped in ... reasoning tokens. """ checklist_lines = [] for i, c in enumerate(checks, 1): checklist_lines.append( f'{i}. ID="{c["id"]}" | {c["label"]} | {c.get("hint", "")[:120]}' ) checklist_str = "\n".join(checklist_lines) system_msg = ( "Du pruefst ob ein Dokument bestimmte Pflichtangaben enthaelt. " "Antworte AUSSCHLIESSLICH mit einem JSON-Objekt: " '{"results": [{"id": "", "found": true|false, ' '"evidence": ""}]}. ' "Keine Erklaerungen, keine Reasoning-Tags, kein Markdown." ) user_msg = ( f'DOKUMENT: "{doc_title}"\n\n' f"ANFORDERUNGEN:\n{checklist_str}\n\n" f"TEXT:\n{text}" ) payload = { "model": OLLAMA_MODEL, "messages": [ {"role": "system", "content": system_msg}, {"role": "user", "content": user_msg}, ], "stream": False, "format": "json", # forces valid JSON output "options": {"temperature": 0.0, "num_predict": 3000}, } async with httpx.AsyncClient(timeout=120.0) as client: resp = await client.post(f"{OLLAMA_URL}/api/chat", json=payload) resp.raise_for_status() data = resp.json() raw = (data.get("message") or {}).get("content", "") return _parse_batch_response(raw, checks) def _parse_batch_response(raw: str, checks: list[dict]) -> dict[str, dict]: """Parse batch LLM response. Tolerates wrappers, code-fences, and either {results: [...]} or top-level [...].""" import json import re results: dict[str, dict] = {} if not raw: logger.info("LLM batch: empty response from model") return results text = raw.strip() # Strip qwen3 thinking tags text = re.sub(r".*?", "", text, flags=re.DOTALL).strip() # Strip markdown code fences m = re.search(r"```(?:json)?\s*(.+?)\s*```", text, re.DOTALL) if m: text = m.group(1).strip() # Try parse as-is parsed = None try: parsed = json.loads(text) except (json.JSONDecodeError, ValueError): # Try finding the first JSON object or array in the text for pattern in (r"\{.*\}", r"\[.*\]"): mm = re.search(pattern, text, re.DOTALL) if mm: try: parsed = json.loads(mm.group(0)) break except (json.JSONDecodeError, ValueError): continue if parsed is None: logger.info( "LLM batch: 0/%d checks parsed (raw head: %r)", len(checks), raw[:120], ) return results # Accept both {"results": [...]} (preferred) and bare list items = None if isinstance(parsed, dict): for key in ("results", "checks", "items", "verifications"): if isinstance(parsed.get(key), list): items = parsed[key] break elif isinstance(parsed, list): items = parsed if not items: # Final fallback: regex over individual id/found pairs for mm in re.finditer( r'\{[^}]*"id"\s*:\s*"([^"]+)"[^}]*"found"\s*:\s*(true|false)[^}]*\}', raw, re.DOTALL, ): results[mm.group(1)] = { "found": mm.group(2) == "true", "evidence": "", } logger.info("LLM batch: %d/%d checks parsed (regex fallback)", len(results), len(checks)) return results for item in items: if not isinstance(item, dict): continue cid = item.get("id", "") if not cid: continue results[cid] = { "found": bool(item.get("found", False)), "evidence": str(item.get("evidence", ""))[:150], } logger.info("LLM batch: %d/%d checks parsed", len(results), len(checks)) return results