fix(ocr-pipeline): increase LLM timeout to 300s and disable qwen3 thinking
- Add /no_think tag to prompt (qwen3 thinking mode causes massive slowdown)
- Increase httpx timeout from 120s to 300s for large vocab tables
- Improve error logging with traceback and exception type

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4354,11 +4354,13 @@ Antworte NUR mit dem korrigierten JSON-Array. Kein erklaerener Text.
|
||||
Fuer jeden Eintrag den du aenderst, setze "corrected": true.
|
||||
Fuer unveraenderte Eintraege setze "corrected": false.
|
||||
|
||||
+/no_think
|
||||
|
||||
Eingabe:
|
||||
{_json.dumps(table_lines, ensure_ascii=False, indent=2)}"""
|
||||
|
||||
t0 = time.time()
|
||||
- async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
+ async with httpx.AsyncClient(timeout=300.0) as client:
|
||||
resp = await client.post(
|
||||
f"{_OLLAMA_URL}/api/chat",
|
||||
json={
|
||||
|
||||
@@ -1420,8 +1420,9 @@ async def run_llm_review(session_id: str, request: Request):
|
||||
try:
|
||||
result = await llm_review_entries(entries, model=model)
|
||||
except Exception as e:
|
||||
- logger.error(f"LLM review failed for session {session_id}: {e}")
|
||||
- raise HTTPException(status_code=502, detail=f"LLM review failed: {e}")
|
||||
+ import traceback
|
||||
+ logger.error(f"LLM review failed for session {session_id}: {type(e).__name__}: {e}\n{traceback.format_exc()}")
|
||||
+ raise HTTPException(status_code=502, detail=f"LLM review failed ({type(e).__name__}): {e}")
|
||||
|
||||
# Store result inside word_result as a sub-key
|
||||
word_result["llm_review"] = {
|
||||
|
||||
Reference in New Issue
Block a user