From 7fc5464df7baa5f766768eceb8c9cb27336ecacf Mon Sep 17 00:00:00 2001
From: Benjamin Admin
Date: Fri, 24 Apr 2026 00:44:59 +0200
Subject: [PATCH] Switch Vision-LLM Fusion to llama3.2-vision:11b

qwen2.5vl:32b needs ~100GB RAM and crashes Ollama. llama3.2-vision:11b
is already installed and fits in memory.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 klausur-service/backend/vision_ocr_fusion.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/klausur-service/backend/vision_ocr_fusion.py b/klausur-service/backend/vision_ocr_fusion.py
index bfbd71e..c2ef216 100644
--- a/klausur-service/backend/vision_ocr_fusion.py
+++ b/klausur-service/backend/vision_ocr_fusion.py
@@ -22,7 +22,7 @@ import numpy as np
 logger = logging.getLogger(__name__)
 
 OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
-OLLAMA_HTR_MODEL = os.getenv("OLLAMA_HTR_MODEL", "qwen2.5vl:32b")
+VISION_FUSION_MODEL = os.getenv("VISION_FUSION_MODEL", "llama3.2-vision:11b")
 
 # Document category → prompt context
 CATEGORY_PROMPTS: Dict[str, Dict[str, str]] = {
@@ -225,7 +225,7 @@ async def vision_fuse_ocr(
         resp = await client.post(
             f"{OLLAMA_BASE_URL}/api/generate",
             json={
-                "model": OLLAMA_HTR_MODEL,
+                "model": VISION_FUSION_MODEL,
                 "prompt": prompt,
                 "images": [img_b64],
                 "stream": False,
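
Note (commentary, not part of the patch): since the model is now configurable
via the VISION_FUSION_MODEL environment variable, a startup check can fail
fast when the configured model is missing from the local Ollama instance,
instead of erroring on the first /api/generate request. The sketch below is
illustrative and not code from vision_ocr_fusion.py; it assumes httpx is
available (the patched module already uses an httpx client) and relies on
Ollama's GET /api/tags endpoint, which lists locally installed models. The
helper name model_is_available is hypothetical.

# Sketch: verify the configured vision model exists in Ollama before
# the service starts. Env var names mirror the ones in this patch;
# everything else here is an assumption for illustration.
import os

import httpx

OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://host.docker.internal:11434")
VISION_FUSION_MODEL = os.getenv("VISION_FUSION_MODEL", "llama3.2-vision:11b")


def model_is_available() -> bool:
    """Return True if VISION_FUSION_MODEL is installed in the local Ollama."""
    # GET /api/tags returns {"models": [{"name": "llama3.2-vision:11b", ...}, ...]}
    resp = httpx.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=10.0)
    resp.raise_for_status()
    installed = {m["name"] for m in resp.json().get("models", [])}
    return VISION_FUSION_MODEL in installed


if __name__ == "__main__":
    if not model_is_available():
        raise SystemExit(
            f"{VISION_FUSION_MODEL} not found; run 'ollama pull {VISION_FUSION_MODEL}' first."
        )

Wiring a check like this into service startup (or a container healthcheck)
would surface a missing or mistyped model name immediately, rather than as an
error from /api/generate on the first fusion request.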