Switch embedding model to bge-m3 (1024-dim)

The Qdrant collections use 1024-dim vectors (bge-m3) but the
embedding-service was configured with all-MiniLM-L6-v2 (384-dim).
Also increase memory limit to 8G for the larger model.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-02-26 23:29:23 +01:00
parent 13ba1457b0
commit d7cc6bfbc7

View File

@@ -414,7 +414,7 @@ services:
       - embedding_models:/root/.cache/huggingface
     environment:
       EMBEDDING_BACKEND: ${EMBEDDING_BACKEND:-local}
-      LOCAL_EMBEDDING_MODEL: ${LOCAL_EMBEDDING_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
+      LOCAL_EMBEDDING_MODEL: ${LOCAL_EMBEDDING_MODEL:-BAAI/bge-m3}
       LOCAL_RERANKER_MODEL: ${LOCAL_RERANKER_MODEL:-cross-encoder/ms-marco-MiniLM-L-6-v2}
       PDF_EXTRACTION_BACKEND: ${PDF_EXTRACTION_BACKEND:-pymupdf}
       OPENAI_API_KEY: ${OPENAI_API_KEY:-}
@@ -423,7 +423,7 @@ services:
     deploy:
       resources:
         limits:
-          memory: 4G
+          memory: 8G
     healthcheck:
       test: ["CMD", "python", "-c", "import httpx; r=httpx.get('http://127.0.0.1:8087/health'); r.raise_for_status()"]
       interval: 30s