feat(rag): use Ollama for embeddings instead of embedding-service

Switch to Ollama's bge-m3 model (1024-dim) for generating embeddings, solving the dimension mismatch with Qdrant collections. The embedding-service is still used for chunking, reranking, and PDF extraction.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 07:46:57 +01:00
parent d7cc6bfbc7
commit 92ca5b7ba5
2 changed files with 65 additions and 22 deletions
@@ -385,8 +385,12 @@ services:
      MINIO_BUCKET: ${MINIO_BUCKET:-breakpilot-rag}
      MINIO_SECURE: "false"
      EMBEDDING_SERVICE_URL: http://embedding-service:8087
+      OLLAMA_URL: ${OLLAMA_URL:-http://host.docker.internal:11434}
+      OLLAMA_EMBED_MODEL: ${OLLAMA_EMBED_MODEL:-bge-m3}
      JWT_SECRET: ${JWT_SECRET:-your-super-secret-jwt-key-change-in-production}
      ENVIRONMENT: ${ENVIRONMENT:-development}
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
    depends_on:
      qdrant:
        condition: service_healthy