You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by zh...@apache.org on 2020/09/07 02:40:28 UTC
[incubator-mxnet] branch master updated: update runtime setting default values (#18987)
This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 04e394a update runtime setting default values (#18987)
04e394a is described below
commit 04e394aa8f9f95439f8009e0ce13419b767ccc8a
Author: Sheng Zha <sz...@users.noreply.github.com>
AuthorDate: Sun Sep 6 19:39:20 2020 -0700
update runtime setting default values (#18987)
---
src/resource.cc | 4 ++--
src/storage/pooled_storage_manager.h | 15 ++++++++++-----
src/storage/storage.cc | 3 ++-
3 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/src/resource.cc b/src/resource.cc
index f4f9da2..ff5a9fb 100644
--- a/src/resource.cc
+++ b/src/resource.cc
@@ -96,9 +96,9 @@ class ResourceManagerImpl : public ResourceManager {
cpu_temp_space_copy_ = dmlc::GetEnv("MXNET_CPU_TEMP_COPY", 4);
gpu_temp_space_copy_ = dmlc::GetEnv("MXNET_GPU_TEMP_COPY", 1);
cpu_native_rand_copy_ = dmlc::GetEnv("MXNET_CPU_PARALLEL_RAND_COPY", 1);
- gpu_native_rand_copy_ = dmlc::GetEnv("MXNET_GPU_PARALLEL_RAND_COPY", 4);
+ gpu_native_rand_copy_ = dmlc::GetEnv("MXNET_GPU_PARALLEL_RAND_COPY", 1);
#if MXNET_USE_CUDNN == 1
- gpu_cudnn_dropout_state_copy_ = dmlc::GetEnv("MXNET_GPU_CUDNN_DROPOUT_STATE_COPY", 4);
+ gpu_cudnn_dropout_state_copy_ = dmlc::GetEnv("MXNET_GPU_CUDNN_DROPOUT_STATE_COPY", 1);
#endif // MXNET_USE_CUDNN == 1
engine_ref_ = Engine::_GetSharedRef();
storage_ref_ = Storage::_GetSharedRef();
diff --git a/src/storage/pooled_storage_manager.h b/src/storage/pooled_storage_manager.h
index 897725a..194a1a2 100644
--- a/src/storage/pooled_storage_manager.h
+++ b/src/storage/pooled_storage_manager.h
@@ -178,14 +178,19 @@ void PooledStorageManager<BucketingStrategy, StoringMethod>::Alloc(Storage::Hand
void *ret = nullptr;
auto e = contextHelper_->Malloc(&ret, roundSize);
if (e) {
- const std::string err(
+ // retry in case of fragmentation
+ ReleaseAllNoLock(false);
+ e = contextHelper_->Malloc(&ret, roundSize);
+ if (e) {
+ const std::string err(
#if MXNET_USE_CUDA
- dev_type_ == Context::kGPU?
- cudaGetErrorString(static_cast<cudaError_t>(e)) :
+ dev_type_ == Context::kGPU?
+ cudaGetErrorString(static_cast<cudaError_t>(e)) :
#endif
- std::strerror(errno));
+ std::strerror(errno));
- LOG(FATAL) << "Memory allocation failed " << err;
+ LOG(FATAL) << "Memory allocation failed " << err;
+ }
}
UNSET_DEVICE(device_store);
diff --git a/src/storage/storage.cc b/src/storage/storage.cc
index f359b30..d83860c 100644
--- a/src/storage/storage.cc
+++ b/src/storage/storage.cc
@@ -67,8 +67,9 @@ StorageManager *CreateStorageManager(const Context &ctx, const char *context,
int num_gpu_device, std::string *pStrategy) {
const auto env_var = env_var_name(context, pool_type);
const char *type = getenv(env_var.c_str());
- if (type == nullptr)
+ if (type == nullptr) {
type = "Naive"; // default pool
+ }
*pStrategy = type;
StorageManager *ptr = nullptr;