You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/06/01 19:42:05 UTC
[GitHub] piiswrong commented on a change in pull request #11041: gpu mem pool strategy

piiswrong commented on a change in pull request #11041: gpu mem pool strategy
URL: https://github.com/apache/incubator-mxnet/pull/11041#discussion_r192496824
 
 

 ##########
 File path: src/storage/pooled_storage_manager.h
 ##########
 @@ -129,13 +136,224 @@ void GPUPooledStorageManager::ReleaseAll() {
     for (auto&& j : i.second) {
       Storage::Handle handle;
       handle.dptr = j;
-      handle.size = i.first - NDEV;
+      handle.size = i.first;
       DirectFreeNoLock(handle);
     }
   }
   memory_pool_.clear();
 }
 
+/*!
+ * \brief Storage manager with a memory pool, with rounded size, on gpu.
+ *
+ * This GPU mem pool uses a mixture of nearest pow2 (exponential) rounding and
+ * nearest multiple (linear) rounding to help alleviate the memory allocation stress
+ * in which the default naive exact-size-match pool falls short, such as in variable-length
+ * input/output cases like RNN workloads.
+ *
+ * \param cutoff the cutoff at which rounding is switched from exponential to linear. It's set
+ * through MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF environment variable. Must be between 20 (1 MB)
+ * and 34 (16 GB).
+ * Suppose the cutoff is X, the memory size buckets look like this:
+ * exp2(0), exp2(1), ..., exp2(X), 2*exp2(X), 3*exp2(X), ...
+ */
+class GPUPooledRoundedStorageManager final : public StorageManager {
+ public:
+  /*!
+   * \brief Default constructor.
+   */
+  GPUPooledRoundedStorageManager() {
+    reserve_ = dmlc::GetEnv("MXNET_GPU_MEM_POOL_RESERVE", 5);
+    min_chunk_ = dmlc::GetEnv("MXNET_GPU_MEM_POOL_MIN_CHUNK", 4096);
+    cut_off_ = dmlc::GetEnv("MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF", 24);
+    if (min_chunk_ < 32) {
+      LOG(FATAL) << "MXNET_GPU_MEM_POOL_MIN_CHUNK cannot be set to a value smaller than 32. " \
+                 << "Got: " << min_chunk_ << ".";
+    }
+    if (min_chunk_ != 1ul << log2_round_up(min_chunk_)) {
+      LOG(FATAL) << "MXNET_GPU_MEM_POOL_MIN_CHUNK must be a power of 2. Got: " << min_chunk_ << ".";
+    } else {
+      min_chunk_ = log2_round_up(min_chunk_);
+    }
+    if (cut_off_ < 20 || cut_off_ > LOG2_MAX_MEM) {
+      LOG(FATAL) << "MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF cannot be set to a value " \
+                 << "smaller than 20 or greater than " << LOG2_MAX_MEM << ". Got: " \
+                 << cut_off_ << ".";
+    }
+    if (cut_off_ < min_chunk_) {
+      LOG(FATAL) << "MXNET_GPU_MEM_POOL_ROUND_LINEAR_CUTOFF cannot be set to a value " \
+                 << "smaller than log2 of MXNET_GPU_MEM_POOL_MIN_CHUNK. Got: " \
+                 << cut_off_ << " vs " << min_chunk_ << ".";
+    }
+    memory_pool_ = std::vector<std::vector<void*>>((1ul << (LOG2_MAX_MEM - cut_off_)) + cut_off_);
+  }
+  /*!
+   * \brief Default destructor.
+   */
+  ~GPUPooledRoundedStorageManager() {
+    ReleaseAll();
+  }
+
+  void Alloc(Storage::Handle* handle) override;
+  void Free(Storage::Handle handle) override;
+
+  void DirectFree(Storage::Handle handle) override {
+    handle.size = get_size(get_bucket(handle.size));
+    std::lock_guard<std::mutex> lock(Storage::Get()->GetMutex(Context::kGPU));
+    DirectFreeNoLock(handle);
+  }
+
+ private:
+#if __SIZEOF_SIZE_T__ == __SIZEOF_LONG__
+
+#if defined(__clang__) || defined(__GNUC__)
 
 Review comment:
   Is it really faster? It looks too complicated.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services