Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2020/07/07 18:03:38 UTC

[GitHub] [incubator-mxnet] szha commented on a change in pull request #18582: Refactoring of Pooled Storage Manager classes

szha commented on a change in pull request #18582:
URL: https://github.com/apache/incubator-mxnet/pull/18582#discussion_r451039888



##########
File path: src/storage/cpu_device_storage.h
##########
@@ -63,15 +60,12 @@ class CPUDeviceStorage {
 };  // class CPUDeviceStorage
 
 inline void CPUDeviceStorage::Alloc(Storage::Handle* handle) {
-  handle->dptr = nullptr;
-  const size_t size = handle->size;
-  if (size == 0) return;
-
+  // NOTE: handle->size is NOT 0. See calling method: StorageImpl::Alloc

Review comment:
       remove the comment as it doesn't help future readers of the code. If needed, document the invariant in the interface definition instead. Same for other occurrences.
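
       A possible way to follow this suggestion is to state the invariant once on the
       interface declarations (a sketch only; the exact doxygen wording and the layout of
       src/storage/storage_manager.h are assumptions, though the signatures match the
       overrides shown in this PR):

           class StorageManager {
            public:
             /*!
              * \brief Allocate storage for the handle.
              * \param handle handle to allocate; callers (e.g. StorageImpl::Alloc)
              *        guarantee handle->size != 0, so implementations need not
              *        handle zero-sized requests.
              */
             virtual void Alloc(Storage::Handle* handle) = 0;
             /*!
              * \brief Free the storage owned by the handle.
              * \param handle handle to free; callers (e.g. StorageImpl::Free)
              *        guarantee handle.dptr != nullptr.
              */
             virtual void Free(Storage::Handle handle) = 0;
           };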

##########
File path: src/storage/pooled_storage_manager.h
##########
@@ -25,372 +25,420 @@
 #ifndef MXNET_STORAGE_POOLED_STORAGE_MANAGER_H_
 #define MXNET_STORAGE_POOLED_STORAGE_MANAGER_H_
 
-#if MXNET_USE_CUDA
-  #include <cuda_runtime.h>
-#endif  // MXNET_USE_CUDA
-
-#include <mxnet/base.h>
-#include <mxnet/storage.h>
-#include <unordered_map>
-#include <algorithm>
+#include <string>
 #include <vector>
+#include <algorithm>
 #include <mutex>
-#include <new>
+#include <tuple>
 #include "./storage_manager.h"
-#include "../common/cuda_utils.h"
-#include "../common/utils.h"
 #include "../profiler/storage_profiler.h"
 
 
 namespace mxnet {
 namespace storage {
 
+typedef enum {
+  pool_type,
+  pool_page_size,
+  large_alloc_size,
+  round_linear_cutoff,
+  pool_reserve,
+} env_var_type;
+
+const std::string env_var_name(const char* dev_type, env_var_type type);
+
 #if MXNET_USE_CUDA
+#define SET_DEVICE(device_store, contextHelper, ctx, flag) \
+      const auto *device_store = flag? contextHelper.get()->SetCurrentDevice(ctx) : nullptr;
+#define UNSET_DEVICE(device_store)    delete device_store
+
+#define SET_GPU_PROFILER(prof, contextHelper)                          \
+      auto prof = contextHelper->contextGPU()?                         \
+                  profiler::GpuDeviceStorageProfiler::Get() : nullptr; \
+      if (!prof->IsProfiling()) prof = nullptr
+
+#define GPU_PROFILER_ON_FREE(prof, pntr)    if (prof) prof->OnFree(pntr)
+#else
+// empty macros when MXNet is compiled without CUDA support
+#define SET_DEVICE(...)
+#define UNSET_DEVICE(...)
+#define SET_GPU_PROFILER(prof, ...)
+#define GPU_PROFILER_ON_FREE(prof, ...)
+#endif
+
 /*!
- * \brief Storage manager with a memory pool on gpu. Memory chunks are reused based on exact size
- * match.
+ * \brief Storage manager with a memory pool for GPU/CPU/CPUPinned memory chunks,
+ * which are reused based on rounded size match.
+ * The rounding method is defined by the template parameter BucketingStrategy.
+ * The memory pool type is defined by the template parameter StoringMethod.
+ * Allocation/freeing of memory is done by contextHelper_, a pointer to a
+ * memory-specific instance of a class derived from ContextHelper.
  */
-class GPUPooledStorageManager final : public StorageManager {
+template<typename BucketingStrategy, typename StoringMethod>
+class PooledStorageManager : public StorageManager,
+             public BucketingStrategy, public StoringMethod {
  public:
-  /*!
-   * \brief Default constructor.
-   *
-   * \param initial_context context used by this Storage Manager
-   */
-  explicit GPUPooledStorageManager(Context initial_context) :
-    initial_context_(initial_context) {
-    reserve_ = dmlc::GetEnv("MXNET_GPU_MEM_POOL_RESERVE", 5);
-    page_size_ = dmlc::GetEnv("MXNET_GPU_MEM_POOL_PAGE_SIZE", 4096);
-    large_alloc_round_size_ = dmlc::GetEnv("MXNET_GPU_MEM_LARGE_ALLOC_ROUND_SIZE", 2 * 1024 * 1024);
-    if (large_alloc_round_size_ <= 0) {
-      LOG(FATAL) << "MXNET_GPU_MEM_LARGE_ALLOC_ROUND_SIZE cannot be set to a value <= 0, found: "
-                 << large_alloc_round_size_;
+  explicit PooledStorageManager(const Context &ctx, int num_gpu_device) {
+    const char *dev_type = nullptr;
+    switch (dev_type_ = ctx.dev_type) {
+#if MXNET_USE_CUDA
+      case Context::kGPU:       contextHelper_ = std::make_unique<ContextHelperGPU>();
+                                dev_type = "GPU";
+                                break;
+      case Context::kCPUPinned: dev_type = "CPU_PINNED";
+                                if (num_gpu_device > 1) {
+                                  contextHelper_ = std::make_unique<ContextHelperPinned>();
+                                  dev_type_ = Context::kGPU;
+                                  break;
+                                }
+#else
+      case Context::kCPUPinned: dev_type = "CPU_PINNED";
+#endif
+                                dev_type_ = Context::kCPU;
+      case Context::kCPU:       contextHelper_ = std::make_unique<ContextHelperCPU>();
+                                dev_type = "CPU";
+      default:                  break;
     }
-    if (page_size_ < NDEV) {
-      LOG(FATAL) << "MXNET_GPU_MEM_POOL_PAGE_SIZE cannot be set to a value smaller than " << NDEV \
-                 << ". Got " << page_size_ << ".";
+
+    BucketingStrategy::InitRoundHelper(dev_type);
+    StoringMethod::InitContainer(this);
+    contextHelper_->set_initilal_context(ctx);
+
+    // percentage of reserved memory
+    if (dev_type) {
+      const auto env_var = env_var_name(dev_type, pool_reserve);
+      const size_t reserve = dmlc::GetEnv(env_var.c_str(), 5);
+      const size_t total = std::get<1>(contextHelper_->getMemoryInfo());
+      memory_allocation_limit_ = total * reserve / 100;
     }
   }
   /*!
    * \brief Default destructor.
    */
-  ~GPUPooledStorageManager() {
+  ~PooledStorageManager() override {
     ReleaseAll();
   }
 
   void Alloc(Storage::Handle* handle) override;
-  void Free(Storage::Handle handle) override;
+  void Free(Storage::Handle handle) override {
+    // Insert returned memory in cache
+    // NOTE: handle.dptr is NOT nullptr. See calling method: StorageImpl::Free

Review comment:
       remove the comment since it doesn't refer to existing code. If needed, document it in the interface definition instead.
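
       For readers following the design described in the doc comment of this hunk
       (PooledStorageManager composing its rounding and pooling behavior from the
       BucketingStrategy and StoringMethod template parameters via inheritance), here is
       a minimal self-contained sketch of that policy-based pattern. The RoundToPage and
       SizeKeyedCache policy classes below are hypothetical stand-ins, not the classes
       used in the PR:

           #include <cstddef>
           #include <iostream>
           #include <new>
           #include <unordered_map>
           #include <vector>

           // Hypothetical bucketing policy: round requested sizes up to a page multiple.
           class RoundToPage {
            public:
             size_t RoundAllocSize(size_t size) const {
               const size_t page = 4096;
               return ((size + page - 1) / page) * page;
             }
           };

           // Hypothetical storing policy: cache freed chunks keyed by rounded size.
           class SizeKeyedCache {
            public:
             void InsertInCache(size_t key, void* ptr) { pool_[key].push_back(ptr); }
             void* GetFromCache(size_t key) {
               auto it = pool_.find(key);
               if (it == pool_.end() || it->second.empty()) return nullptr;
               void* ptr = it->second.back();
               it->second.pop_back();
               return ptr;
             }
            private:
             std::unordered_map<size_t, std::vector<void*>> pool_;
           };

           // The manager mixes both policies in by inheritance, as the PR does.
           template <typename BucketingStrategy, typename StoringMethod>
           class PooledStorageManager : public BucketingStrategy, public StoringMethod {
            public:
             void* Alloc(size_t size) {
               const size_t rounded = BucketingStrategy::RoundAllocSize(size);
               if (void* reused = StoringMethod::GetFromCache(rounded)) return reused;
               return ::operator new(rounded);  // stands in for malloc/cudaMalloc
             }
             void Free(void* ptr, size_t size) {
               StoringMethod::InsertInCache(BucketingStrategy::RoundAllocSize(size), ptr);
             }
           };

           int main() {
             PooledStorageManager<RoundToPage, SizeKeyedCache> mgr;
             void* p = mgr.Alloc(1000);  // rounded up to 4096 bytes
             mgr.Free(p, 1000);
             void* q = mgr.Alloc(2000);  // also rounds to 4096, reuses the cached chunk
             std::cout << (p == q) << std::endl;  // prints 1
             return 0;
           }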




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org