You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2020/05/10 07:58:35 UTC
[arrow] branch master updated: ARROW-8577: [Plasma][CUDA] Make CUDA initialization lazy

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new e215e89  ARROW-8577: [Plasma][CUDA] Make CUDA initialization lazy
e215e89 is described below

commit e215e89ba920cf19564caf24acbfe5022b20d0f0
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Sun May 10 16:57:55 2020 +0900

    ARROW-8577: [Plasma][CUDA] Make CUDA initialization lazy
    
    If we make CUDA initialization lazy, we can use CUDA enabled binary on
    no CUDA host.
    
    Closes #7138 from kou/cpp-plasma-cuda-init-delay
    
    Authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 cpp/src/plasma/client.cc                        | 31 ++++++++++++-------------
 cpp/src/plasma/plasma.pc.in                     |  1 +
 cpp/src/plasma/store.cc                         | 16 ++++++-------
 cpp/src/plasma/store.h                          |  4 +---
 dev/tasks/linux-packages/github.linux.amd64.yml |  1 +
 dev/tasks/linux-packages/travis.linux.arm64.yml |  1 +
 6 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/cpp/src/plasma/client.cc b/cpp/src/plasma/client.cc
index 127ee81..1c2ec68 100644
--- a/cpp/src/plasma/client.cc
+++ b/cpp/src/plasma/client.cc
@@ -313,6 +313,10 @@ class PlasmaClient::Impl : public std::enable_shared_from_this<PlasmaClient::Imp
   uint64_t ComputeObjectHashCPU(const uint8_t* data, int64_t data_size,
                                 const uint8_t* metadata, int64_t metadata_size);
 
+#ifdef PLASMA_CUDA
+  arrow::Result<std::shared_ptr<CudaContext>> GetCudaContext(int device_number);
+#endif
+
   /// File descriptor of the Unix domain socket that connects to the store.
   int store_conn_;
   /// Table of dlmalloc buffer files that have been memory mapped so far. This
@@ -332,22 +336,11 @@ class PlasmaClient::Impl : public std::enable_shared_from_this<PlasmaClient::Imp
   std::deque<std::tuple<ObjectID, int64_t, int64_t>> pending_notification_;
   /// A mutex which protects this class.
   std::recursive_mutex client_mutex_;
-
-#ifdef PLASMA_CUDA
-  /// Cuda Device Manager.
-  arrow::cuda::CudaDeviceManager* manager_;
-#endif
 };
 
 PlasmaBuffer::~PlasmaBuffer() { ARROW_UNUSED(client_->Release(object_id_)); }
 
-PlasmaClient::Impl::Impl() : store_conn_(0), store_capacity_(0) {
-#ifdef PLASMA_CUDA
-  auto maybe_manager = CudaDeviceManager::Instance();
-  DCHECK_OK(maybe_manager.status());
-  manager_ = *maybe_manager;
-#endif
-}
+PlasmaClient::Impl::Impl() : store_conn_(0), store_capacity_(0) {}
 
 PlasmaClient::Impl::~Impl() {}
 
@@ -416,6 +409,14 @@ void PlasmaClient::Impl::IncrementObjectCount(const ObjectID& object_id,
   object_entry->count += 1;
 }
 
+#ifdef PLASMA_CUDA
+arrow::Result<std::shared_ptr<CudaContext>> PlasmaClient::Impl::GetCudaContext(
+    int device_number) {
+  ARROW_ASSIGN_OR_RAISE(auto manager, CudaDeviceManager::Instance());
+  return manager->GetContext(device_number - 1);
+}
+#endif
+
 Status PlasmaClient::Impl::Create(const ObjectID& object_id, int64_t data_size,
                                   const uint8_t* metadata, int64_t metadata_size,
                                   std::shared_ptr<Buffer>* data, int device_num,
@@ -454,8 +455,7 @@ Status PlasmaClient::Impl::Create(const ObjectID& object_id, int64_t data_size,
     }
   } else {
 #ifdef PLASMA_CUDA
-    std::shared_ptr<CudaContext> context;
-    ARROW_ASSIGN_OR_RAISE(context, manager_->GetContext(device_num - 1));
+    ARROW_ASSIGN_OR_RAISE(auto context, GetCudaContext(device_num));
     GpuProcessHandle* handle = new GpuProcessHandle();
     handle->client_count = 2;
     ARROW_ASSIGN_OR_RAISE(handle->ptr, context->OpenIpcBuffer(*object.ipc_handle));
@@ -639,8 +639,7 @@ Status PlasmaClient::Impl::GetBuffers(
         std::lock_guard<std::mutex> lock(gpu_mutex);
         auto iter = gpu_object_map.find(object_ids[i]);
         if (iter == gpu_object_map.end()) {
-          std::shared_ptr<CudaContext> context;
-          ARROW_ASSIGN_OR_RAISE(context, manager_->GetContext(object->device_num - 1));
+          ARROW_ASSIGN_OR_RAISE(auto context, GetCudaContext(object->device_num));
           GpuProcessHandle* obj_handle = new GpuProcessHandle();
           obj_handle->client_count = 1;
           ARROW_ASSIGN_OR_RAISE(obj_handle->ptr,
diff --git a/cpp/src/plasma/plasma.pc.in b/cpp/src/plasma/plasma.pc.in
index 36a7c82..17af015 100644
--- a/cpp/src/plasma/plasma.pc.in
+++ b/cpp/src/plasma/plasma.pc.in
@@ -28,5 +28,6 @@ executable=${plasma_store_server}
 Name: Plasma
 Description: Plasma is an in-memory object store and cache for big data.
 Version: @PLASMA_VERSION@
+Requires: arrow
 Libs: -L${libdir} -lplasma
 Cflags: -I${includedir}
diff --git a/cpp/src/plasma/store.cc b/cpp/src/plasma/store.cc
index 9a42a59..b12e842 100644
--- a/cpp/src/plasma/store.cc
+++ b/cpp/src/plasma/store.cc
@@ -123,11 +123,6 @@ PlasmaStore::PlasmaStore(EventLoop* loop, std::string directory, bool hugepages_
       external_store_(external_store) {
   store_info_.directory = directory;
   store_info_.hugepages_enabled = hugepages_enabled;
-#ifdef PLASMA_CUDA
-  auto maybe_manager = CudaDeviceManager::Instance();
-  DCHECK_OK(maybe_manager.status());
-  manager_ = *maybe_manager;
-#endif
 }
 
 // TODO(pcm): Get rid of this destructor by using RAII to clean up data.
@@ -207,11 +202,16 @@ uint8_t* PlasmaStore::AllocateMemory(size_t size, bool evict_if_full, int* fd,
 }
 
 #ifdef PLASMA_CUDA
+arrow::Result<std::shared_ptr<CudaContext>> PlasmaStore::GetCudaContext(int device_num) {
+  DCHECK_NE(device_num, 0);
+  ARROW_ASSIGN_OR_RAISE(auto manager, CudaDeviceManager::Instance());
+  return manager->GetContext(device_num - 1);
+}
+
 Status PlasmaStore::AllocateCudaMemory(
     int device_num, int64_t size, uint8_t** out_pointer,
     std::shared_ptr<CudaIpcMemHandle>* out_ipc_handle) {
-  DCHECK_NE(device_num, 0);
-  ARROW_ASSIGN_OR_RAISE(auto context, manager_->GetContext(device_num - 1));
+  ARROW_ASSIGN_OR_RAISE(auto context, GetCudaContext(device_num));
   ARROW_ASSIGN_OR_RAISE(auto cuda_buffer, context->Allocate(static_cast<int64_t>(size)));
   *out_pointer = reinterpret_cast<uint8_t*>(cuda_buffer->address());
   // The IPC handle will keep the buffer memory alive
@@ -219,7 +219,7 @@ Status PlasmaStore::AllocateCudaMemory(
 }
 
 Status PlasmaStore::FreeCudaMemory(int device_num, int64_t size, uint8_t* pointer) {
-  ARROW_ASSIGN_OR_RAISE(auto context, manager_->GetContext(device_num - 1));
+  ARROW_ASSIGN_OR_RAISE(auto context, GetCudaContext(device_num));
   RETURN_NOT_OK(context->Free(pointer, size));
   return Status::OK();
 }
diff --git a/cpp/src/plasma/store.h b/cpp/src/plasma/store.h
index 1638db4..1827989 100644
--- a/cpp/src/plasma/store.h
+++ b/cpp/src/plasma/store.h
@@ -206,6 +206,7 @@ class PlasmaStore {
   uint8_t* AllocateMemory(size_t size, bool evict_if_full, int* fd, int64_t* map_size,
                           ptrdiff_t* offset, Client* client, bool is_create);
 #ifdef PLASMA_CUDA
+  arrow::Result<std::shared_ptr<arrow::cuda::CudaContext>> GetCudaContext(int device_num);
   Status AllocateCudaMemory(int device_num, int64_t size, uint8_t** out_pointer,
                             std::shared_ptr<CudaIpcMemHandle>* out_ipc_handle);
 
@@ -239,9 +240,6 @@ class PlasmaStore {
   /// Manages worker threads for handling asynchronous/multi-threaded requests
   /// for reading/writing data to/from external store.
   std::shared_ptr<ExternalStore> external_store_;
-#ifdef PLASMA_CUDA
-  arrow::cuda::CudaDeviceManager* manager_;
-#endif
 };
 
 }  // namespace plasma
diff --git a/dev/tasks/linux-packages/github.linux.amd64.yml b/dev/tasks/linux-packages/github.linux.amd64.yml
index 843350f..58931aa 100644
--- a/dev/tasks/linux-packages/github.linux.amd64.yml
+++ b/dev/tasks/linux-packages/github.linux.amd64.yml
@@ -47,6 +47,7 @@ jobs:
           rake version:update
           rake docker:pull || :
           rake BUILD_DIR=build {{ build_task }}
+          sudo rm -rf */*/build
           popd
         env:
           APT_TARGETS: {{ target }}
diff --git a/dev/tasks/linux-packages/travis.linux.arm64.yml b/dev/tasks/linux-packages/travis.linux.arm64.yml
index 2827820..5d1db3d 100644
--- a/dev/tasks/linux-packages/travis.linux.arm64.yml
+++ b/dev/tasks/linux-packages/travis.linux.arm64.yml
@@ -52,6 +52,7 @@ script:
   - |
     rake docker:pull || :
   - rake BUILD_DIR=build {{ build_task }}
+  - sudo rm -rf */*/build
   - popd
 
 after_success: