You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2020/05/10 07:58:35 UTC
[arrow] branch master updated: ARROW-8577: [Plasma][CUDA] Make CUDA
initialization lazy
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new e215e89 ARROW-8577: [Plasma][CUDA] Make CUDA initialization lazy
e215e89 is described below
commit e215e89ba920cf19564caf24acbfe5022b20d0f0
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Sun May 10 16:57:55 2020 +0900
ARROW-8577: [Plasma][CUDA] Make CUDA initialization lazy
If we make CUDA initialization lazy, we can use a CUDA-enabled binary on
a host without CUDA.
Closes #7138 from kou/cpp-plasma-cuda-init-delay
Authored-by: Sutou Kouhei <ko...@clear-code.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
cpp/src/plasma/client.cc | 31 ++++++++++++-------------
cpp/src/plasma/plasma.pc.in | 1 +
cpp/src/plasma/store.cc | 16 ++++++-------
cpp/src/plasma/store.h | 4 +---
dev/tasks/linux-packages/github.linux.amd64.yml | 1 +
dev/tasks/linux-packages/travis.linux.arm64.yml | 1 +
6 files changed, 27 insertions(+), 27 deletions(-)
diff --git a/cpp/src/plasma/client.cc b/cpp/src/plasma/client.cc
index 127ee81..1c2ec68 100644
--- a/cpp/src/plasma/client.cc
+++ b/cpp/src/plasma/client.cc
@@ -313,6 +313,10 @@ class PlasmaClient::Impl : public std::enable_shared_from_this<PlasmaClient::Imp
uint64_t ComputeObjectHashCPU(const uint8_t* data, int64_t data_size,
const uint8_t* metadata, int64_t metadata_size);
+#ifdef PLASMA_CUDA
+ arrow::Result<std::shared_ptr<CudaContext>> GetCudaContext(int device_number);
+#endif
+
/// File descriptor of the Unix domain socket that connects to the store.
int store_conn_;
/// Table of dlmalloc buffer files that have been memory mapped so far. This
@@ -332,22 +336,11 @@ class PlasmaClient::Impl : public std::enable_shared_from_this<PlasmaClient::Imp
std::deque<std::tuple<ObjectID, int64_t, int64_t>> pending_notification_;
/// A mutex which protects this class.
std::recursive_mutex client_mutex_;
-
-#ifdef PLASMA_CUDA
- /// Cuda Device Manager.
- arrow::cuda::CudaDeviceManager* manager_;
-#endif
};
PlasmaBuffer::~PlasmaBuffer() { ARROW_UNUSED(client_->Release(object_id_)); }
-PlasmaClient::Impl::Impl() : store_conn_(0), store_capacity_(0) {
-#ifdef PLASMA_CUDA
- auto maybe_manager = CudaDeviceManager::Instance();
- DCHECK_OK(maybe_manager.status());
- manager_ = *maybe_manager;
-#endif
-}
+PlasmaClient::Impl::Impl() : store_conn_(0), store_capacity_(0) {}
PlasmaClient::Impl::~Impl() {}
@@ -416,6 +409,14 @@ void PlasmaClient::Impl::IncrementObjectCount(const ObjectID& object_id,
object_entry->count += 1;
}
+#ifdef PLASMA_CUDA
+arrow::Result<std::shared_ptr<CudaContext>> PlasmaClient::Impl::GetCudaContext(
+ int device_number) {
+ ARROW_ASSIGN_OR_RAISE(auto manager, CudaDeviceManager::Instance());
+ return manager->GetContext(device_number - 1);
+}
+#endif
+
Status PlasmaClient::Impl::Create(const ObjectID& object_id, int64_t data_size,
const uint8_t* metadata, int64_t metadata_size,
std::shared_ptr<Buffer>* data, int device_num,
@@ -454,8 +455,7 @@ Status PlasmaClient::Impl::Create(const ObjectID& object_id, int64_t data_size,
}
} else {
#ifdef PLASMA_CUDA
- std::shared_ptr<CudaContext> context;
- ARROW_ASSIGN_OR_RAISE(context, manager_->GetContext(device_num - 1));
+ ARROW_ASSIGN_OR_RAISE(auto context, GetCudaContext(device_num));
GpuProcessHandle* handle = new GpuProcessHandle();
handle->client_count = 2;
ARROW_ASSIGN_OR_RAISE(handle->ptr, context->OpenIpcBuffer(*object.ipc_handle));
@@ -639,8 +639,7 @@ Status PlasmaClient::Impl::GetBuffers(
std::lock_guard<std::mutex> lock(gpu_mutex);
auto iter = gpu_object_map.find(object_ids[i]);
if (iter == gpu_object_map.end()) {
- std::shared_ptr<CudaContext> context;
- ARROW_ASSIGN_OR_RAISE(context, manager_->GetContext(object->device_num - 1));
+ ARROW_ASSIGN_OR_RAISE(auto context, GetCudaContext(object->device_num));
GpuProcessHandle* obj_handle = new GpuProcessHandle();
obj_handle->client_count = 1;
ARROW_ASSIGN_OR_RAISE(obj_handle->ptr,
diff --git a/cpp/src/plasma/plasma.pc.in b/cpp/src/plasma/plasma.pc.in
index 36a7c82..17af015 100644
--- a/cpp/src/plasma/plasma.pc.in
+++ b/cpp/src/plasma/plasma.pc.in
@@ -28,5 +28,6 @@ executable=${plasma_store_server}
Name: Plasma
Description: Plasma is an in-memory object store and cache for big data.
Version: @PLASMA_VERSION@
+Requires: arrow
Libs: -L${libdir} -lplasma
Cflags: -I${includedir}
diff --git a/cpp/src/plasma/store.cc b/cpp/src/plasma/store.cc
index 9a42a59..b12e842 100644
--- a/cpp/src/plasma/store.cc
+++ b/cpp/src/plasma/store.cc
@@ -123,11 +123,6 @@ PlasmaStore::PlasmaStore(EventLoop* loop, std::string directory, bool hugepages_
external_store_(external_store) {
store_info_.directory = directory;
store_info_.hugepages_enabled = hugepages_enabled;
-#ifdef PLASMA_CUDA
- auto maybe_manager = CudaDeviceManager::Instance();
- DCHECK_OK(maybe_manager.status());
- manager_ = *maybe_manager;
-#endif
}
// TODO(pcm): Get rid of this destructor by using RAII to clean up data.
@@ -207,11 +202,16 @@ uint8_t* PlasmaStore::AllocateMemory(size_t size, bool evict_if_full, int* fd,
}
#ifdef PLASMA_CUDA
+arrow::Result<std::shared_ptr<CudaContext>> PlasmaStore::GetCudaContext(int device_num) {
+ DCHECK_NE(device_num, 0);
+ ARROW_ASSIGN_OR_RAISE(auto manager, CudaDeviceManager::Instance());
+ return manager->GetContext(device_num - 1);
+}
+
Status PlasmaStore::AllocateCudaMemory(
int device_num, int64_t size, uint8_t** out_pointer,
std::shared_ptr<CudaIpcMemHandle>* out_ipc_handle) {
- DCHECK_NE(device_num, 0);
- ARROW_ASSIGN_OR_RAISE(auto context, manager_->GetContext(device_num - 1));
+ ARROW_ASSIGN_OR_RAISE(auto context, GetCudaContext(device_num));
ARROW_ASSIGN_OR_RAISE(auto cuda_buffer, context->Allocate(static_cast<int64_t>(size)));
*out_pointer = reinterpret_cast<uint8_t*>(cuda_buffer->address());
// The IPC handle will keep the buffer memory alive
@@ -219,7 +219,7 @@ Status PlasmaStore::AllocateCudaMemory(
}
Status PlasmaStore::FreeCudaMemory(int device_num, int64_t size, uint8_t* pointer) {
- ARROW_ASSIGN_OR_RAISE(auto context, manager_->GetContext(device_num - 1));
+ ARROW_ASSIGN_OR_RAISE(auto context, GetCudaContext(device_num));
RETURN_NOT_OK(context->Free(pointer, size));
return Status::OK();
}
diff --git a/cpp/src/plasma/store.h b/cpp/src/plasma/store.h
index 1638db4..1827989 100644
--- a/cpp/src/plasma/store.h
+++ b/cpp/src/plasma/store.h
@@ -206,6 +206,7 @@ class PlasmaStore {
uint8_t* AllocateMemory(size_t size, bool evict_if_full, int* fd, int64_t* map_size,
ptrdiff_t* offset, Client* client, bool is_create);
#ifdef PLASMA_CUDA
+ arrow::Result<std::shared_ptr<arrow::cuda::CudaContext>> GetCudaContext(int device_num);
Status AllocateCudaMemory(int device_num, int64_t size, uint8_t** out_pointer,
std::shared_ptr<CudaIpcMemHandle>* out_ipc_handle);
@@ -239,9 +240,6 @@ class PlasmaStore {
/// Manages worker threads for handling asynchronous/multi-threaded requests
/// for reading/writing data to/from external store.
std::shared_ptr<ExternalStore> external_store_;
-#ifdef PLASMA_CUDA
- arrow::cuda::CudaDeviceManager* manager_;
-#endif
};
} // namespace plasma
diff --git a/dev/tasks/linux-packages/github.linux.amd64.yml b/dev/tasks/linux-packages/github.linux.amd64.yml
index 843350f..58931aa 100644
--- a/dev/tasks/linux-packages/github.linux.amd64.yml
+++ b/dev/tasks/linux-packages/github.linux.amd64.yml
@@ -47,6 +47,7 @@ jobs:
rake version:update
rake docker:pull || :
rake BUILD_DIR=build {{ build_task }}
+ sudo rm -rf */*/build
popd
env:
APT_TARGETS: {{ target }}
diff --git a/dev/tasks/linux-packages/travis.linux.arm64.yml b/dev/tasks/linux-packages/travis.linux.arm64.yml
index 2827820..5d1db3d 100644
--- a/dev/tasks/linux-packages/travis.linux.arm64.yml
+++ b/dev/tasks/linux-packages/travis.linux.arm64.yml
@@ -52,6 +52,7 @@ script:
- |
rake docker:pull || :
- rake BUILD_DIR=build {{ build_task }}
+ - sudo rm -rf */*/build
- popd
after_success: