You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ma...@apache.org on 2023/09/25 01:32:27 UTC

[tvm] branch main updated: [OpenCL] Don't initialize OpenCL runtime on host (#15745)

This is an automated email from the ASF dual-hosted git repository.

masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new ae89c1e56d [OpenCL] Don't initialize OpenCL runtime on host (#15745)
ae89c1e56d is described below

commit ae89c1e56da5ac38e68575c0baec047c442b266d
Author: Egor Churaev <eg...@gmail.com>
AuthorDate: Mon Sep 25 04:32:22 2023 +0300

    [OpenCL] Don't initialize OpenCL runtime on host (#15745)
    
    * [OpenCL] Don't initialize OpenCL runtime on host
    
    After the OpenCL wrapper was added, it became possible to build TVM with
    OpenCL support even on a host that doesn't have the OpenCL libraries. But
    if you try to compile an OpenCL module for a remote device on such a host
    machine, you will see an error that the OpenCL library cannot be opened.
    
    To avoid this problem, OpenCL functions must be called only at
    runtime. So the call that initializes the OpenCL workspace was removed
    from OpenCLModuleNode, and a new function `IsProgramCreated` was added.
    The new function is needed to prepare the vectors of OpenCL programs
    associated with OpenCL devices. Previously this was done during
    OpenCLModule initialization; now these vectors are created only at
    runtime, after the list of available OpenCL devices has been obtained.
    
    * Call workspace init function before all OpenCL API calls
---
 src/runtime/opencl/opencl_common.h      |  6 ++++--
 src/runtime/opencl/opencl_device_api.cc |  4 ++++
 src/runtime/opencl/opencl_module.cc     | 22 +++++++++++++++-------
 src/runtime/opencl/opencl_module.h      |  1 +
 4 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index a303141357..8c1607c4e5 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -220,7 +220,7 @@ struct BufferDescriptor;
 class OpenCLWorkspace : public DeviceAPI {
  public:
   // type key
-  std::string type_key;
+  std::string type_key{"opencl"};
   // available platforms
   std::vector<cl_platform_id> platform_ids;
   // map platform to its context
@@ -253,7 +253,7 @@ class OpenCLWorkspace : public DeviceAPI {
   // Initialize the device.
   void Init(const std::string& type_key, const std::string& device_type,
             const std::string& platform_name = "");
-  virtual void Init() { Init("opencl", "gpu"); }
+  virtual void Init() { Init(this->type_key, "gpu"); }
   // Check whether the context is OpenCL or not.
   virtual bool IsOpenCLDevice(Device dev) { return dev.device_type == kDLOpenCL; }
   // get the queue of the device
@@ -465,6 +465,8 @@ class OpenCLModuleNode : public OpenCLModuleNodeBase {
       : OpenCLModuleNodeBase(fmap), data_(data), fmt_(fmt), source_(source) {}
 
   PackedFunc GetFunction(const String& name, const ObjectPtr<Object>& sptr_to_self) final;
+  // Return true if OpenCL program for the requested function and device was created
+  bool IsProgramCreated(const std::string& func_name, int device_id);
   void SaveToFile(const String& file_name, const String& format) final;
   void SaveToBinary(dmlc::Stream* stream) final;
   void SetPreCompiledPrograms(const std::string& bytes);
diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc
index 35e77eb6d1..fb9adc2757 100644
--- a/src/runtime/opencl/opencl_device_api.cc
+++ b/src/runtime/opencl/opencl_device_api.cc
@@ -111,6 +111,7 @@ OpenCLWorkspace* OpenCLWorkspace::Global() {
 }
 
 cl_device_id OpenCLWorkspace::GetCLDeviceID(int device_id) {
+  this->Init();
   ICHECK_LT(device_id, devices.size()) << "Invalid device id " << device_id << ". " << GetError();
   return devices[device_id];
 }
@@ -210,6 +211,7 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
 
 void* OpenCLWorkspace::CreateHostPtrIfEnabled(cl::BufferDescriptor* desc, Device dev, size_t size) {
 #if defined(OPENCL_ENABLE_HOST_PTR)
+  this->Init();
   cl_int err_code;
   desc->host_ptr = reinterpret_cast<cl_uchar*>(
       clEnqueueMapBuffer(this->GetQueue(dev), desc->buffer, CL_TRUE, CL_MAP_WRITE, 0,
@@ -300,6 +302,7 @@ void OpenCLWorkspace::FreeTextureWorkspace(Device dev, void* ptr) {
 }
 
 void OpenCLWorkspace::CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
+  this->Init();
   size_t nbytes = GetDataSize(*from);
   ICHECK_EQ(nbytes, GetDataSize(*to));
   ICHECK(IsContiguous(*from) && IsContiguous(*to))
@@ -379,6 +382,7 @@ void OpenCLWorkspace::CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHand
 }
 
 void OpenCLWorkspace::StreamSync(Device dev, TVMStreamHandle stream) {
+  this->Init();
   ICHECK(stream == nullptr);
   OPENCL_CALL(clFinish(this->GetQueue(dev)));
 }
diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index 6829d46d43..567b7ad88a 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -185,7 +185,6 @@ String OpenCLModuleNode::GetSource(const String& format) {
 
 void OpenCLModuleNode::Init() {
   workspace_ = GetGlobalWorkspace();
-  workspace_->Init();
   // initialize the kernel id, need to lock global table.
   std::lock_guard<std::mutex> lock(workspace_->mu);
   for (const auto& kv : fmap_) {
@@ -208,10 +207,17 @@ void OpenCLModuleNode::Init() {
                                    << "delimiter was found.";
   ICHECK_EQ(fmap_.size(), parsed_kernels_.size())
       << "The number of parsed kernel sources does not match the number of kernel functions";
+}
+
+bool OpenCLModuleNode::IsProgramCreated(const std::string& func_name, int device_id) {
+  auto size = programs_[func_name].size();
+  if (size > 0 && programs_[func_name][device_id] != nullptr) return true;
+  auto dev_size = GetGlobalWorkspace()->devices.size();
+  ICHECK(device_id < static_cast<int>(dev_size))
+      << "Device id " << device_id << " is bigger than number of available devices";
   // zero initialize cl_program pointers for each device kernel
-  for (auto& kv : parsed_kernels_) {
-    programs_.insert({kv.first, std::vector<cl_program>(workspace_->devices.size(), nullptr)});
-  }
+  if (size == 0) programs_[func_name].resize(dev_size, nullptr);
+  return false;
 }
 
 cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w, cl::OpenCLThreadEntry* t,
@@ -220,7 +226,7 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w, cl::OpenCLThre
   int device_id = t->device.device_id;
   auto did = w->GetCLDeviceID(device_id);
   auto platform = w->device_to_platform[did];
-  if (programs_[func_name][device_id] == nullptr) {
+  if (!IsProgramCreated(func_name, device_id)) {
     // create program
     if (fmt_ == "cl") {
       const char* s = parsed_kernels_[func_name].c_str();
@@ -268,6 +274,7 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w, cl::OpenCLThre
 }
 
 void OpenCLModuleNode::SetPreCompiledPrograms(const std::string& bytes) {
+  workspace_->Init();
   std::string data = bytes;
   dmlc::MemoryStringStream reader(&data);
   dmlc::Stream* strm = &reader;
@@ -280,7 +287,7 @@ void OpenCLModuleNode::SetPreCompiledPrograms(const std::string& bytes) {
     std::vector<unsigned char> bin_vector;
     strm->Read(&name);
     strm->Read(&bin_vector);
-    if (programs_[name][device_id] == nullptr) {
+    if (!IsProgramCreated(name, device_id)) {
       cl_int err = 0;
       cl_int binaryStatus;
       size_t binarySize = bin_vector.size();
@@ -310,6 +317,7 @@ void OpenCLModuleNode::SetPreCompiledPrograms(const std::string& bytes) {
 }
 
 std::string OpenCLModuleNode::GetPreCompiledPrograms() {
+  workspace_->Init();
   std::string data;
   dmlc::MemoryStringStream writer(&data);
   dmlc::Stream* strm = &writer;
@@ -319,7 +327,7 @@ std::string OpenCLModuleNode::GetPreCompiledPrograms() {
     cl::OpenCLThreadEntry* t = workspace_->GetThreadEntry();
     int device_id = t->device.device_id;
     t->kernel_table.resize(workspace_->num_registered_kernels);
-    if (programs_[std::string(name)][device_id] == nullptr) {
+    if (!IsProgramCreated(name, device_id)) {
       InstallKernel(workspace_, t, name, kid_map_[name]);
     }
     size_t size;
diff --git a/src/runtime/opencl/opencl_module.h b/src/runtime/opencl/opencl_module.h
index 834f53510e..22fc119e03 100644
--- a/src/runtime/opencl/opencl_module.h
+++ b/src/runtime/opencl/opencl_module.h
@@ -42,6 +42,7 @@ namespace runtime {
  * \param data The module data.
  * \param fmt The format of the data, can be "clbin", "cl"
  * \param fmap The map function information map of each function.
+ * \param source Generated OpenCL kernels.
  */
 Module OpenCLModuleCreate(std::string data, std::string fmt,
                           std::unordered_map<std::string, FunctionInfo> fmap, std::string source);