You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2021/05/29 22:05:15 UTC

[GitHub] [tvm] csullivan commented on a change in pull request #7711: [Texture support][Part 0] Device API and runtime support

csullivan commented on a change in pull request #7711:
URL: https://github.com/apache/tvm/pull/7711#discussion_r641988881



##########
File path: src/runtime/opencl/opencl_device_api.cc
##########
@@ -138,41 +195,122 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t size, size_t alignment,
   this->Init();
   ICHECK(context != nullptr) << "No OpenCL device";
   cl_int err_code;
-  cl_mem mptr = clCreateBuffer(this->context, CL_MEM_READ_WRITE, size, nullptr, &err_code);
+  cl::BufferDescriptor* desc = new cl::BufferDescriptor;
+  desc->buffer = clCreateBuffer(this->context, CL_MEM_READ_WRITE, size, nullptr, &err_code);
+  desc->layout = cl::BufferDescriptor::MemoryLayout::BUFFER_1D;
+  desc->shape.push_back(size);
+  desc->dtype = type_hint;
   OPENCL_CHECK_ERROR(err_code);
-  return mptr;
+  return desc;
+}
+
+void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
+                                      Optional<String> mem_scope) {
+  if (!mem_scope.defined() || mem_scope.value() == "global") {
+    return DeviceAPI::AllocDataSpace(dev, ndim, shape, dtype, mem_scope);
+  }
+  ICHECK(IsTextureStorage(std::string(mem_scope.value())))
+      << "Device does not support allocate data space with "
+      << "specified memory scope: " << mem_scope.value();
+
+  ICHECK(ndim > 2) << "Shape for texture allocation must be at least rank 3; "
+                   << "provided shape is rank " << ndim;
+
+  cl::BufferDescriptor* desc = new cl::BufferDescriptor(mem_scope);
+  size_t axis = DefaultTextureLayoutSeparator(ndim, mem_scope.value());
+  auto texture = ApplyTexture2DFlattening<int64_t>(shape, ndim, axis);
+  desc->buffer = AllocTexture(dev, texture.width, texture.height, dtype);
+  desc->shape.insert(desc->shape.end(), &shape[0], &shape[ndim]);
+  desc->dtype = dtype;
+  return desc;
 }
 
 void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) {
   // We have to make sure that the memory object is not in the command queue
   // for some OpenCL platforms.
   OPENCL_CALL(clFinish(this->GetQueue(dev)));
 
-  cl_mem mptr = static_cast<cl_mem>(ptr);
-  OPENCL_CALL(clReleaseMemObject(mptr));
+  cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(ptr);
+  OPENCL_CALL(clReleaseMemObject(desc->buffer));
+  delete desc;
 }
 
-void OpenCLWorkspace::CopyDataFromTo(const void* from, size_t from_offset, void* to,
-                                     size_t to_offset, size_t size, Device dev_from, Device dev_to,
-                                     DLDataType type_hint, TVMStreamHandle stream) {
+cl_mem OpenCLWorkspace::AllocTexture(Device dev, size_t width, size_t height,
+                                     DLDataType type_hint) {
   this->Init();
-  ICHECK(stream == nullptr);
-  if (IsOpenCLDevice(dev_from) && IsOpenCLDevice(dev_to)) {
-    OPENCL_CALL(clEnqueueCopyBuffer(this->GetQueue(dev_to),
-                                    static_cast<cl_mem>((void*)from),  // NOLINT(*)
-                                    static_cast<cl_mem>(to), from_offset, to_offset, size, 0,
-                                    nullptr, nullptr));
-  } else if (IsOpenCLDevice(dev_from) && dev_to.device_type == kDLCPU) {
-    OPENCL_CALL(clEnqueueReadBuffer(this->GetQueue(dev_from),
-                                    static_cast<cl_mem>((void*)from),  // NOLINT(*)
-                                    CL_FALSE, from_offset, size, static_cast<char*>(to) + to_offset,
-                                    0, nullptr, nullptr));
-    OPENCL_CALL(clFinish(this->GetQueue(dev_from)));
-  } else if (dev_from.device_type == kDLCPU && IsOpenCLDevice(dev_to)) {
-    OPENCL_CALL(clEnqueueWriteBuffer(this->GetQueue(dev_to), static_cast<cl_mem>(to), CL_FALSE,
-                                     to_offset, size, static_cast<const char*>(from) + from_offset,
-                                     0, nullptr, nullptr));
-    OPENCL_CALL(clFinish(this->GetQueue(dev_to)));
+  ICHECK(context != nullptr) << "No OpenCL device";
+  cl_int err_code;
+  cl_channel_type cl_type = DTypeToOpenCLChannelType(type_hint);
+  cl_image_format format = {CL_RGBA, cl_type};
+  cl_image_desc descriptor = {CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 0, 0, 0, 0};
+  cl_mem mptr =
+      clCreateImage(this->context, CL_MEM_READ_WRITE, &format, &descriptor, nullptr, &err_code);
+  OPENCL_CHECK_ERROR(err_code);
+  return mptr;
+}
+
+void* OpenCLWorkspace::AllocTextureWorkspace(Device dev, size_t width, size_t height,
+                                             DLDataType type_hint) {
+  return GetThreadEntry()->texture_pool.AllocTexture(dev, width, height, type_hint);
+}
+
+void OpenCLWorkspace::FreeTextureWorkspace(Device dev, void* ptr) {
+  GetThreadEntry()->texture_pool.FreeTexture(dev, ptr);
+}
+
+void OpenCLWorkspace::CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
+  size_t nbytes = GetDataSize(*from);
+  ICHECK_EQ(nbytes, GetDataSize(*to));
+  ICHECK(IsContiguous(*from) && IsContiguous(*to))
+      << "CopyDataFromTo only support contiguous array for now";
+
+  if (IsOpenCLDevice(from->device) && IsOpenCLDevice(to->device)) {
+    const auto* from_desc = static_cast<const cl::BufferDescriptor*>(from->data);
+    ICHECK(from_desc->layout == cl::BufferDescriptor::MemoryLayout::BUFFER_1D)
+        << "Device to device copying is currently only implemented for OpenCL buffer storage";
+    auto* to_desc = static_cast<cl::BufferDescriptor*>(to->data);
+    OPENCL_CALL(clEnqueueCopyBuffer(this->GetQueue(to->device), from_desc->buffer, to_desc->buffer,
+                                    from->byte_offset, to->byte_offset, nbytes, 0, nullptr,
+                                    nullptr));
+  } else if (IsOpenCLDevice(from->device) && to->device.device_type == kDLCPU) {
+    const auto* from_desc = static_cast<const cl::BufferDescriptor*>(from->data);
+    switch (from_desc->layout) {
+      case cl::BufferDescriptor::MemoryLayout::BUFFER_1D:
+        OPENCL_CALL(clEnqueueReadBuffer(
+            this->GetQueue(from->device), from_desc->buffer, CL_FALSE, from->byte_offset, nbytes,
+            static_cast<char*>(to->data) + to->byte_offset, 0, nullptr, nullptr));
+        break;
+      case cl::BufferDescriptor::MemoryLayout::IMAGE_2D_ACTIVATION:
+      case cl::BufferDescriptor::MemoryLayout::IMAGE_2D_WEIGHT:
+        auto image_info = GetImageInfo(from_desc, from);
+        // TODO(csullivan): Support calculating row_pitch correctly in the case of reuse.

Review comment:
       Thank you for the good suggestion, I added a [test](https://github.com/apache/tvm/pull/7711/files#diff-e9741363006d59a2ce0f9cdac61da8f0b55b79aed6e9cab9a0922113266df46eR65) to demonstrate writing to a subview of a texture and check that the data from larger allocation contains the [expected](https://github.com/apache/tvm/pull/7711/files#diff-e9741363006d59a2ce0f9cdac61da8f0b55b79aed6e9cab9a0922113266df46eR117) two dimensional strides when copied back to host. 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org