You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2021/02/25 19:51:25 UTC
[GitHub] [tvm] csullivan commented on a change in pull request #7488: [Runtime] Special Memory Scope Support

csullivan commented on a change in pull request #7488:
URL: https://github.com/apache/tvm/pull/7488#discussion_r580596610



##########
File path: python/tvm/runtime/ndarray.py
##########
@@ -253,42 +254,35 @@ def numpyasarray(np_data):
     return arr, shape
 
 
-def empty(shape, dtype="float32", ctx=context(1, 0)):
+def empty(shape, dtype="float32", ctx=context(1, 0), mem_scope=None):
     """Create an empty array given shape and device
 
     Parameters
     ----------
     shape : tuple of int
-        The shape of the array
+        The shape of the array.
 
     dtype : type or str
         The data type of the array.
 
     ctx : TVMContext
-        The context of the array
+        The context of the array.
+
+    mem_scope : str
+        The memory scope of the array.
 
     Returns
     -------
     arr : tvm.nd.NDArray
         The array tvm supported.
     """
-    shape = c_array(tvm_shape_index_t, shape)
-    ndim = ctypes.c_int(len(shape))
-    handle = TVMArrayHandle()
+    arr = np.array(shape, 'int64')
+    ptr = arr.ctypes.data_as(ctypes.POINTER(ctypes.c_int64))
+    shape_ptr =  ctypes.cast(ptr, ctypes.c_void_p)
+    ndim = len(shape)
     dtype = DataType(dtype)
-    check_call(
-        _LIB.TVMArrayAlloc(
-            shape,
-            ndim,
-            ctypes.c_int(dtype.type_code),
-            ctypes.c_int(dtype.bits),
-            ctypes.c_int(dtype.lanes),
-            ctx.device_type,
-            ctx.device_id,
-            ctypes.byref(handle),
-        )
-    )
-    return _make_array(handle, False, False)
+    arr = _ffi_api.TVMArrayAllocWithScope(shape_ptr, ndim, dtype, ctx, mem_scope)

Review comment:
       What necessitates the change from calling via _LIB to calling via _ffi_api?

##########
File path: include/tvm/runtime/device_api.h
##########
@@ -90,6 +90,17 @@ class TVM_DLL DeviceAPI {
    */
   virtual void* AllocDataSpace(TVMContext ctx, size_t nbytes, size_t alignment,
                                DLDataType type_hint) = 0;
+  /*!
+   * \brief Allocate a data space on device with memory scope support.
+   * \param ctx The device context to perform operation.
+   * \param ndim The number of dimension of allocated tensor.
+   * \param shape The shape of allocated tensor.
+   * \param dtype The type of elements.
+   * \param mem_scope The memory scope of allocated tensor.
+   * \return The allocated device pointer.
+   */
+  virtual void* AllocDataSpace(TVMContext ctx, int ndim, const int64_t* shape, DLDataType dtype,
+                               Optional<String> mem_scope = NullOpt);

Review comment:
       In the RFC you mention introducing `mem_scope` as an enum or other structure. In this implementation we are opting for an optional string. Why the change from the RFC?
   
   I'm considering how a backend could interpret different sub-scopes, e.g. `texture:activation` or `texture:weight`. The string format provides more flexibility; I just want to make sure our thinking is aligned on this.

##########
File path: src/runtime/rpc/rpc_endpoint.h
##########
@@ -147,8 +146,7 @@ class RPCEndpoint {
    * \param ctx_from The source context.
    * \param type_hint Hint of content data type.
    */
-  void CopyFromRemote(void* from, size_t from_offset, void* to, size_t to_offset, size_t nbytes,
-                      TVMContext ctx_from, DLDataType type_hint);
+  void CopyFromRemote(DLTensor* from, void* to_bytes, uint64_t nbytes);

Review comment:
       We use a mutable `from` tensor all over; is there a good reason for that?

##########
File path: src/runtime/minrpc/minrpc_server.h
##########
@@ -209,30 +220,39 @@ class MinRPCServer {
   }
 
   void HandleCopyToRemote() {
-    uint64_t handle, offset, num_bytes;
-    TVMContext ctx;
-    DLDataType type_hint;
-
-    this->Read(&handle);
-    this->Read(&offset);
+    DLTensor* arr = this->ArenaAlloc<DLTensor>(1);
+    uint64_t data_handle;
+    this->Read(&data_handle);
+    arr->data = reinterpret_cast<void*>(data_handle);
+    this->Read(&(arr->ctx));
+    this->Read(&(arr->ndim));
+    this->Read(&(arr->dtype));
+    arr->shape = this->ArenaAlloc<int64_t>(arr->ndim);
+    this->ReadArray(arr->shape, arr->ndim);
+    arr->strides = nullptr;
+    this->Read(&(arr->byte_offset));
+    uint64_t num_bytes;
     this->Read(&num_bytes);
-    this->Read(&ctx);
-    this->Read(&type_hint);
-    int call_ecode = 0;
 
-    if (ctx.device_type == kDLCPU) {
-      uint8_t* dptr = reinterpret_cast<uint8_t*>(handle) + offset;
+    int call_ecode = 0;
+    if (arr->ctx.device_type == kDLCPU) {
+      uint8_t* dptr = reinterpret_cast<uint8_t*>(data_handle) + arr->byte_offset;
       this->ReadArray(dptr, num_bytes);
     } else {
       uint8_t* temp_data = this->ArenaAlloc<uint8_t>(num_bytes);
       this->ReadArray(temp_data, num_bytes);
-
-      call_ecode =
-          TVMDeviceCopyDataFromTo(temp_data, 0, reinterpret_cast<void*>(handle), offset, num_bytes,
-                                  DLContext{kDLCPU, 0}, ctx, type_hint, nullptr);
+      DLTensor temp;
+      temp.data = temp_data;
+      temp.ctx = DLContext{kDLCPU, 0};
+      temp.ndim = arr->ndim;
+      temp.dtype = arr->dtype;
+      temp.shape = arr->shape;
+      temp.strides = nullptr;
+      temp.byte_offset = 0;

Review comment:
       Blocks like this are repeated quite a bit; a make_(dl)tensor helper would be nice.

##########
File path: include/tvm/runtime/device_api.h
##########
@@ -98,20 +109,13 @@ class TVM_DLL DeviceAPI {
   virtual void FreeDataSpace(TVMContext ctx, void* ptr) = 0;
   /*!
    * \brief copy data from one place to another
+   * \note This API is designed to support special memory with shape dependent layout.
+   *       We pass in DLTensor* with shape information to support these cases.
    * \param from The source array.
-   * \param from_offset The byte offeset in the from.
    * \param to The target array.
-   * \param to_offset The byte offset in the to.
-   * \param num_bytes The size of the memory in bytes
-   * \param ctx_from The source context
-   * \param ctx_to The target context
-   * \param type_hint The type of elements, only neded by certain backends.
-   *                  can be useful for cross device endian converison.
    * \param stream Optional stream object.
    */
-  virtual void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset,
-                              size_t num_bytes, TVMContext ctx_from, TVMContext ctx_to,
-                              DLDataType type_hint, TVMStreamHandle stream) = 0;
+  virtual void CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream);

Review comment:
       `const DLTensor* from` ?

##########
File path: src/runtime/rpc/rpc_local_session.cc
##########
@@ -27,6 +27,7 @@
 #include <tvm/runtime/registry.h>
 
 #include <memory>
+#include <vector>

Review comment:
       Looks like this is no longer necessary, remove if so.

##########
File path: include/tvm/runtime/c_runtime_api.h
##########
@@ -559,6 +559,23 @@ TVM_DLL int TVMByteArrayFree(TVMByteArray* arr);
 TVM_DLL int TVMDeviceAllocDataSpace(DLContext ctx, size_t nbytes, size_t alignment,
                                     DLDataType type_hint, void** out_data);
 
+/*!
+ * \brief Allocate a data space on device with special memory scope.
+ * \note The memory could use a special multi-dimensional memory layout.
+ *       That is why we pass shape and dtype instead of raw number of bytes.
+ * \param ctx The device context to perform operation.
+ * \param ndim The number of dimension of the tensor.
+ * \param shape The shape of the tensor.
+ * \param dtype The type of elements.
+ * \param mem_scope The memory scope of the tensor,
+ *        can be nullptr, which indicate the default global DRAM
+ * \param out_data The allocated device pointer.
+ * \return 0 when success, -1 when failure happens
+ */
+TVM_DLL int TVMDeviceAllocDataSpaceWithScope(DLContext ctx, int ndim, const int64_t* shape,
+                                             DLDataType dtype, const char* mem_scope,
+                                             void** out_data);
+

Review comment:
       To support device-to-device copying between memory types, could we consider including a TVMArrayCopyFromToWithScope? The device backend may or may not be able to infer the storage type from the raw to/from handles alone. This shouldn't be an issue for OpenCL, but I can imagine a device API that uses completely opaque memory pointers and relies on user management, in which case having a from_scope and to_scope would be helpful.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org