You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ju...@apache.org on 2022/11/15 17:42:02 UTC

[tvm] branch main updated: [RUNTIME][ALIGNMENT] Configurable kAllocAlignment if needed (#13307)

This is an automated email from the ASF dual-hosted git repository.

junrushao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 24790d1d56 [RUNTIME][ALIGNMENT] Configurable kAllocAlignment if needed (#13307)
24790d1d56 is described below

commit 24790d1d56c9aa8baceb054383a626dba604b959
Author: Siva <qu...@quicinc.com>
AuthorDate: Tue Nov 15 23:11:56 2022 +0530

    [RUNTIME][ALIGNMENT] Configurable kAllocAlignment if needed (#13307)
    
    Not all platforms provide 64-byte aligned allocations. Platforms with 32-byte alignment fail
    to support set_input_zero_copy even though the ndarray is allocated by the TVM runtime itself.
    
    This change adds a configurable option for such targets.
    
    Co-authored-by: Siva Rama Krishna Reddy B <si...@blr-ubuntu-ripper.qualcomm.com>
---
 CMakeLists.txt                          | 5 +++++
 cmake/config.cmake                      | 3 +++
 include/tvm/runtime/device_api.h        | 8 ++++++++
 tests/scripts/task_build_adreno_bins.sh | 1 +
 4 files changed, 17 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d0e45c3d3a..b8d8f4c023 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -463,6 +463,11 @@ if(USE_PIPELINE_EXECUTOR)
   list(APPEND RUNTIME_SRCS ${RUNTIME_PIPELINE_SRCS})
 endif(USE_PIPELINE_EXECUTOR)
 
+if(USE_KALLOC_ALIGNMENT)
+  message(STATUS "Build Alloc alignment set to ${USE_KALLOC_ALIGNMENT}")
+  add_definitions(-DTVM_KALLOC_ALIGNMENT=${USE_KALLOC_ALIGNMENT})
+endif(USE_KALLOC_ALIGNMENT)
+
 # Caches the build.
 # Note that ccache-3.x doesn't support nvcc well, so CUDA kernels may never hit the cache and still
 # need to be re-compiled every time. Using ccache 4.0+ can resolve this issue.
diff --git a/cmake/config.cmake b/cmake/config.cmake
index 0b72047603..22a548d298 100644
--- a/cmake/config.cmake
+++ b/cmake/config.cmake
@@ -401,3 +401,6 @@ set(USE_LIBTORCH OFF)
 
 # Whether to use the Universal Modular Accelerator Interface
 set(USE_UMA OFF)
+
+# Set a custom allocation alignment (in bytes) for the device-allocated memory an NDArray points to
+set(USE_KALLOC_ALIGNMENT 64)
diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h
index 9613563f95..e517eb0d7f 100644
--- a/include/tvm/runtime/device_api.h
+++ b/include/tvm/runtime/device_api.h
@@ -51,11 +51,19 @@ enum DeviceAttrKind : int {
   kDriverVersion = 12
 };
 
+#ifdef TVM_KALLOC_ALIGNMENT
+/*! \brief Number of bytes each allocation must align to */
+constexpr int kAllocAlignment = TVM_KALLOC_ALIGNMENT;
+
+/*! \brief Number of bytes each allocation must align to in temporary allocation */
+constexpr int kTempAllocaAlignment = TVM_KALLOC_ALIGNMENT;
+#else
 /*! \brief Number of bytes each allocation must align to */
 constexpr int kAllocAlignment = 64;
 
 /*! \brief Number of bytes each allocation must align to in temporary allocation */
 constexpr int kTempAllocaAlignment = 64;
+#endif  // TVM_KALLOC_ALIGNMENT
 
 /*! \brief Maximum size that can be allocated on stack */
 constexpr int kMaxStackAlloca = 1024;
diff --git a/tests/scripts/task_build_adreno_bins.sh b/tests/scripts/task_build_adreno_bins.sh
index 5d45325160..6a9bbd9554 100755
--- a/tests/scripts/task_build_adreno_bins.sh
+++ b/tests/scripts/task_build_adreno_bins.sh
@@ -34,6 +34,7 @@ echo set\(USE_RPC ON\) >> config.cmake
 echo set\(USE_CPP_RPC ON\) >> config.cmake
 echo set\(USE_GRAPH_EXECUTOR ON\) >> config.cmake
 echo set\(USE_LIBBACKTRACE AUTO\) >> config.cmake
+echo set\(USE_KALLOC_ALIGNMENT 32\) >> config.cmake
 
 echo set\(ANDROID_ABI arm64-v8a\) >> config.cmake
 echo set\(ANDROID_PLATFORM android-28\) >> config.cmake