You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by mo...@apache.org on 2021/12/11 16:55:05 UTC
[tvm] branch main updated: Add Hexagon VTCM and discontiguous allocation support (#9525)
This is an automated email from the ASF dual-hosted git repository.
moreau pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 2b35cfd Add Hexagon VTCM and discontiguous allocation support (#9525)
2b35cfd is described below
commit 2b35cfd6ddb73afecd3f550f33881e1fdc7c3267
Author: Adam Straw <as...@octoml.ai>
AuthorDate: Sat Dec 11 08:54:40 2021 -0800
Add Hexagon VTCM and discontiguous allocation support (#9525)
* WIP Allocation abstraction for VTCM and DDR.
* Add Hexagon VTCM and discontiguous allocation support
* differentiate between dimensions and allocations
* remove change to llvm codegen
* add integration test_add_vtcm to demo vtcm alloc
* remove cmake change
* forcing contiguous allocation in device API, for now
Co-authored-by: Chris Sullivan <cs...@octoml.ai>
---
src/runtime/hexagon/hexagon/hexagon_buffer.cc | 239 ++++++++++++++++-----
src/runtime/hexagon/hexagon/hexagon_buffer.h | 91 +++++---
src/runtime/hexagon/hexagon/hexagon_common.cc | 8 +-
src/runtime/hexagon/hexagon/hexagon_common.h | 2 +
.../hexagon/hexagon/hexagon_device_api_v2.cc | 83 ++++---
.../contrib/test_hexagon/rpc/test_launcher.py | 48 +++++
6 files changed, 352 insertions(+), 119 deletions(-)
diff --git a/src/runtime/hexagon/hexagon/hexagon_buffer.cc b/src/runtime/hexagon/hexagon/hexagon_buffer.cc
index 38f91c8..a35759d 100644
--- a/src/runtime/hexagon/hexagon/hexagon_buffer.cc
+++ b/src/runtime/hexagon/hexagon/hexagon_buffer.cc
@@ -23,83 +23,151 @@
#include <tvm/runtime/module.h>
+#include "hexagon_common.h"
+
+#if defined(__hexagon__)
+#include "HAP_compute_res.h"
+#endif
+
#include <string>
#include <utility>
-#include "hexagon_common.h"
-
namespace tvm {
namespace runtime {
namespace hexagon {
-static size_t GetDataAlignment(const DLDataType dtype) {
- size_t align = (dtype.bits / 8) * dtype.lanes;
- if (align < kAllocAlignment) return kAllocAlignment;
- return align;
-}
+struct Allocation {
+ Allocation(size_t nbytes, size_t alignment) : nbytes_(nbytes), alignment_(alignment) {}
+ virtual ~Allocation() {}
+ Allocation(const Allocation&) = delete;
+ Allocation& operator=(const Allocation&) = delete;
+ Allocation(Allocation&&) = delete;
+ Allocation& operator=(Allocation&&) = delete;
-HexagonBuffer::HexagonBuffer(int ndim, const int64_t* shape, DLDataType dtype,
- Optional<String> scope) {
- // TODO(csullivan): Re-enable check on ndim <= 2 when physical layout support
- // in MakePackedAPI is added.
- // ICHECK_LE(ndim, 1) << "Hexagon currently only supports flat allocations "
- // << "and arrays of flat allocations.";
-
- DLTensor t;
- t.shape = const_cast<int64_t*>(shape);
- t.ndim = ndim;
- t.dtype = dtype;
- size_t nbytes = GetDataSize(t);
- size_t alignment = GetDataAlignment(dtype);
- // TODO(csullivan): Extend to support arrays of allocations.
- // Move assignment from r-value constructed flat allocation.
- *this = HexagonBuffer(nbytes, alignment, scope);
-}
+ void* data_{nullptr};
+ size_t nbytes_;
+ size_t alignment_;
+};
-HexagonBuffer::HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope) {
- void* ptr = nullptr;
- int ret = posix_memalign(&ptr, alignment, nbytes);
- if (ret != 0) {
- throw std::bad_alloc();
+struct DDRAllocation : public Allocation {
+ DDRAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) {
+#ifdef _WIN32
+ data_ = _aligned_malloc(nbytes, alignment);
+ CHECK(data_ != nullptr);
+#else
+ int ret = posix_memalign(&data_, alignment, nbytes);
+ CHECK_EQ(ret, 0);
+#endif
}
- allocations_.push_back(ptr);
- SetStorageScope(scope);
+ ~DDRAllocation() {
+#ifdef _WIN32
+ _aligned_free(data_);
+#else
+ free(data_);
+#endif
+ }
+};
+
+#if defined(__hexagon__)
+struct VTCMAllocation : public Allocation {
+ VTCMAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) {
+ compute_res_attr_t res_info;
+ HEXAGON_SAFE_CALL(HAP_compute_res_attr_init(&res_info));
+
+ // allocate nbytes of vtcm on a single page
+ HEXAGON_SAFE_CALL(HAP_compute_res_attr_set_vtcm_param(&res_info, /*vtcm_size = */ nbytes,
+ /*b_single_page = */ 1));
+ context_id_ = HAP_compute_res_acquire(&res_info, /*timeout = */ 10000);
+
+ if (context_id_) {
+ data_ = HAP_compute_res_attr_get_vtcm_ptr(&res_info);
+ if (!data_) {
+ HEXAGON_PRINT(ERROR, "ERROR: Allocated VTCM ptr is null.");
+ HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
+ return;
+ }
+ } else {
+ HEXAGON_PRINT(ERROR, "ERROR: Unable to acquire requeisted resource.");
+ return;
+ }
+ // HEXAGON_PRINT(ALWAYS, "VTCMAllocation() - Context ID: %u, VTCM ptr: %p", context_id_, data_);
+ }
+ ~VTCMAllocation() {
+ // HEXAGON_PRINT(ALWAYS, "~VTCMAllocation() - Context ID: %u, VTCM ptr: %p", context_id_,
+ // data_);
+ HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
+ data_ = nullptr;
+ }
+ unsigned int context_id_{0};
+};
+#else
+struct VTCMAllocation : public DDRAllocation {
+ VTCMAllocation(size_t nbytes, size_t alignment) : DDRAllocation(nbytes, alignment) {}
+};
+#endif
+
+template <HexagonBuffer::StorageScope S>
+std::unique_ptr<Allocation> Allocator(size_t nbytes, size_t alignment);
+
+template <>
+std::unique_ptr<Allocation> Allocator<HexagonBuffer::StorageScope::kDDR>(size_t nbytes,
+ size_t alignment) {
+ return std::make_unique<DDRAllocation>(nbytes, alignment);
}
-HexagonBuffer::HexagonBuffer(void* data, Optional<String> scope) : managed_{false} {
+template <>
+std::unique_ptr<Allocation> Allocator<HexagonBuffer::StorageScope::kVTCM>(size_t nbytes,
+ size_t alignment) {
+ return std::make_unique<VTCMAllocation>(nbytes, alignment);
+}
+
+HexagonBuffer::HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope)
+ : nallocs_(1), nbytes_(nbytes) {
SetStorageScope(scope);
- allocations_.push_back(data);
+
+ std::unique_ptr<Allocation> alloca = nullptr;
+ if (GetStorageScope() == StorageScope::kDDR) {
+ alloca = Allocator<StorageScope::kDDR>(nbytes, alignment);
+ } else if (GetStorageScope() == StorageScope::kVTCM) {
+ alloca = Allocator<StorageScope::kVTCM>(nbytes, alignment);
+ }
+ CHECK(alloca != nullptr);
+ allocations_.push_back(alloca->data_);
+ managed_allocations_.push_back(std::move(alloca));
}
-HexagonBuffer::~HexagonBuffer() {
- if (managed_) {
- for (auto& ptr : allocations_) {
- free(ptr);
+HexagonBuffer::HexagonBuffer(size_t nallocs, size_t nbytes, size_t alignment,
+ Optional<String> scope)
+ : nallocs_(nallocs), nbytes_(nallocs * nbytes) {
+ SetStorageScope(scope);
+ for (size_t i = 0; i < nallocs; ++i) {
+ std::unique_ptr<Allocation> alloca = nullptr;
+ if (GetStorageScope() == StorageScope::kDDR) {
+ alloca = Allocator<StorageScope::kDDR>(nbytes, alignment);
+ } else if (GetStorageScope() == StorageScope::kVTCM) {
+ alloca = Allocator<StorageScope::kVTCM>(nbytes, alignment);
}
+ CHECK(alloca != nullptr);
+ allocations_.push_back(alloca->data_);
+ managed_allocations_.push_back(std::move(alloca));
}
}
-HexagonBuffer::HexagonBuffer(HexagonBuffer&& other)
- : allocations_(other.allocations_),
- managed_(other.managed_),
- storage_scope_(other.storage_scope_) {
- other.allocations_.clear();
- other.managed_ = false;
- other.storage_scope_ = StorageScope::kDDR;
+HexagonBuffer::HexagonBuffer(void* data, size_t nbytes, Optional<String> scope)
+ : nallocs_(1), nbytes_(nbytes) {
+ SetStorageScope(scope);
+ // disallow external VTCM allocations
+ CHECK(GetStorageScope() != HexagonBuffer::StorageScope::kVTCM);
+ allocations_.push_back(data);
}
-HexagonBuffer& HexagonBuffer::operator=(HexagonBuffer&& other) {
- std::swap(allocations_, other.allocations_);
- std::swap(managed_, other.managed_);
- std::swap(storage_scope_, other.storage_scope_);
- return *this;
-}
+HexagonBuffer::~HexagonBuffer() { managed_allocations_.clear(); }
-void* HexagonBuffer::GetPointer() {
+void** HexagonBuffer::GetPointer() {
if (!allocations_.size()) {
return nullptr;
}
- return (allocations_.size() > 1) ? allocations_.data() : allocations_[0];
+ return allocations_.data();
}
HexagonBuffer::StorageScope HexagonBuffer::GetStorageScope() const { return storage_scope_; }
@@ -119,11 +187,70 @@ void HexagonBuffer::SetStorageScope(Optional<String> scope) {
}
}
-HexagonBuffer* IsHexagonBuffer(DLTensor* tensor) {
- if (TVMDeviceExtType(tensor->device.device_type) == kDLHexagon) {
- return static_cast<HexagonBuffer*>(tensor->data);
+void HexagonBuffer::CopyTo(void* data, size_t nbytes) {
+ CHECK(nbytes_ == nbytes);
+ size_t offset = 0;
+ for (size_t i = 0; i < nallocs_; ++i) {
+ CHECK(nbytes / nallocs_ == managed_allocations_[i]->nbytes_);
+
+ memcpy(static_cast<char*>(data) + offset,
+ static_cast<const char*>(managed_allocations_[i]->data_),
+ managed_allocations_[i]->nbytes_);
+
+ offset += managed_allocations_[i]->nbytes_;
+ }
+}
+
+void HexagonBuffer::CopyFrom(void* data, size_t nbytes) {
+ CHECK(nbytes_ == nbytes);
+ size_t offset = 0;
+ for (size_t i = 0; i < nallocs_; ++i) {
+ CHECK(nbytes / nallocs_ == managed_allocations_[i]->nbytes_);
+
+ memcpy(static_cast<char*>(managed_allocations_[i]->data_),
+ static_cast<const char*>(data) + offset, managed_allocations_[i]->nbytes_);
+
+ offset += managed_allocations_[i]->nbytes_;
+ }
+}
+
+void HexagonBuffer::CopyFrom(const HexagonBuffer& other) {
+ CHECK(nbytes_ == other.nbytes_);
+
+ if (nallocs_ == other.nallocs_) {
+ for (size_t i = 0; i < nallocs_; ++i) {
+ CHECK(managed_allocations_[i]->nbytes_ == other.managed_allocations_[i]->nbytes_);
+
+ memcpy(static_cast<char*>(managed_allocations_[i]->data_),
+ static_cast<const char*>(other.managed_allocations_[i]->data_),
+ managed_allocations_[i]->nbytes_);
+ }
+ } else if (nallocs_ == 1) {
+ size_t offset = 0;
+ for (size_t i = 0; i < other.nallocs_; ++i) {
+ CHECK(nbytes_ / other.nallocs_ == other.managed_allocations_[i]->nbytes_);
+
+ memcpy(static_cast<char*>(managed_allocations_[0]->data_) + offset,
+ static_cast<const char*>(other.managed_allocations_[i]->data_),
+ other.managed_allocations_[i]->nbytes_);
+
+ offset += other.managed_allocations_[i]->nbytes_;
+ }
+ } else if (other.nallocs_ == 1) {
+ size_t offset = 0;
+ for (size_t i = 0; i < nallocs_; ++i) {
+ CHECK(other.nbytes_ / nallocs_ == managed_allocations_[i]->nbytes_);
+
+ memcpy(static_cast<char*>(managed_allocations_[i]->data_),
+ static_cast<const char*>(other.managed_allocations_[0]->data_) + offset,
+ managed_allocations_[i]->nbytes_);
+
+ offset += managed_allocations_[i]->nbytes_;
+ }
+ } else {
+ CHECK(false) << "To copy between Hexagon Buffers they must either have the same number of "
+ "dimensions or one of the Hexagon Buffers must have a single dimension.";
}
- return nullptr;
}
} // namespace hexagon
diff --git a/src/runtime/hexagon/hexagon/hexagon_buffer.h b/src/runtime/hexagon/hexagon/hexagon_buffer.h
index c62cee6..139c38e 100644
--- a/src/runtime/hexagon/hexagon/hexagon_buffer.h
+++ b/src/runtime/hexagon/hexagon/hexagon_buffer.h
@@ -26,36 +26,38 @@
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/packed_func.h>
+#include <memory>
#include <vector>
namespace tvm {
namespace runtime {
namespace hexagon {
+struct Allocation;
+
class HexagonBuffer {
public:
- /* \brief Allocate memory within hexagon accessible memory
- * scopes.
+ /* \brief Allocate 1d (contiguous) memory within Hexagon accessible
+ * memory scopes.
*
- * \param ndim The number of dimensions of physical storage
+ * \param nbytes The number of bytes of physical storage
* to allocate.
*
- * \param shape The shape of the ndarray for which to allocate
- * physical storage.
- *
- * \param dtype The data type of the physical storage.
+ * \param alignment The byte alignment to be used when allocating.
*
* \param scope Optional storage scope indicating the memory
* space in which to allocate. Defaults to global system
* memory (DDR).
*/
- HexagonBuffer(int ndim, const int64_t* shape, DLDataType dtype, Optional<String> scope);
+ HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope);
- /* \brief Allocate memory within hexagon accessible memory
- * scopes.
+ /* \brief Allocate 2d (discontiguous) memory within Hexagon accessible
+ * memory scopes.
*
- * \param nbytes The number of bytes of flat physical storage
- * to allocate.
+ * \param nallocs The number of allocations.
+ *
+ * \param nbytes The number of bytes of physical storage
+ * to allocate per allocation.
*
* \param alignment The byte alignment to be used when allocating.
*
@@ -63,17 +65,19 @@ class HexagonBuffer {
* space in which to allocate. Defaults to global system
* memory (DDR).
*/
- HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope);
+ HexagonBuffer(size_t nallocs, size_t nbytes, size_t alignment, Optional<String> scope);
- /* \brief Construct a hexagon buffer from externally allocated storage.
+ /* \brief Construct a Hexagon Buffer from an external buffer.
+ *
+ * \param data The pointer to the external buffer.
*
- * \param data The externally allocated storage.
+ * \param nbytes The size of the external buffer in bytes.
*
* \param scope Optional storage scope indicating the memory
- * space in the external allocation belongs. Assumes global system
- * memory if not provided.
+ * space in which to allocate. Defaults to global system
+ * memory (DDR).
*/
- explicit HexagonBuffer(void* data, Optional<String> scope = Optional<String>());
+ explicit HexagonBuffer(void* data, size_t nbytes, Optional<String> scope);
//! \brief Destruction deallocates the underlying allocations.
~HexagonBuffer();
@@ -84,14 +88,14 @@ class HexagonBuffer {
//! \brief Prevent copy assignment with HexagonBuffers.
HexagonBuffer& operator=(const HexagonBuffer&) = delete;
- //! \brief Allow move construction.
- HexagonBuffer(HexagonBuffer&&);
+ //! \brief Prevent move construction.
+ HexagonBuffer(HexagonBuffer&&) = delete;
- //! \brief Allow move assignment.
- HexagonBuffer& operator=(HexagonBuffer&&);
+ //! \brief Prevent move assignment.
+ HexagonBuffer& operator=(HexagonBuffer&&) = delete;
- //! \brief Return pointer to allocation or allocations.
- void* GetPointer();
+ //! \brief Return pointer to allocations.
+ void** GetPointer();
//! \brief Memory scopes managed by a Hexagon Buffer.
enum class StorageScope {
@@ -106,28 +110,49 @@ class HexagonBuffer {
//! \brief Return storage scope of underlying allocation.
StorageScope GetStorageScope() const;
+ /* \brief Copy data from a Hexagon Buffer an external buffer.
+ *
+ * \param data The pointer to the external buffer.
+ *
+ * \param nbytes The number of bytes to copy.
+ */
+ void CopyTo(void* data, size_t nbytes);
+
+ /* \brief Copy data from an external buffer to a Hexagon Buffer.
+ *
+ * \param data The pointer to the external buffer.
+ *
+ * \param nbytes The number of bytes to copy.
+ */
+ void CopyFrom(void* data, size_t nbytes);
+
+ /* \brief Copy data from one Hexagon Buffer to another.
+ *
+ * \param other The other Hexagon Buffer.
+ */
+ void CopyFrom(const HexagonBuffer& other);
+
private:
//! \brief Assign a storage scope to the buffer.
void SetStorageScope(Optional<String> scope);
- /*! \brief Array of allocations required by the buffer.
+ /*! \brief Array of raw pointer allocations required by the buffer.
*
- * For a 1d (flat) storage, a single contiguous allocation will
- * result. For 2d storage, (count, nbytes) = shape, which will
- * result in `count` discrete allocations.
+ * For 1d (contiguous) storage a single allocation will result.
+ * For 2d (discontiguous) storage `nallocs` allocations will result.
*/
std::vector<void*> allocations_;
- /*! \brief Whether the allocation(s) present are managed
- * and should be deallocated upon destruction.
+ /*! \brief Managed allocations which follow RAII and are released
+ * during destruction.
*/
- bool managed_{true};
+ std::vector<std::unique_ptr<Allocation>> managed_allocations_;
/*! \brief The underlying storage type in which the allocation
* resides.
*/
+ size_t nallocs_;
+ size_t nbytes_;
StorageScope storage_scope_;
};
-HexagonBuffer* IsHexagonBuffer(DLTensor* tensor);
-
} // namespace hexagon
} // namespace runtime
} // namespace tvm
diff --git a/src/runtime/hexagon/hexagon/hexagon_common.cc b/src/runtime/hexagon/hexagon/hexagon_common.cc
index bed6f45..6927cd5 100644
--- a/src/runtime/hexagon/hexagon/hexagon_common.cc
+++ b/src/runtime/hexagon/hexagon/hexagon_common.cc
@@ -59,7 +59,9 @@ void HexagonLookupLinkedParam(TVMArgs args, TVMRetValue* rv) {
std::vector<int64_t> shape_vec{template_tensor->shape,
template_tensor->shape + template_tensor->ndim};
- auto* param_buffer = new HexagonBuffer(static_cast<void*>(opaque_handle));
+ Optional<String> scope("global");
+ auto* param_buffer =
+ new HexagonBuffer(static_cast<void*>(opaque_handle), GetDataSize(*template_tensor), scope);
auto* container = new NDArray::Container(static_cast<void*>(param_buffer), shape_vec,
template_tensor->dtype, dev);
container->SetDeleter([](Object* container) {
@@ -85,7 +87,9 @@ PackedFunc WrapPackedFunc(TVMBackendPackedCFunc faddr, const ObjectPtr<Object>&
if (args.type_codes[i] == kTVMDLTensorHandle) {
DLTensor* tensor = static_cast<DLTensor*>(arg_values[i].v_handle);
buffer_args.emplace_back(i, static_cast<HexagonBuffer*>(tensor->data));
- tensor->data = buffer_args.back().second->GetPointer();
+ // Assumes a single contiguous allocation
+ // TODO(Straw): Enable discontiguous allocation after RFC 39 lands
+ tensor->data = buffer_args.back().second->GetPointer()[0];
}
}
int ret = (*faddr)(const_cast<TVMValue*>(args.values), const_cast<int*>(args.type_codes),
diff --git a/src/runtime/hexagon/hexagon/hexagon_common.h b/src/runtime/hexagon/hexagon/hexagon_common.h
index 87d36c9..e1eca72 100644
--- a/src/runtime/hexagon/hexagon/hexagon_common.h
+++ b/src/runtime/hexagon/hexagon/hexagon_common.h
@@ -62,4 +62,6 @@ inline bool IsHexagonDevice(DLDevice dev) {
return TVMDeviceExtType(dev.device_type) == kDLHexagon;
}
+constexpr int kHexagonAllocAlignment = 2048;
+
#endif // TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_COMMON_H_
diff --git a/src/runtime/hexagon/hexagon/hexagon_device_api_v2.cc b/src/runtime/hexagon/hexagon/hexagon_device_api_v2.cc
index 5b79f80..4461ac5 100644
--- a/src/runtime/hexagon/hexagon/hexagon_device_api_v2.cc
+++ b/src/runtime/hexagon/hexagon/hexagon_device_api_v2.cc
@@ -51,63 +51,90 @@ void HexagonDeviceAPIv2::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* r
}
}
+// DataSpace: static allocations for Hexagon
void* HexagonDeviceAPIv2::AllocDataSpace(Device dev, int ndim, const int64_t* shape,
DLDataType dtype, Optional<String> mem_scope) {
- return new HexagonBuffer(ndim, shape, dtype, mem_scope.defined() ? mem_scope : String("global"));
+ CHECK(TVMDeviceExtType(dev.device_type) == kDLHexagon);
+
+ // Forcing contiguous allocation, for now
+ // TODO(Straw): Enable discontiguous allocation after RFC 39 lands
+ size_t nallocs = 1;
+ size_t nbytes = 1;
+ for (int i = 0; i < ndim; ++i) {
+ nbytes *= shape[i];
+ }
+ size_t typesize = (dtype.bits / 8) * dtype.lanes;
+ nbytes *= typesize;
+
+ size_t alignment = typesize;
+ if (alignment < kHexagonAllocAlignment) {
+ alignment = kHexagonAllocAlignment;
+ }
+ return new HexagonBuffer(nallocs, nbytes, alignment, mem_scope);
}
void* HexagonDeviceAPIv2::AllocDataSpace(Device dev, size_t nbytes, size_t alignment,
DLDataType type_hint) {
+ if (alignment < kHexagonAllocAlignment) {
+ alignment = kHexagonAllocAlignment;
+ }
return new HexagonBuffer(nbytes, alignment, String("global"));
}
void HexagonDeviceAPIv2::FreeDataSpace(Device dev, void* ptr) {
- auto* pbuf = static_cast<HexagonBuffer*>(ptr);
- delete pbuf;
+ CHECK(TVMDeviceExtType(dev.device_type) == kDLHexagon);
+ auto* hexbuf = static_cast<HexagonBuffer*>(ptr);
+ CHECK(hexbuf != nullptr);
+ delete hexbuf;
}
+// WorkSpace: runtime allocations for Hexagon
struct HexagonWorkspacePool : public WorkspacePool {
HexagonWorkspacePool() : WorkspacePool(kDLCPU, HexagonDeviceAPIv2::Global()) {}
};
void* HexagonDeviceAPIv2::AllocWorkspace(Device dev, size_t size, DLDataType type_hint) {
- auto* buffer = static_cast<HexagonBuffer*>(
+ CHECK(TVMDeviceExtType(dev.device_type) == kDLHexagon);
+ auto* hexbuf = static_cast<HexagonBuffer*>(
dmlc::ThreadLocalStore<HexagonWorkspacePool>::Get()->AllocWorkspace(dev, size));
- void* ptr = buffer->GetPointer();
- workspace_allocations_.insert({ptr, buffer});
+
+ // Assumes a single contiguous allocation
+ // TODO(Straw): Enable discontiguous allocation after RFC 39 lands
+ void* ptr = hexbuf->GetPointer()[0];
+ workspace_allocations_.insert({ptr, hexbuf});
return ptr;
}
void HexagonDeviceAPIv2::FreeWorkspace(Device dev, void* data) {
+ CHECK(TVMDeviceExtType(dev.device_type) == kDLHexagon);
auto it = workspace_allocations_.find(data);
- ICHECK(it != workspace_allocations_.end())
+ CHECK(it != workspace_allocations_.end())
<< "Attempt made to free unknown or already freed workspace allocation";
dmlc::ThreadLocalStore<HexagonWorkspacePool>::Get()->FreeWorkspace(dev, it->second);
workspace_allocations_.erase(it);
}
void HexagonDeviceAPIv2::CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
- if (IsHexagonDevice(from->device) && IsHexagonDevice(to->device)) {
- HexagonBuffer* buffer_src = static_cast<HexagonBuffer*>(from->data);
- HexagonBuffer* buffer_dst = static_cast<HexagonBuffer*>(to->data);
- // Check storage scopes
- if (buffer_src->GetStorageScope() == HexagonBuffer::StorageScope::kDDR &&
- buffer_dst->GetStorageScope() == HexagonBuffer::StorageScope::kDDR) {
- memcpy(static_cast<char*>(buffer_dst->GetPointer()) + to->byte_offset,
- static_cast<const char*>(buffer_src->GetPointer()) + from->byte_offset,
- GetDataSize(*from));
- } else {
- ICHECK(false) << "Currently only copying between DDR storage is supported.";
- }
- } else if (IsHexagonDevice(from->device) && to->device.device_type == kDLCPU) {
- HexagonBuffer* buffer_src = static_cast<HexagonBuffer*>(from->data);
- memcpy(static_cast<char*>(to->data) + to->byte_offset,
- static_cast<const char*>(buffer_src->GetPointer()) + from->byte_offset,
- GetDataSize(*from));
- } else if (from->device.device_type == kDLCPU && IsHexagonDevice(to->device)) {
- HexagonBuffer* buffer_dst = static_cast<HexagonBuffer*>(to->data);
- memcpy(static_cast<char*>(buffer_dst->GetPointer()) + to->byte_offset,
- static_cast<const char*>(from->data) + from->byte_offset, GetDataSize(*from));
+ CHECK_EQ(from->byte_offset, 0);
+ CHECK_EQ(to->byte_offset, 0);
+ CHECK_EQ(GetDataSize(*from), GetDataSize(*to));
+
+ HexagonBuffer* hex_from_buf = static_cast<HexagonBuffer*>(from->data);
+ HexagonBuffer* hex_to_buf = static_cast<HexagonBuffer*>(to->data);
+
+ if (TVMDeviceExtType(from->device.device_type) == kDLHexagon &&
+ TVMDeviceExtType(to->device.device_type) == kDLHexagon) {
+ CHECK(hex_from_buf != nullptr);
+ CHECK(hex_to_buf != nullptr);
+ hex_to_buf->CopyFrom(*hex_from_buf);
+ } else if (from->device.device_type == kDLCPU &&
+ TVMDeviceExtType(to->device.device_type) == kDLHexagon) {
+ CHECK(hex_to_buf != nullptr);
+ hex_to_buf->CopyFrom(from->data, GetDataSize(*from));
+ } else if (TVMDeviceExtType(from->device.device_type) == kDLHexagon &&
+ to->device.device_type == kDLCPU) {
+ CHECK(hex_from_buf != nullptr);
+ hex_from_buf->CopyTo(to->data, GetDataSize(*to));
} else {
CHECK(false)
<< "Expect copy between DLTensor devices of types kDLHexagon and kDLCPU (external) only.";
diff --git a/tests/python/contrib/test_hexagon/rpc/test_launcher.py b/tests/python/contrib/test_hexagon/rpc/test_launcher.py
index d705541..15dfac6 100644
--- a/tests/python/contrib/test_hexagon/rpc/test_launcher.py
+++ b/tests/python/contrib/test_hexagon/rpc/test_launcher.py
@@ -76,6 +76,54 @@ def test_add(tvm_tracker_host, tvm_tracker_port, android_serial_number):
launcher.close()
+@requires_rpc_tracker
+@requires_hexagon_toolchain
+def test_add_vtcm(tvm_tracker_host, tvm_tracker_port, android_serial_number):
+ dtype = "int8"
+ A = tvm.te.placeholder((2,), dtype=dtype)
+ B = tvm.te.placeholder((1,), dtype=dtype)
+ C = tvm.te.compute(A.shape, lambda i: A[i] + B[0], name="C")
+ sched = tvm.te.create_schedule(C.op)
+
+ target_hexagon = tvm.target.hexagon("v68", link_params=True)
+ func = tvm.build(
+ sched, [A, B, C], tvm.target.Target(target_hexagon, host=target_hexagon), name="add"
+ )
+
+ temp = utils.tempdir()
+ dso_binary = "test_binary.so"
+ dso_binary_path = temp.relpath(dso_binary)
+ func.save(dso_binary_path)
+
+ launcher = HexagonLauncher(serial_number=android_serial_number)
+ launcher.android_run_rpc(rpc_tracker_host=tvm_tracker_host, rpc_tracker_port=tvm_tracker_port)
+ launcher.hexagon_setup()
+ remote_kw = {
+ "host": tvm_tracker_host,
+ "port": tvm_tracker_port,
+ "priority": 0,
+ "timeout": 60,
+ }
+ launcher.hexagon_session_setup(remote_kw)
+ launcher.upload(dso_binary_path, dso_binary)
+
+ with launcher.session as sess:
+ mod = launcher.get_module(dso_binary)
+ A_data = tvm.nd.empty(A.shape, A.dtype, sess.device, "global.vtcm")
+ A_data.copyfrom(np.array([2, 3]))
+
+ B_data = tvm.nd.empty(B.shape, B.dtype, sess.device, "global.vtcm")
+ B_data.copyfrom(np.array([4]))
+
+ C_data = tvm.nd.empty(C.shape, C.dtype, sess.device, "global.vtcm")
+ C_data.copyfrom(np.array([0, 0]))
+
+ mod["add"](A_data, B_data, C_data)
+ result = C_data.numpy()
+ assert (result == np.array([6, 7])).all()
+ launcher.close()
+
+
class TestMatMul:
M = tvm.testing.parameter(32)
N = tvm.testing.parameter(32)