You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by kp...@apache.org on 2022/04/22 15:20:20 UTC

[tvm] branch main updated: [Hexagon] Delete offload runtime, move files to right places (#11090)

This is an automated email from the ASF dual-hosted git repository.

kparzysz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new e0e788b765 [Hexagon] Delete offload runtime, move files to right places (#11090)
e0e788b765 is described below

commit e0e788b765e8e0ccb868a035fabdc1dea846abfd
Author: Krzysztof Parzyszek <kp...@quicinc.com>
AuthorDate: Fri Apr 22 10:20:13 2022 -0500

    [Hexagon] Delete offload runtime, move files to right places (#11090)
    
    Within src/runtime/hexagon
    - delete directory android,
    - move files from hexagon to ., delete hexagon,
    - merge host/hexagon_module.cc with hexagon_module.cc, delete host.
    
    Rename HexagonHostModuleNode to HexagonModuleNode.
---
 CMakeLists.txt                                     |   10 -
 cmake/modules/Hexagon.cmake                        |   10 +-
 src/runtime/hexagon/android/hexagon_device.h       |  135 --
 src/runtime/hexagon/android/hexagon_device_api.cc  |  144 --
 src/runtime/hexagon/android/hexagon_module.cc      |  521 -------
 src/runtime/hexagon/android/hexagon_posix.cc       |   37 -
 .../hexagon/android/sim/driver/CMakeLists.txt      |   72 -
 src/runtime/hexagon/android/sim/driver/README.md   |   38 -
 .../hexagon/android/sim/driver/fake_pthread.cc     |  286 ----
 src/runtime/hexagon/android/sim/driver/pthread.h   |   92 --
 src/runtime/hexagon/android/sim/driver/sched.h     |   31 -
 .../hexagon/android/sim/driver/sim_device.cc       |  560 --------
 .../hexagon/android/sim/hexagon_device_sim.cc      | 1468 --------------------
 .../hexagon/android/sim/hexagon_sim_proto.h        |   73 -
 .../hexagon/android/target/fastrpc/CMakeLists.txt  |  173 ---
 .../hexagon/android/target/fastrpc/README.md       |   56 -
 .../android/target/fastrpc/include/tvm_remote.idl  |   51 -
 .../target/fastrpc/include/tvm_remote_nd.idl       |   49 -
 .../hexagon/android/target/fastrpc/src/tvm_hvx.cc  |  208 ---
 .../hexagon/android/target/fastrpc/src/tvm_hvx.h   |  153 --
 .../android/target/fastrpc/src/tvm_remote_imp.cc   |  244 ----
 .../target/fastrpc/src/tvm_remote_nd_imp.cc        |  325 -----
 .../android/target/fastrpc/src/tvm_wrap_pthread.cc |   76 -
 .../android/target/hexagon_device_target.cc        |  521 -------
 .../hexagon/android/target/hexagon_dsprpcapi.cc    |  100 --
 .../hexagon/android/target/hexagon_dsprpcapi.h     |  192 ---
 .../hexagon/android/target/hexagon_stubapi.cc      |  108 --
 .../hexagon/android/target/hexagon_stubapi.h       |  315 -----
 .../hexagon/android/target/hexagon_target_log.h    |   34 -
 .../hexagon/{hexagon => }/hexagon_buffer.cc        |    0
 src/runtime/hexagon/{hexagon => }/hexagon_buffer.h |    6 +-
 .../hexagon/{hexagon => }/hexagon_common.cc        |    3 +-
 src/runtime/hexagon/{hexagon => }/hexagon_common.h |    6 +-
 .../hexagon/{hexagon => }/hexagon_device_api_v2.cc |    2 +-
 .../hexagon/{hexagon => }/hexagon_device_api_v2.h  |    6 +-
 src/runtime/hexagon/hexagon_module.cc              |   41 +-
 src/runtime/hexagon/hexagon_module.h               |   21 +-
 .../hexagon/{hexagon => }/hexagon_user_dma.cc      |    0
 .../{hexagon => }/hexagon_user_dma_descriptors.h   |    6 +-
 .../{hexagon => }/hexagon_user_dma_instructions.h  |    6 +-
 .../{hexagon => }/hexagon_user_dma_registers.h     |    6 +-
 src/runtime/hexagon/host/hexagon_module.cc         |   49 -
 src/runtime/hexagon/rpc/simulator/rpc_server.cc    |    2 +-
 src/target/llvm/codegen_hexagon.cc                 |   27 +-
 src/target/opt/build_hexagon_off.cc                |    3 +-
 tests/cpp/runtime/hexagon_buffer.cc                |    2 +-
 46 files changed, 57 insertions(+), 6211 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1564a68207..151173ac57 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -320,17 +320,7 @@ tvm_file_glob(GLOB RUNTIME_SRCS
 )
 
 if(BUILD_FOR_HEXAGON)
-  # Add file implementing posix_memalign when building the runtime as
-  # a shared library.
-  # This function is actually defined in the static libc, but when linking
-  # a shared library, libc is not linked into it. Some runtime systems
-  # don't implement posix_runtime, which causes runtime failures.
-  # To avoid this issue, Hexagon runtime contains an implementation of
-  # posix_memalign, but it should only be used with the dynamic TVM
-  # runtime, since it would cause multiple definition errors with the
-  # static one.
   if(NOT BUILD_STATIC_RUNTIME)
-    list(APPEND RUNTIME_SRCS src/runtime/hexagon/android/hexagon_posix.cc)
     # Allow undefined symbols (there will be some from libc).
     set(TVM_NO_UNDEFINED_SYMBOLS "")
   endif()
diff --git a/cmake/modules/Hexagon.cmake b/cmake/modules/Hexagon.cmake
index 2914b0e3b1..3b0ff7dfea 100644
--- a/cmake/modules/Hexagon.cmake
+++ b/cmake/modules/Hexagon.cmake
@@ -84,9 +84,9 @@ if(NOT USE_HEXAGON)
   if(BUILD_FOR_HOST)
     list(APPEND COMPILER_SRCS src/target/opt/build_hexagon_off.cc)
   endif()
-  list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon/hexagon_buffer.cc)
-  list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon/hexagon_common.cc)
-  list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon/hexagon_user_dma.cc)
+  list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon_buffer.cc)
+  list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon_common.cc)
+  list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon_user_dma.cc)
   return()
 endif()
 
@@ -122,9 +122,7 @@ endfunction()
 
 # Common sources for TVM runtime with Hexagon support
 file_glob_append(RUNTIME_HEXAGON_SRCS
-  "${TVMRT_SOURCE_DIR}/hexagon/hexagon_module.cc"
-  "${TVMRT_SOURCE_DIR}/hexagon/hexagon/*.cc"
-  "${TVMRT_SOURCE_DIR}/hexagon/host/*.cc"
+  "${TVMRT_SOURCE_DIR}/hexagon/*.cc"
 )
 
 
diff --git a/src/runtime/hexagon/android/hexagon_device.h b/src/runtime/hexagon/android/hexagon_device.h
deleted file mode 100644
index 552b8f9713..0000000000
--- a/src/runtime/hexagon/android/hexagon_device.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef TVM_RUNTIME_HEXAGON_ANDROID_HEXAGON_DEVICE_H_
-#define TVM_RUNTIME_HEXAGON_ANDROID_HEXAGON_DEVICE_H_
-
-#include <tvm/runtime/logging.h>
-#include <tvm/runtime/module.h>
-
-#include <memory>
-#include <string>
-
-#include "../../meta_data.h"
-
-namespace tvm {
-namespace runtime {
-namespace hexagon {
-
-/*!
- * \brief Low-level interface for communicating with Hexagon devices.
- */
-class Device {
- public:
-  /*!
-   * \brief Allocate memory on device.
-   * \param size    Requested size.
-   * \param align   Requested alignment.
-   * \return        Pointer (local to the device) of the allocated memory,
-   *                or nullptr if allocation failed.
-   */
-  virtual void* Alloc(unsigned size, unsigned align) = 0;
-  /*!
-   * \brief Release allocated memory on device.
-   * \param ptr     Pointer to memory previously allocated by \ref Alloc.
-   */
-  virtual void Free(void* ptr) = 0;
-  /*!
-   * \brief Allocate VTCM memory on device.
-   * \param size    Requested size.
-   * \param align   Requested alignment.
-   * \return        Pointer (local to the device) of the allocated memory,
-   *                or nullptr if allocation failed.
-   */
-  virtual void* AllocVtcm(unsigned size, unsigned align) = 0;
-  /*!
-   * \brief Release allocated VTCM memory on device.
-   * \param ptr     Pointer to memory previously allocated by \ref AllocVtcm.
-   */
-  virtual void FreeVtcm(void* ptr) = 0;
-  /*!
-   * \brief Copy a block of data on device to another location on the device.
-   * \param dst     Pointer (local to device) to the destination buffer.
-   * \param src     Pointer (local to device) of the source buffer.
-   * \param len     Number of bytes to copy.
-   */
-  virtual void CopyDeviceToDevice(void* dst, const void* src, unsigned len) = 0;
-  /*!
-   * \brief Copy a block of data from device to host.
-   * \param host_dst  Pointer (local to host) to the destination buffer.
-   * \param src       Pointer (local to device) to the source buffer.
-   * \param len       Number of bytes to copy.
-   */
-  virtual void CopyDeviceToHost(void* host_dst, const void* src, unsigned len) = 0;
-  /*!
-   * \brief Copy a block of data from host to device.
-   * \param dst       Pointer (local to device) to the destination buffer.
-   * \param host_src  Pointer (local to host) to the source buffer.
-   * \param len       Number of bytes to copy.
-   */
-  virtual void CopyHostToDevice(void* dst, const void* host_src, unsigned len) = 0;
-  /*!
-   * \brief Load a module (typically a shared library) into device.
-   * \param data    Name of the shared library.
-   * \param fmt     Format of the library (currently ignored).
-   * \return        Pointer to the loaded module.
-   * \note Currently only one module can be loaded at any given time.
-   */
-  virtual void* Load(const std::string& data, const std::string& fmt) = 0;
-  /*!
-   * \brief Unload a module from device.
-   * \param mod     Pointer to a loaded module returned by \ref Load.
-   */
-  virtual void Unload(void* mod) = 0;
-  /*!
-   * \brief Find the address of an object in the currently loaded module.
-   * \param sym     Name of the object.
-   * \return Address of the located object, or nullptr if object was
-   *         not found.
-   */
-  virtual void* Resolve(const std::string& sym) = 0;
-  /*!
-   * \brief Invoke a function on device with given arguments.
-   * \param func    Address (local to device) of the function to call.
-   * \param scalar  Pointer to an array of 32-bit values that will be
-   *                passed via consecutive registers: r0..r5. This array
-   *                includes dummy values for skipped registers.
-   * \param sc_num  Number of values in the "scalar" array.
-   * \param stack   Pointer to an array of 32-bit values that will be
-   *                passed on the stack. This array includes dummy values
-   *                for padding.
-   * \param st_num  Number of values in the "stack" array.
-   */
-  virtual void Call(void* func, uint32_t* scalar, unsigned sc_num, uint32_t* stack,
-                    unsigned st_num) = 0;
-
-  virtual ~Device() = 0;
-
-  static std::shared_ptr<Device> Global();
-  static bool ValidateDeviceId(decltype(DLDevice::device_id) device_id) {
-    // Only supporting a single device for now.
-    return device_id == 0;
-  }
-};
-
-}  // namespace hexagon
-
-}  // namespace runtime
-}  // namespace tvm
-#endif  // TVM_RUNTIME_HEXAGON_ANDROID_HEXAGON_DEVICE_H_
diff --git a/src/runtime/hexagon/android/hexagon_device_api.cc b/src/runtime/hexagon/android/hexagon_device_api.cc
deleted file mode 100644
index f80c7e245a..0000000000
--- a/src/runtime/hexagon/android/hexagon_device_api.cc
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include <tvm/runtime/device_api.h>
-#include <tvm/runtime/logging.h>
-#include <tvm/runtime/registry.h>
-
-#include <algorithm>
-#include <cstring>
-
-#include "hexagon_device.h"
-
-namespace tvm {
-namespace runtime {
-
-class HexagonDeviceAPI : public DeviceAPI {
- public:
-  void SetDevice(Device dev) final;
-  void GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) final;
-  void* AllocDataSpace(Device dev, size_t nbytes, size_t alignment, DLDataType type_hint) final;
-  void FreeDataSpace(Device dev, void* ptr) final;
-  void StreamSync(Device dev, TVMStreamHandle stream) final;
-  void* AllocWorkspace(Device dev, size_t nbytes, DLDataType type_hint = {}) final;
-  void FreeWorkspace(Device dev, void* ptr) final;
-
-  static HexagonDeviceAPI* Global() {
-    // NOTE: explicitly use new to avoid destruction of global state
-    // Global state will be recycled by OS as the process exits.
-    static HexagonDeviceAPI* inst = new HexagonDeviceAPI();
-    return inst;
-  }
-
- protected:
-  void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset,
-                      size_t num_bytes, Device dev_from, Device dev_to, DLDataType type_hint,
-                      TVMStreamHandle stream) final;
-};
-
-// HexagonDeviceAPI.
-
-inline void HexagonDeviceAPI::SetDevice(Device dev) {}
-
-inline void HexagonDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) {
-  if (kind == kExist) *rv = 1;
-}
-
-inline void* HexagonDeviceAPI::AllocDataSpace(Device dev, size_t nbytes, size_t alignment,
-                                              DLDataType type_hint) {
-  ICHECK(hexagon::Device::ValidateDeviceId(dev.device_id));
-  return hexagon::Device::Global()->Alloc(nbytes, alignment);
-}
-
-inline void HexagonDeviceAPI::FreeDataSpace(Device dev, void* ptr) {
-  ICHECK(hexagon::Device::ValidateDeviceId(dev.device_id));
-  hexagon::Device::Global()->Free(ptr);
-}
-
-inline void HexagonDeviceAPI::CopyDataFromTo(const void* from, size_t from_offset, void* to,
-                                             size_t to_offset, size_t num_bytes, Device dev_from,
-                                             Device dev_to, DLDataType type_hint,
-                                             TVMStreamHandle stream) {
-  const char* src = static_cast<const char*>(from) + from_offset;
-  char* dst = static_cast<char*>(to) + to_offset;
-
-  auto Is32bit = [](const void* p) {
-    return p == reinterpret_cast<const void*>(uint32_t(uintptr_t(p)));
-  };
-  (void)Is32bit;
-
-  if (dev_from.device_type == dev_to.device_type) {
-    if (dev_from.device_type == kDLCPU) {
-      memmove(dst, src, num_bytes);
-    } else if (static_cast<int>(dev_from.device_type) == kDLHexagon) {
-      ICHECK(hexagon::Device::ValidateDeviceId(dev_from.device_id));
-      ICHECK_EQ(dev_from.device_id, dev_to.device_id);
-      ICHECK(Is32bit(dst) && Is32bit(src));
-      hexagon::Device::Global()->CopyDeviceToDevice(dst, src, num_bytes);
-    }
-  } else {
-    if (dev_from.device_type == kDLCPU) {
-      ICHECK_EQ(static_cast<int>(dev_to.device_type), kDLHexagon);
-      ICHECK(Is32bit(dst));
-      ICHECK(hexagon::Device::ValidateDeviceId(dev_to.device_id));
-      hexagon::Device::Global()->CopyHostToDevice(dst, src, num_bytes);
-    } else {
-      ICHECK_EQ(static_cast<int>(dev_from.device_type), kDLHexagon);
-      ICHECK_EQ(dev_to.device_type, kDLCPU);
-      ICHECK(Is32bit(src));
-      ICHECK(hexagon::Device::ValidateDeviceId(dev_from.device_id));
-      hexagon::Device::Global()->CopyDeviceToHost(dst, src, num_bytes);
-    }
-  }
-}
-
-inline void HexagonDeviceAPI::StreamSync(Device dev, TVMStreamHandle stream) {}
-
-inline void* HexagonDeviceAPI::AllocWorkspace(Device dev, size_t nbytes, DLDataType type_hint) {
-  ICHECK(hexagon::Device::ValidateDeviceId(dev.device_id));
-  if (type_hint.code == 100) {
-    size_t align = std::min(nbytes, 2048lu);
-    return hexagon::Device::Global()->AllocVtcm(nbytes, align);
-  }
-  return DeviceAPI::AllocWorkspace(dev, nbytes, type_hint);
-}
-
-inline void HexagonDeviceAPI::FreeWorkspace(Device dev, void* ptr) {
-  ICHECK(hexagon::Device::ValidateDeviceId(dev.device_id));
-  DeviceAPI::FreeWorkspace(dev, ptr);
-}
-
-TVM_REGISTER_GLOBAL("device_api.hexagon.v1").set_body([](TVMArgs args, TVMRetValue* rv) {
-  DeviceAPI* ptr = HexagonDeviceAPI::Global();
-  *rv = ptr;
-});
-}  // namespace runtime
-}  // namespace tvm
-
-// Hexagon-specific runtime functions to allocate/deallocate workspaces
-// in VTCM.
-extern "C" {
-void* HexagonBackendAllocateVTCM(uint32_t nbytes, uint32_t align) {
-  align = std::max(align, 2048u);
-  return tvm::runtime::hexagon::Device::Global()->AllocVtcm(nbytes, align);
-}
-void HexagonBackendFreeVTCM(void* ptr) {
-  return tvm::runtime::hexagon::Device::Global()->FreeVtcm(ptr);
-}
-}
diff --git a/src/runtime/hexagon/android/hexagon_module.cc b/src/runtime/hexagon/android/hexagon_module.cc
deleted file mode 100644
index b8af3698ab..0000000000
--- a/src/runtime/hexagon/android/hexagon_module.cc
+++ /dev/null
@@ -1,521 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include "../hexagon_module.h"
-
-#ifdef __ANDROID__
-#include <android/log.h>
-#endif
-#include <tvm/runtime/logging.h>
-#include <tvm/runtime/registry.h>
-
-#include <memory>
-#include <set>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include "../../file_utils.h"
-#include "hexagon_device.h"
-
-namespace tvm {
-namespace runtime {
-
-hexagon::Device::~Device() {}
-
-namespace hexagon {
-
-/*!
- * \brief Function argument locations according to the Hexagon ABI.
- *
- * In order to invoke a function whose arguments are in TVMArgs list, at
- * some point before branching to the function's address, these arguments
- * need to be loaded into locations (registers or stack) specified by the
- * corresponding ABI.
- * When a host wants to call a function on Hexagon, the host will identify
- * how each element of the TVMArgs list will be passed to the Hexagon
- * function. This class is a description of which values should go into
- * registers, and which values should be on stack. Right before the call
- * this class will be serialized and transferred over to the Hexagon side.
- * The code running on Hexagon will then execute the argument placement
- * and invoke the function.
- */
-struct ArgLayout {
-  std::vector<uint32_t> Scalar; /*!< Values going into registers, maximum  */
-                                /*!< 6, including dummy values for skipped */
-                                /*!< registers.                            */
-  std::vector<uint32_t> Stack;  /*!< Values going on stack, including      */
-                                /*!< dummy values for padding.             */
-  // There are no vector types at this time.
-
-  /*!
-   * \brief Alignment of type T on Hexagon.
-   */
-  template <typename T>
-  static constexpr unsigned align_of();
-  /*!
-   * \brief Size of type T on Hexagon.
-   */
-  template <typename T>
-  static constexpr unsigned size_of();
-
-  /*!
-   * \brief Add a value of type T to the layout.
-   */
-  template <typename T>
-  void Push(const T& v);
-
- private:
-  /*!
-   * \brief Add raw data to the layout.
-   * \param v         Pointer to the raw data as an array of 32-bit words.
-   * \param t_size    Number of bytes to add.
-   * \param t_align   Required alignment of the data on Hexagon.
-   */
-  void Push(uint32_t* v, unsigned t_size, unsigned t_align);
-};
-
-template <>
-constexpr unsigned ArgLayout::align_of<int32_t>() {
-  return 4;
-}
-template <>
-constexpr unsigned ArgLayout::align_of<uint32_t>() {
-  return 4;
-}
-template <>
-constexpr unsigned ArgLayout::align_of<float>() {
-  return 4;
-}
-template <>
-constexpr unsigned ArgLayout::align_of<void*>() {
-  return 4;
-}
-template <>
-constexpr unsigned ArgLayout::align_of<int64_t>() {
-  return 8;
-}
-template <>
-constexpr unsigned ArgLayout::align_of<uint64_t>() {
-  return 8;
-}
-template <>
-constexpr unsigned ArgLayout::align_of<double>() {
-  return 8;
-}
-template <>
-constexpr unsigned ArgLayout::align_of<DLTensor*>() {
-  return 4;
-}
-
-template <typename T>
-constexpr unsigned ArgLayout::align_of() {
-  // The static_assertion should depend on T so that it's only checked
-  // after instantiation.
-  static_assert((sizeof(T), false), "Implement align_of for this type");
-  return 0;
-}
-
-template <typename T>
-constexpr unsigned ArgLayout::size_of() {
-  return ArgLayout::align_of<T>();
-}
-
-template <typename T>
-void ArgLayout::Push(const T& v) {
-  static_assert(std::is_scalar<T>::value, "T must be a scalar");
-  constexpr unsigned T_size = size_of<T>();
-  // The reason for this assertion is to avoid sign-extensions here:
-  // an extra bit of information would be required to determine whether
-  // a sign- or a zero-extension is needed.
-  static_assert(T_size >= 4, "Type should be of size that is at least 4");
-  union {
-    uint32_t v[(T_size + 3) / 4];
-    T t;
-  } u;
-
-  u.t = v;
-  Push(u.v, T_size, align_of<T>());
-}
-
-void ArgLayout::Push(uint32_t* v, unsigned t_size, unsigned t_align) {
-  // t_size == 4 and t_size == 8 can be passed in scalar registers.
-  bool InReg = false;
-  if (t_size == 4) {
-    if (Scalar.size() < 6) {
-      Scalar.push_back(v[0]);
-      InReg = true;
-    }
-  } else if (t_size == 8) {
-    // Round the size up to the next even number.
-    unsigned cs = Scalar.size();
-    if (cs <= 4) {
-      // There is room in the scalar registers.
-      if (cs & 1) Scalar.push_back(0u);
-      Scalar.push_back(v[0]);
-      Scalar.push_back(v[1]);
-      InReg = true;
-    }
-  }
-
-  if (!InReg) {
-    // Allocate on stack.
-    ICHECK_EQ((t_align & (t_align - 1)), 0) << "Alignment should be a power of 2";
-    ICHECK_GE(t_align, 4) << "Alignment should be at least 4";
-    // Round t_size up to a multiple of 4.
-    unsigned s_size = Stack.size();
-    unsigned s_align = t_align / 4;  // Alignment of T in words on the stack.
-    unsigned pad = ((s_size + s_align - 1) / s_align) * s_align - s_size;
-    Stack.insert(Stack.end(), pad / 4, 0u);
-    Stack.insert(Stack.end(), v, v + t_size / 4);
-  }
-}
-
-}  // namespace hexagon
-
-class HexagonModuleNode final : public runtime::HexagonHostModuleNode {
- public:
-  HexagonModuleNode(std::string data, std::string fmt,
-                    std::unordered_map<std::string, FunctionInfo> fmap, std::string asm_str,
-                    std::string obj_str, std::string ir_str, std::string bc_str,
-                    const std::set<std::string>& packed_c_abi)
-      : HexagonHostModuleNode(data, fmt, fmap, asm_str, obj_str, ir_str, bc_str, packed_c_abi),
-        hexagon_device_(),
-        dl_handle_(nullptr) {}
-
-  virtual ~HexagonModuleNode() {
-    if (dl_handle_) {
-      hexagon_device_->Unload(dl_handle_);
-    }
-  }
-
-  PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self) final;
-  std::string GetSource(const std::string& format) final;
-
- private:
-  void CallRemotePackedCABI(void* func_ptr, const TVMArgs& args, TVMRetValue* rv) const;
-  void CallRemoteDirect(void* func_ptr, const TVMArgs& args, TVMRetValue* rv) const;
-  void RemapArgs(const TVMArgs& args,
-                 std::vector<TVMValue>& values,              // NOLINT(*)
-                 std::vector<int>& type_codes,               // NOLINT(*)
-                 std::vector<void*>& remote_tensors) const;  // NOLINT(*)
-  void* CreateRemoteTensor(const DLTensor* T) const;
-  hexagon::ArgLayout BuildArgLayout(const TVMArgs& Aa) const;
-
-  std::shared_ptr<hexagon::Device> hexagon_device_;
-  void* dl_handle_ = nullptr;
-};
-
-void HexagonModuleNode::CallRemotePackedCABI(void* func_ptr, const TVMArgs& args,
-                                             TVMRetValue* rv) const {
-  // Remap all arguments, creating remote DLTensors.
-  std::vector<TVMValue> values;
-  std::vector<int> codes;
-  std::vector<void*> remote_tensors;
-
-  RemapArgs(args, values, codes, remote_tensors);
-  // The prototype of packed C function is
-  //   int (TVMValue* args, int* type_codes, int num_args,
-  //        TVMValue* ret_value, int* ret_code)
-  // The pointers must point to allocated space, the return information
-  // will be filled in by the callee.
-  // Allocate remote buffer to hold:
-  // 1. argument TVMValues,
-  // 2. return TVMValue,
-  // 3. argument type codes,
-  // 4. return type code.
-
-  int num_args = args.size();
-  int values_size = num_args * sizeof(TVMValue);
-  int codes_size = num_args * sizeof(int);
-  void* remote =
-      hexagon_device_->Alloc(values_size + sizeof(TVMValue) + codes_size + sizeof(int), 8);
-
-  // Copy all argument TVMValues to the remote space.
-  void* remote_values = remote;
-  void* remote_ret_value = static_cast<char*>(remote_values) + values_size;
-  void* remote_codes = static_cast<char*>(remote_ret_value) + sizeof(TVMValue);
-  void* remote_ret_code = static_cast<char*>(remote_codes) + codes_size;
-  hexagon_device_->CopyHostToDevice(remote_values, values.data(), values_size);
-  hexagon_device_->CopyHostToDevice(remote_codes, codes.data(), codes_size);
-
-  // Call the function: construct temporary values/codes and pass them through
-  // the arg layout building to prepare for the actual remote call.
-  TVMValue temp_values[5];
-  temp_values[0].v_handle = remote_values;
-  temp_values[1].v_handle = remote_codes;
-  temp_values[2].v_int64 = num_args;
-  temp_values[3].v_handle = remote_ret_value;
-  temp_values[4].v_handle = remote_ret_code;
-  int temp_codes[5] = {kTVMOpaqueHandle, kTVMOpaqueHandle, kDLInt, kTVMOpaqueHandle,
-                       kTVMOpaqueHandle};
-  TVMArgs temp_args(temp_values, temp_codes, 5);
-  hexagon::ArgLayout as = BuildArgLayout(temp_args);
-  hexagon_device_->Call(func_ptr, as.Scalar.data(), as.Scalar.size(), as.Stack.data(),
-                        as.Stack.size());
-
-  // TODO(kparzysz-quic): copy return value back
-  std::for_each(remote_tensors.begin(), remote_tensors.end(),
-                [this](void* t) { hexagon_device_->Free(t); });
-  hexagon_device_->Free(remote);
-}
-
-void HexagonModuleNode::CallRemoteDirect(void* func_ptr, const TVMArgs& args,
-                                         TVMRetValue* rv) const {
-  hexagon::ArgLayout as = BuildArgLayout(args);
-  hexagon_device_->Call(func_ptr, as.Scalar.data(), as.Scalar.size(), as.Stack.data(),
-                        as.Stack.size());
-}
-
-PackedFunc HexagonModuleNode::GetFunction(const std::string& name,
-                                          const ObjectPtr<Object>& sptr_to_self) {
-  auto f = fmap_.find(name);
-  if (f == fmap_.end()) return PackedFunc(nullptr);
-
-  if (!hexagon_device_) hexagon_device_ = hexagon::Device::Global();
-  if (!dl_handle_) dl_handle_ = hexagon_device_->Load(data_, fmt_);
-
-  // Get function pointer from device.
-  void* pf = hexagon_device_->Resolve(name);
-  // The cast result and the original share ownership. Do the cast here
-  // so that sptr_to_self can be destroyed (i.e. "func" will only have
-  // one shared pointer to HexagonModuleNode).
-  auto sref = ObjectRef(sptr_to_self);
-
-  if (packed_c_abi_funcs_.count(name)) {
-    // Calling packed C func, follow the TVMBackendPackedCFunc prototype.
-    return PackedFunc([pf, sref](TVMArgs args, TVMRetValue* rv) {
-      const auto* hm = sref.as<HexagonModuleNode>();
-      hm->CallRemotePackedCABI(pf, args, rv);
-    });
-  } else {
-    // Direct call to a non-packed-C function.
-    return PackedFunc([pf, sref](TVMArgs args, TVMRetValue* rv) {
-      const auto* hm = sref.as<HexagonModuleNode>();
-      hm->CallRemoteDirect(pf, args, rv);
-    });
-  }
-}
-
-std::string HexagonModuleNode::GetSource(const std::string& format) {
-  if (format == "s" || format == "asm") {
-    return asm_;
-  }
-  if (format == "ll") {
-    return ir_;
-  }
-  return "";
-}
-
-void HexagonModuleNode::RemapArgs(const TVMArgs& args, std::vector<TVMValue>& values,
-                                  std::vector<int>& type_codes,
-                                  std::vector<void*>& remote_tensors) const {
-  for (unsigned i = 0, e = args.size(); i != e; ++i) {
-    const TVMArgValue& a = args[i];
-
-    switch (unsigned tc = a.type_code()) {
-      case kTVMNDArrayHandle:
-      case kTVMDLTensorHandle: {
-        DLTensor* t = static_cast<DLTensor*>(a);
-        ICHECK(TVMDeviceExtType(t->device.device_type) == kDLHexagon);
-        TVMValue v;
-        v.v_handle = CreateRemoteTensor(t);
-        remote_tensors.push_back(v.v_handle);
-        values.push_back(v);
-        type_codes.push_back(tc);
-        break;
-      }
-
-      default:
-        values.push_back(a.value());
-        type_codes.push_back(tc);
-        break;
-    }
-  }
-}
-
-void* HexagonModuleNode::CreateRemoteTensor(const DLTensor* t) const {
-  /*
-    Layout of the DLTensor structure on Hexagon.
-
-    DLTensor:                       Size  offset
-      data               void*          4       0
-      device.device_type enum           1       4
-      <pad>                             3       5
-      device.device_id   int            4       8
-      ndim               int            4      12
-      dtype.code         uint8_t        1      16
-      dtype.bits         uint8_t        1      17
-      dtype.lanes        uint16_t       2      18
-      shape              int64_t*       4      20
-      strides            int64_t*       4      24
-      <pad>                             4      28
-      byte_offset        uint64_t       8      32
-      .. end ................................ 40
-  */
-  struct __attribute__((packed)) HexagonDLTensor {
-    uint32_t data;
-    uint8_t device_type;
-    uint8_t pad0[3];  // MUST BE ZERO!
-    int32_t device_id;
-    int32_t ndim;
-    uint8_t dtype_code;
-    uint8_t dtype_bits;
-    uint16_t dtype_lanes;
-    uint32_t shape;
-    uint32_t strides;
-    uint8_t pad1[4];
-    uint64_t byte_offset;
-  };
-
-  constexpr uint32_t size_ht = sizeof(HexagonDLTensor);
-  static_assert(size_ht == 40, "HexagonDLTensor should be 40 bytes");
-
-  // Shape and strides will contain ndim elements of size sizeof(uint64_t)
-  // each. Allocate them after the main structure.
-  int ndim = t->ndim;
-  uint32_t size_s = 8 * ndim;  // sizeof(uint64_t)*ndim
-  uint32_t size_ss = t->strides ? 2 * size_s : size_s;
-  void* remote = hexagon_device_->Alloc(size_ht + size_ss, 8);
-  uint32_t remote_as_int = reinterpret_cast<uintptr_t>(remote);
-  void* remote_ss = reinterpret_cast<void*>(remote_as_int + size_ht);
-
-  HexagonDLTensor local;
-  local.data = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(t->data));
-  local.device_type = uint8_t(t->device.device_type);
-  local.pad0[0] = local.pad0[1] = local.pad0[2] = 0;
-  local.device_id = t->device.device_id;
-  local.ndim = t->ndim;
-  local.dtype_code = t->dtype.code;
-  local.dtype_bits = t->dtype.bits;
-  local.dtype_lanes = t->dtype.lanes;
-  local.shape = remote_as_int + size_ht;
-  local.strides = t->strides ? remote_as_int + size_ht + size_s : 0u;
-  local.byte_offset = t->byte_offset;
-
-  std::vector<uint64_t> local_ss(size_ss / 8);
-  for (int i = 0; i != ndim; ++i) local_ss[i] = t->shape[i];
-  if (t->strides) {
-    for (int i = 0; i != ndim; ++i) local_ss[ndim + i] = t->strides[i];
-  }
-
-  hexagon_device_->CopyHostToDevice(remote, &local, sizeof local);
-  hexagon_device_->CopyHostToDevice(remote_ss, local_ss.data(), size_ss);
-  return remote;
-}
-
-hexagon::ArgLayout HexagonModuleNode::BuildArgLayout(const TVMArgs& As) const {
-  hexagon::ArgLayout Args;
-
-  for (unsigned i = 0, e = As.size(); i != e; ++i) {
-    const TVMArgValue& A = As[i];
-    unsigned TC = A.type_code();
-    switch (TC) {
-      // Treat all integers as 32-bit values.
-      case kDLInt:
-      case kDLUInt:
-        // KLUDGE: There is no distinction between 32- and 64-bit integer
-        // types, so there is no way to tell if the value being passed needs
-        // one or two registers. Assume that all integers are 32-bit, and
-        // simply abort if the actual value does not fit.
-        ICHECK_EQ(static_cast<int64_t>(A), static_cast<int32_t>(A));
-        Args.Push(static_cast<int>(A));
-        break;
-      // As above, treat floating point values as float32.
-      case kDLFloat:
-        ICHECK_EQ(static_cast<double>(A), static_cast<float>(static_cast<double>(A)));
-        Args.Push(static_cast<float>(static_cast<double>(A)));
-        break;
-
-      case kTVMOpaqueHandle:
-      case kTVMNullptr:
-      case kTVMObjectHandle:
-      case kTVMModuleHandle:
-      case kTVMPackedFuncHandle:
-        Args.Push(static_cast<void*>(A));
-        break;
-
-      case kTVMNDArrayHandle:
-      case kTVMDLTensorHandle:
-        LOG(FATAL) << __func__ << ": cannot handle DLTensor*, code:" << TC;
-
-      default:
-        LOG(FATAL) << __func__ << ": unhandled type code" << TC;
-        break;
-    }
-  }
-
-  return Args;
-}
-
-Module HexagonModuleCreate(std::string data, std::string fmt,
-                           std::unordered_map<std::string, FunctionInfo> fmap, std::string asm_str,
-                           std::string obj_str, std::string ir_str, std::string bc_str,
-                           const std::set<std::string>& packed_c_abi) {
-  auto n = make_object<HexagonModuleNode>(data, fmt, fmap, asm_str, obj_str, ir_str, bc_str,
-                                          packed_c_abi);
-  return Module(n);
-}
-
-// Load module from file.
-Module HexagonModuleLoadFile(const std::string& file_name, const std::string& format) {
-  std::string data = file_name;
-  std::unordered_map<std::string, FunctionInfo> fmap;
-  std::string fmt = GetFileFormat(file_name, format);
-  std::string meta_file = GetMetaFilePath(file_name);
-  LoadMetaDataFromFile(meta_file, &fmap);
-
-  std::string empty;
-  // This passes {} as the set of packed C functions. Won't work for
-  // standalone functions on target.
-  return HexagonModuleCreate(data, fmt, fmap, empty, empty, empty, empty, {});
-}
-
-namespace hexagon {
-
-std::shared_ptr<Device> Device::Global() {
-  // Declare device constructors.
-#ifdef __ANDROID__
-  std::shared_ptr<Device> CreateHexagonTarget(void);
-#else
-  std::shared_ptr<Device> CreateHexagonSimulator(void);
-#endif
-
-  static std::shared_ptr<Device> dev(
-#ifdef __ANDROID__
-      CreateHexagonTarget()
-#else
-      CreateHexagonSimulator()
-#endif
-  );  // NOLINT
-
-  return dev;
-}
-
-}  // namespace hexagon
-
-// Disable this: it conflicts with loadfile_hexagon from hexagon_common.cc
-// This was only used with offload on Android, which is being deprecated.
-// TVM_REGISTER_GLOBAL("runtime.module.loadfile_hexagon").set_body([](TVMArgs args, TVMRetValue* rv)
-// {
-//   *rv = HexagonModuleLoadFile(args[0], args[1]);
-// });
-
-}  // namespace runtime
-}  // namespace tvm
diff --git a/src/runtime/hexagon/android/hexagon_posix.cc b/src/runtime/hexagon/android/hexagon_posix.cc
deleted file mode 100644
index e98fefd1da..0000000000
--- a/src/runtime/hexagon/android/hexagon_posix.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#if defined(__hexagon__)
-
-#include <errno.h>
-#include <stdlib.h>
-
-extern "C" {
-int posix_memalign(void** memptr, size_t alignment, size_t size) __attribute__((nothrow));
-}
-
-__attribute__((nothrow)) int posix_memalign(void** memptr, size_t alignment, size_t size) {
-  if (void* p = memalign(alignment, size)) {
-    *memptr = p;
-    return 0;
-  }
-
-  return ENOMEM;
-}
-#endif
diff --git a/src/runtime/hexagon/android/sim/driver/CMakeLists.txt b/src/runtime/hexagon/android/sim/driver/CMakeLists.txt
deleted file mode 100644
index 75f185997a..0000000000
--- a/src/runtime/hexagon/android/sim/driver/CMakeLists.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-project(SIM_DEV C CXX)
-cmake_minimum_required(VERSION 3.0.2)
-
-set(CMAKE_SYSTEM_NAME "Linux")
-
-if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/config.cmake)
-  include(${CMAKE_CURRENT_BINARY_DIR}/config.cmake)
-endif()
-
-include(../../../../../../cmake/utils/Utils.cmake)
-
-if("${HEXAGON_ARCH}" STREQUAL "")
-  set(DEFAULT_HEXAGON_ARCH "v66")
-  message(STATUS "HEXAGON_ARCH not defined, defaulting to ${DEFAULT_HEXAGON_ARCH}")
-  set(HEXAGON_ARCH "${DEFAULT_HEXAGON_ARCH}")
-endif()
-
-set(EXTRA_CXX_FLAGS
-  "-O2"
-  "-Wno-format"
-  "-mhvx -mhvx-length=128b"
-  "-m${HEXAGON_ARCH}"
-  "-stdlib=libc++"
-)
-
-set(EXTRA_LINK_FLAGS
-  "-stdlib=libc++"
-  "-G0"
-  "-Wl,--force-dynamic"
-  "-Wl,--export-dynamic"
-  "-Wl,--whole-archive"   # This should link entire libc, libc++ and libc+abi.
-  "-Wl,--defsym=HEAP_SIZE=0x40000000"
-)
-
-string(REGEX REPLACE ";" " " EXTRA_CXX_FLAGS_STR "${EXTRA_CXX_FLAGS}")
-string(REGEX REPLACE ";" " " EXTRA_LINK_FLAGS_STR "${EXTRA_LINK_FLAGS}")
-
-set(CMAKE_CXX_STANDARD 11)
-set(CMAKE_CXX_FLAGS "${EXTRA_CXX_FLAGS_STR} ${CMAKE_CXX_FLAGS}")
-set(CMAKE_EXE_LINKER_FLAGS "${EXTRA_LINK_FLAGS_STR} ${CMAKE_EXE_LINKER_FLAGS}")
-
-# Set project properties.
-
-tvm_file_glob(GLOB SOURCE_FILES "*.cc")
-add_executable(sim_dev ${SOURCE_FILES})
-target_include_directories(sim_dev
-  PUBLIC "."
-  PUBLIC ".."
-  PUBLIC "../../../../../../include"
-)
-target_include_directories(sim_dev SYSTEM
-  PUBLIC "../../../../../../3rdparty/dlpack/include"
-)
-
-target_link_libraries(sim_dev "-ldl")
diff --git a/src/runtime/hexagon/android/sim/driver/README.md b/src/runtime/hexagon/android/sim/driver/README.md
deleted file mode 100644
index 3aee1a14b7..0000000000
--- a/src/runtime/hexagon/android/sim/driver/README.md
+++ /dev/null
@@ -1,38 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# Hexagon simulator driver
-
-The driver (`sim_dev` executable) is the process running on the Hexagon simulator that handles the Hexagon-side communication with the TVM runtime running on x86. The location of `sim_dev` should be added to `PATH` before running any python code that uses Hexagon. The `sim_dev` executable is not intended to be run by users, it is automatically loaded by the simulator control code (in `hexagon_device_sim.cc`).
-
-### Prerequisites
-
-1. Hexagon C/C++ toolchain (such as the one in Hexagon SDK version 3.5.0 or later).
-
-Hexagon SDK is available at //developer.qualcomm.com/software/hexagon-dsp-sdk.
-
-### Configuring
-
-Set
-```
-CMAKE_C_COMPILER=hexagon-clang
-CMAKE_CXX_COMPILER=hexagon-clang++
-```
-
-### Building
-
-There are no special options required for `make` (or the tool selected with `cmake`). The location of the resulting binary `sim_dev` should be added to `PATH`.
diff --git a/src/runtime/hexagon/android/sim/driver/fake_pthread.cc b/src/runtime/hexagon/android/sim/driver/fake_pthread.cc
deleted file mode 100644
index 3613186908..0000000000
--- a/src/runtime/hexagon/android/sim/driver/fake_pthread.cc
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include <cassert>
-#include <cerrno>
-#include <csetjmp>
-#include <cstddef>
-#include <cstdlib>
-#include <map>
-#include <vector>
-
-#include "pthread.h"
-#include "sched.h"
-
-/*!
- * Implementation of a subset of pthread API for single-threaded execution.
- *
- * They main idea is that the thread function ("start_routine" in the call
- * to pthread_create) is executed immediately. When pthread_create returns,
- * the thread function has already finished.
- *
- * Since the thread routine can itself call pthread_create, it is possible
- * to have multiple threads existing at the same time, although only the
- * last one is running.
- *
- * There are two main things that need to be taken care of:
- * - thread-specific data, i.e. pthread_setspecific, pthread_getspecific,
- *   and the handling of thread keys,
- * - handling of thread return values.
- *
- * Threads are identified by thread ids (of type pthread_t). The main process
- * thread has the id of 0, the remaining threads have ids starting at 1 and
- * incrementing by 1. For each thread there is some data (thread_info_t)
- * associated with it, and stored in "thread_data" map. When a thread
- * terminates, the corresponding entry from "thread_data" cannot be removed
- * until the return value is claimed (pthread_join), unless it is explicitly
- * discarded (pthread_detach). When a new thread is created, it gets the
- * first available id for which there is no entry in "thread_data". This
- * could be an id that was never allocated, or an id that was used, but
- * has since been removed from the map.
- * A thread can terminate through thread_exit. This means that when the
- * thread function calls thread_exit, the execution should return to the
- * pthread_create call that ran it. This is implemented via setjmp/longjmp
- * (neither longjmp nor pthread_exit unwind the stack).
- *
- * Any mutexes or condition variables cannot block, or else it would cause
- * a deadlock. Since there is only one thread running at a time, locking
- * a mutex or waiting for a condition always succeeds (returns immediately).
- */
-
-struct key_entry_t {
-  key_entry_t(void* v, void (*d)(void*)) : value(v), dtor(d) {}
-  void* value = nullptr;
-  void (*dtor)(void*) = nullptr;
-};
-
-struct thread_info_t {
-  thread_info_t() = default;
-  std::map<pthread_key_t, key_entry_t> keys;
-  std::jmp_buf env;
-  void* ret_value = nullptr;
-  bool finished = false;
-  bool detached = false;
-};
-
-static pthread_t main_thread_id = 0;
-
-static std::map<pthread_t, thread_info_t> thread_data = {
-    // Reserve the 0th entry.
-    {main_thread_id, {}}};
-
-static std::vector<pthread_t> running_threads = {main_thread_id};
-
-template <typename K, typename V>
-K first_available_key(const std::map<K, V>& m) {
-  auto i = m.begin(), e = m.end();
-  K key = 1;
-  for (; i != e && key == i->first; ++i, ++key) {
-  }
-  return key;
-}
-
-int pthread_cond_destroy(pthread_cond_t* cond) { return 0; }
-
-int pthread_cond_init(pthread_cond_t* __restrict cond, const pthread_condattr_t* __restrict attr) {
-  return 0;
-}
-
-int pthread_cond_signal(pthread_cond_t* cond) { return 0; }
-
-int pthread_cond_broadcast(pthread_cond_t* cond) { return 0; }
-
-int pthread_cond_timedwait(pthread_cond_t* __restrict cond, pthread_mutex_t* __restrict mutex,
-                           const struct timespec* __restrict abstime) {
-  return 0;
-}
-
-int pthread_cond_wait(pthread_cond_t* __restrict cond, pthread_mutex_t* __restrict mutex) {
-  return 0;
-}
-
-int pthread_mutexattr_init(pthread_mutexattr_t* attr) { return 0; }
-
-int pthread_mutexattr_destroy(pthread_mutexattr_t* attr) { return 0; }
-
-int pthread_mutexattr_settype(pthread_mutexattr_t* attr, int type) { return 0; }
-
-int pthread_mutexattr_gettype(const pthread_mutexattr_t* __restrict attr, int* __restrict type) {
-  *type = PTHREAD_MUTEX_NORMAL;
-  return 0;
-}
-
-int pthread_mutex_init(pthread_mutex_t* __restrict mutex,
-                       const pthread_mutexattr_t* __restrict attr) {
-  return 0;
-}
-
-int pthread_mutex_destroy(pthread_mutex_t* mutex) { return 0; }
-
-int pthread_mutex_lock(pthread_mutex_t* mutex) { return 0; }
-
-int pthread_mutex_trylock(pthread_mutex_t* mutex) { return 0; }
-
-int pthread_mutex_unlock(pthread_mutex_t* mutex) { return 0; }
-
-int pthread_once(pthread_once_t* once_control, void (*init_routine)(void)) {
-  static_assert(PTHREAD_ONCE_INIT != PTHREAD_ONCE_DONE,
-                "PTHREAD_ONCE_INIT must be different from PTHREAD_ONCE_DONE");
-  if (*once_control == PTHREAD_ONCE_INIT) {
-    init_routine();
-    *once_control = PTHREAD_ONCE_DONE;
-  }
-  return 0;
-}
-
-int pthread_equal(pthread_t t1, pthread_t t2) { return t1 == t2; }
-
-int pthread_create(pthread_t* thread, const pthread_attr_t* attr, void* (*start_routine)(void*),
-                   void* arg) {
-  std::jmp_buf& env = thread_data[pthread_self()].env;
-  volatile pthread_t tid;
-  if (setjmp(env) == 0) {
-    tid = first_available_key(thread_data);
-    *thread = tid;
-    running_threads.push_back(pthread_t(tid));
-    thread_info_t& thr = thread_data[pthread_t(tid)];
-    thr.ret_value = start_routine(arg);
-  }
-  thread_info_t& thr = thread_data[pthread_t(tid)];
-  thr.finished = true;
-  running_threads.pop_back();
-
-  // Destroy all keys.
-  bool repeat = true;
-  size_t iter = 0;
-  while (repeat && iter++ < PTHREAD_DESTRUCTOR_ITERATIONS) {
-    repeat = false;
-    // Assume that destructors can create new keys (i.e. modify the map).
-    for (size_t k = 0; k != PTHREAD_KEYS_MAX; ++k) {
-      auto f = thr.keys.find(k);
-      if (f == thr.keys.end()) {
-        continue;
-      }
-      key_entry_t& key = f->second;
-      if (key.dtor == nullptr || key.value == nullptr) {
-        continue;
-      }
-      key.dtor(key.value);
-      repeat = true;
-    }
-  }
-
-  if (thr.detached) {
-    thread_data.erase(pthread_t(tid));
-  }
-
-  return 0;
-}
-
-int pthread_join(pthread_t thread, void** retval) {
-  auto f = thread_data.find(thread);
-  if (f == thread_data.end()) {
-    return ESRCH;
-  }
-  thread_info_t& thr = f->second;
-  if (!thr.finished) {
-    return EDEADLK;
-  }
-  if (retval != nullptr) {
-    *retval = thr.ret_value;
-  }
-  thread_data.erase(f);
-  return 0;
-}
-
-int pthread_detach(pthread_t thread) {
-  auto f = thread_data.find(thread);
-  if (f == thread_data.end()) {
-    return ESRCH;
-  }
-  // Can discard the return value.
-  f->second.detached = true;
-  return 0;
-}
-
-void pthread_exit(void* retval) {
-  pthread_t sid = pthread_self();
-  if (sid != main_thread_id) {
-    thread_info_t& self = thread_data[sid];
-    self.ret_value = retval;
-    self.finished = true;
-    longjmp(self.env, 1);
-  }
-  exit(0);  // Only executes for the main thread, plus silences
-            // the "should not return" warning.
-}
-
-int pthread_key_create(pthread_key_t* key, void (*destructor)(void*)) {
-  if (key == nullptr) {
-    return EINVAL;
-  }
-  auto& keys = thread_data[pthread_self()].keys;
-  pthread_key_t k = first_available_key(keys);
-  if (k >= PTHREAD_KEYS_MAX) {
-    return EAGAIN;
-  }
-  *key = k;
-  keys.emplace(k, key_entry_t{nullptr, destructor});
-  return 0;
-}
-
-int pthread_key_delete(pthread_key_t key) {
-  auto& keys = thread_data[pthread_self()].keys;
-  auto f = keys.find(key);
-  if (f == keys.end()) {
-    return EINVAL;
-  }
-  // pthread_key_delete does not call key destructors.
-  keys.erase(f);
-  return 0;
-}
-
-int pthread_setspecific(pthread_key_t key, const void* value) {
-  auto& keys = thread_data[pthread_self()].keys;
-  auto f = keys.find(key);
-  if (f == keys.end()) {
-    return EINVAL;
-  }
-  f->second.value = const_cast<void*>(value);
-  return 0;
-}
-
-void* pthread_getspecific(pthread_key_t key) {
-  auto& keys = thread_data[pthread_self()].keys;
-  auto f = keys.find(key);
-  if (f != keys.end()) {
-    return f->second.value;
-  }
-  return nullptr;
-}
-
-pthread_t pthread_self(void) { return running_threads.back(); }
-
-int sched_yield(void) { return 0; }
-
-#ifdef __cplusplus_
-extern "C" int nanosleep(const struct timespec* req, struct timespec* rem);
-#endif
-
-int nanosleep(const struct timespec* req, struct timespec* rem) { return 0; }
diff --git a/src/runtime/hexagon/android/sim/driver/pthread.h b/src/runtime/hexagon/android/sim/driver/pthread.h
deleted file mode 100644
index b4d559c44f..0000000000
--- a/src/runtime/hexagon/android/sim/driver/pthread.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef TVM_RUNTIME_HEXAGON_ANDROID_SIM_DRIVER_PTHREAD_H_
-#define TVM_RUNTIME_HEXAGON_ANDROID_SIM_DRIVER_PTHREAD_H_
-
-#define _PROVIDE_POSIX_TIME_DECLS 1
-#include <time.h>
-#undef _PROVIDE_POSIX_TIME_DECLS
-
-typedef int pthread_t;
-typedef int pthread_attr_t;
-typedef int pthread_cond_t;
-typedef int pthread_condattr_t;
-typedef int pthread_key_t;
-typedef int pthread_mutex_t;
-typedef int pthread_mutexattr_t;
-typedef int pthread_once_t;
-
-enum {
-  PTHREAD_COND_INITIALIZER,
-  PTHREAD_MUTEX_DEFAULT,
-  PTHREAD_MUTEX_ERRORCHECK,
-  PTHREAD_MUTEX_INITIALIZER,
-  PTHREAD_MUTEX_NORMAL,
-  PTHREAD_MUTEX_RECURSIVE,
-  PTHREAD_ONCE_INIT = 0,  // Must be same as in QuRT
-  PTHREAD_ONCE_DONE,      // Non-standard
-};
-
-const size_t PTHREAD_KEYS_MAX = 128;
-const size_t PTHREAD_DESTRUCTOR_ITERATIONS = 4;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-int pthread_cond_destroy(pthread_cond_t* cond);
-int pthread_cond_init(pthread_cond_t* __restrict cond, const pthread_condattr_t* __restrict attr);
-int pthread_cond_signal(pthread_cond_t* cond);
-int pthread_cond_broadcast(pthread_cond_t* cond);
-int pthread_cond_timedwait(pthread_cond_t* __restrict cond, pthread_mutex_t* __restrict mutex,
-                           const struct timespec* __restrict abstime);
-int pthread_cond_wait(pthread_cond_t* __restrict cond, pthread_mutex_t* __restrict mutex);
-
-int pthread_mutexattr_init(pthread_mutexattr_t* attr);
-int pthread_mutexattr_destroy(pthread_mutexattr_t* attr);
-int pthread_mutexattr_gettype(const pthread_mutexattr_t* __restrict attr, int* __restrict type);
-int pthread_mutexattr_settype(pthread_mutexattr_t* attr, int type);
-
-int pthread_mutex_init(pthread_mutex_t* __restrict mutex,
-                       const pthread_mutexattr_t* __restrict attr);
-int pthread_mutex_destroy(pthread_mutex_t* mutex);
-int pthread_mutex_lock(pthread_mutex_t* mutex);
-int pthread_mutex_trylock(pthread_mutex_t* mutex);
-int pthread_mutex_unlock(pthread_mutex_t* mutex);
-
-int pthread_once(pthread_once_t* once_control, void (*init_routine)(void));
-int pthread_equal(pthread_t t1, pthread_t t2);
-
-int pthread_create(pthread_t* thread, const pthread_attr_t* attr, void* (*start_routine)(void*),
-                   void* arg);
-int pthread_join(pthread_t thread, void** retval);
-int pthread_detach(pthread_t thread);
-void pthread_exit(void* retval) __attribute__((__noreturn__));
-
-int pthread_key_create(pthread_key_t* key, void (*destructor)(void*));
-int pthread_key_delete(pthread_key_t key);
-int pthread_setspecific(pthread_key_t key, const void* value);
-void* pthread_getspecific(pthread_key_t key);
-
-pthread_t pthread_self(void);
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // TVM_RUNTIME_HEXAGON_ANDROID_SIM_DRIVER_PTHREAD_H_
diff --git a/src/runtime/hexagon/android/sim/driver/sched.h b/src/runtime/hexagon/android/sim/driver/sched.h
deleted file mode 100644
index 621ef218b7..0000000000
--- a/src/runtime/hexagon/android/sim/driver/sched.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef TVM_RUNTIME_HEXAGON_ANDROID_SIM_DRIVER_SCHED_H_
-#define TVM_RUNTIME_HEXAGON_ANDROID_SIM_DRIVER_SCHED_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-int sched_yield(void);
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // TVM_RUNTIME_HEXAGON_ANDROID_SIM_DRIVER_SCHED_H_
diff --git a/src/runtime/hexagon/android/sim/driver/sim_device.cc b/src/runtime/hexagon/android/sim/driver/sim_device.cc
deleted file mode 100644
index c8cf783894..0000000000
--- a/src/runtime/hexagon/android/sim/driver/sim_device.cc
+++ /dev/null
@@ -1,560 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
-  Required options:
-    -ldl -G0                  For dlinit/dlopen/dlclose.
-    -Wl,--force-dynamic       Make this a dynamic executable (with dynamic
-                              symbol table).
-    -Wl,-E                    Export all defined symbols as dynamic.
-    -Wl,--whole-archive       Link the entire contents of libc.
-    -mhvx -mhvx-length=128b   Enable HVX.
-    -Wno-format               Silence format warning (unsigned vs uint32_t).
-*/
-
-#include <assert.h>
-#include <dlfcn.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <unistd.h>
-
-#include <algorithm>
-#include <iterator>
-#include <string>
-#include <vector>
-
-#include "hexagon_sim_proto.h"
-#include "pthread.h"
-#include "tvm/runtime/c_runtime_api.h"
-
-static std::string timeNow() {
-  char str[11];  // [hh:mm:ss]
-  time_t time_value = time(NULL);
-  tm* pnow = localtime(&time_value);  // NOLINT(runtime/threadsafe_fn)
-
-  snprintf(str, sizeof(str), "[%02d:%02d:%02d]", pnow->tm_hour, pnow->tm_min, pnow->tm_sec);
-  return std::string(str);
-}
-
-#define LOG(FMT, ...) \
-  fprintf(stderr, "%s %s:%d: " FMT "\n", timeNow().c_str(), __FILE__, __LINE__, ##__VA_ARGS__)
-
-using HVX_Vector = int __attribute__((__vector_size__(128))) __attribute__((aligned(128)));
-
-static unsigned getVectorLength() {
-  HVX_Vector v = __builtin_HEXAGON_V6_lvsplatw_128B(0x01010101);
-  unsigned char* p = reinterpret_cast<unsigned char*>(&v);
-  if (p[127] == 1) return 128;
-  assert(p[63] == 1);
-  return 64;
-}
-
-extern "C" {
-// Print vector functions. They can be used to help debug tensorized
-// code, via
-// ib.emit(tvm.call_extern('int32', 'V6_pv8', 'vector:', v))
-// ib.emit(tvm.call_extern('int32', 'V6_pv16', 'info:', v))
-// ib.emit(tvm.call_extern('int32', 'V6_pv32', 'value:', v))
-
-// The first argument is a string printed before the vector contents.
-int V6_pv8(const char* s, HVX_Vector v);
-int V6_pv16(const char* s, HVX_Vector v);
-int V6_pv32(const char* s, HVX_Vector v);
-}
-
-int V6_pv8(const char* s, HVX_Vector v) {
-  unsigned vlen = getVectorLength();
-  uint8_t* ptr = reinterpret_cast<uint8_t*>(&v);
-  fprintf(stderr, "%s:", s);
-  for (unsigned i = 0; i != vlen; ++i) {
-    fprintf(stderr, " %02x", ptr[i]);
-  }
-  fprintf(stderr, "\n");
-  return 0;
-}
-
-int V6_pv16(const char* s, HVX_Vector v) {
-  unsigned vlen = getVectorLength();
-  uint16_t* ptr = reinterpret_cast<uint16_t*>(&v);
-  fprintf(stderr, "%s:", s);
-  for (unsigned i = 0; i != vlen / sizeof(uint16_t); ++i) {
-    fprintf(stderr, " %04x", ptr[i]);
-  }
-  fprintf(stderr, "\n");
-  return 0;
-}
-
-int V6_pv32(const char* s, HVX_Vector v) {
-  unsigned vlen = getVectorLength();
-  uint32_t* ptr = reinterpret_cast<uint32_t*>(&v);
-  fprintf(stderr, "%s:", s);
-  for (unsigned i = 0; i != vlen / sizeof(uint32_t); ++i) {
-    fprintf(stderr, " %08x", ptr[i]);
-  }
-  fprintf(stderr, "\n");
-  return 0;
-}
-
-extern "C" {
-// Function referenced from libc++.a, but not defined in libc.a.
-int clock_gettime(clockid_t clock_id, struct timespec* tp);
-// pthread_create is wrapped so that we can set a bigger stack size
-// for QuRT. Here this isn't needed, but we still need to implement
-// the wrapper.
-int __wrap_pthread_create(pthread_t* thread, const pthread_attr_t* attr,
-                          void* (*start_routine)(void*), void* arg);
-}
-
-int clock_gettime(clockid_t clock_id, struct timespec* tp) {
-  // Stub implementation.
-  return 0;
-}
-
-int __wrap_pthread_create(pthread_t* thread, const pthread_attr_t* attr,
-                          void* (*start_routine)(void*), void* arg) {
-  LOG("%s", __func__);
-  return pthread_create(thread, attr, start_routine, arg);
-}
-
-// FIXME(kparzysz-quic): query the cfg register to compute the VTCM base.
-// This works now.
-const unsigned int TCM_BASE = 0xD8000000;
-const unsigned int VTCM_BASE = TCM_BASE + 0x400000;
-
-class Allocator {
- private:
-  struct Block {
-    Block(void* p, size_t s) : ptr_(p), size_(s), vtcm_(false) {}
-    Block(void* p, size_t s, bool v) : ptr_(p), size_(s), vtcm_(v) {}
-    bool operator<(const Block& b) const { return uintptr_t(ptr_) < uintptr_t(b.ptr_); }
-    void* ptr_;
-    size_t size_;
-    bool vtcm_;
-  };
-
-  using vector_type = std::vector<Block>;
-  using iterator = vector_type::iterator;
-  vector_type allocations_;
-
-  uintptr_t cur_vtcm = VTCM_BASE;
-
- public:
-  void* alloc(unsigned size, size_t align);
-  void* vtcm_alloc(unsigned size, size_t align);
-  void free(void* p);
-};
-
-void* Allocator::alloc(unsigned size, size_t align) {
-  void* ptr = aligned_alloc(align, size);
-  if (ptr == nullptr) {
-    perror("device: error allocating memory:");
-    return ptr;
-  }
-
-  Block b(ptr, size);
-  iterator i = std::lower_bound(allocations_.begin(), allocations_.end(), b);
-  iterator w = allocations_.insert(i, b);
-  if (w != allocations_.begin()) {
-    iterator pw = w - 1;
-    assert(uintptr_t(pw->ptr_) + pw->size_ < uintptr_t(w->ptr_));
-  }
-  if (w + 1 != allocations_.end()) {
-    iterator nw = w + 1;
-    assert(uintptr_t(w->ptr_) + w->size_ <= uintptr_t(nw->ptr_));
-  }
-
-  LOG("device: allocated %d bytes aligned at %d: %p", size, align, ptr);
-  return ptr;
-}
-
-// For now, just allocation sequentially. This needs to be improved to use a
-// free list.
-void* Allocator::vtcm_alloc(unsigned size, size_t align) {
-  uintptr_t a = cur_vtcm;
-  a = (a + (align - 1)) & -align;
-  cur_vtcm = a + size;
-  void* ptr = reinterpret_cast<void*>(a);
-  if (ptr == nullptr) {
-    perror("device: error allocating vtcm memory:");
-    return ptr;
-  }
-
-  Block b(ptr, size, true);
-  iterator i = std::lower_bound(allocations_.begin(), allocations_.end(), b);
-  iterator w = allocations_.insert(i, b);
-  if (w != allocations_.begin()) {
-    iterator pw = w - 1;
-    assert(uintptr_t(pw->ptr_) + pw->size_ <= uintptr_t(w->ptr_));
-  }
-  if (w + 1 != allocations_.end()) {
-    iterator nw = w + 1;
-    assert(uintptr_t(w->ptr_) + w->size_ <= uintptr_t(nw->ptr_));
-  }
-
-  LOG("device: allocated vtcm %d bytes aligned at %d: %p", size, align, ptr);
-  return ptr;
-}
-
-void Allocator::free(void* ptr) {
-  LOG("device: freeing %p", ptr);
-  iterator i = std::lower_bound(allocations_.begin(), allocations_.end(), Block(ptr, 0));
-  assert(i != allocations_.end());
-  assert(i->ptr_ == ptr);
-  if (!i->vtcm_) ::free(i->ptr_);
-  allocations_.erase(i);
-}
-
-static void printMsgCall(const MsgCall& mc) {
-  auto to_dec_string = [](int v) {
-    char tmp[11];
-    snprintf(tmp, sizeof(tmp), "%d", v);
-    return std::string(tmp);
-  };
-  auto to_hex_string = [](uint32_t v) {
-    char tmp[9];
-    snprintf(tmp, sizeof(tmp), "%lx", v);
-    return std::string(tmp);
-  };
-  std::string str = "device: launching " + to_hex_string(mc.func_va) +
-                    " sc:" + to_dec_string(mc.scalar_num) + " {";
-  for (unsigned i = 0; i != mc.scalar_num; ++i) {
-    str += ' ' + to_hex_string(mc.data[i]);
-    if (i + 1 != mc.scalar_num) str += ',';
-  }
-  str += " }, st:" + to_dec_string(mc.stack_num) + " {";
-  for (unsigned i = 0; i != mc.stack_num; ++i) {
-    str += ' ' + to_hex_string(mc.data[i + mc.scalar_num]);
-    if (i + 1 != mc.stack_num) str += ',';
-  }
-  str += " }";
-  LOG("%s", str.c_str());
-}
-
-static std::vector<MsgCall*> task_queue;
-
-struct Environment {
-  Allocator alloc;
-  void* dl_handle = nullptr;
-};
-
-extern "C" {
-volatile Message message_buffer;
-int dispatch(Environment* env) __attribute__((noinline));
-}
-
-static volatile unsigned char payload_buffer[4096];
-
-static void setMsg(uint32_t code, uint32_t len, uint32_t va) {
-  message_buffer.code = code;
-  message_buffer.len = len;
-  message_buffer.va = va;
-}
-
-inline void* pointer(uint32_t v) { return reinterpret_cast<void*>(static_cast<uintptr_t>(v)); }
-
-inline uint32_t va(const volatile void* p) {
-  return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(p));
-}
-
-__attribute__((naked)) uint32_t launcher(volatile MsgCall* mc, uint64_t* pcc) {
-  __asm__(
-      "// This function is intentionally written to be readable,      \n"
-      "// rather than fast.                                           \n"
-      "// r0 = value of 'volatile MsgCall *mc'                        \n"
-      "// r1 = address where to store the program cycle count         \n"
-      "{ memd(r29+#-16) = r21:20                                      \n"
-      "  allocframe(#24)          }                                   \n"
-      "{ memd(r29+#0) = r17:16                                        \n"
-      "  memd(r29+#8) = r19:18    }                                   \n"
-      "{ r17:16 = combine(r1,r0)                                      \n"
-      "  r18 = r29                                                    \n"
-      "  r1 = memw(r0+#4)            // scalar_num                    \n"
-      "  r2 = memw(r0+#8)         }  // stack_num                     \n"
-      "// If there are no stack values, skip the stack setup.         \n"
-      "{ p0 = cmp.eq(r2,#0)                                           \n"
-      "  if (p0.new) jump:t .Llauncher1 }                             \n"
-
-      "// Allocate space on the stack. Let r2 = needed space          \n"
-      "// rounded up to a multiple of 8.                              \n"
-      "{ loop0(.Llauncher0,r2)                                        \n"
-      "  r2 = asl(r2,#2)          }                                   \n"
-      "{ r2 = add(r2,#4)          }                                   \n"
-      "{ r2 = clrbit(r2,#2)       }                                   \n"
-      "{ r29 = sub(r29,r2)        }                                   \n"
-
-      "// Copy stack contents onto the stack. Stack contents start    \n"
-      "// at r3 = r0 + offsetof(data) + scalar_num*4                  \n"
-      "{ r3 = addasl(r0,r1,#2)                                        \n"
-      "  r4 = r29                 }                                   \n"
-      "{ r3 = add(r3,#12)         } // offsetof(data)                 \n"
-      ".Llauncher0:                                                   \n"
-      "{ r5 = memw(r3++#4)                                            \n"
-      "  memw(r4++#4) = r5.new    } :endloop0                         \n"
-
-      "// Load registers. Some of the loaded data may actually be     \n"
-      "// values from the stack part of 'data', but it's not an issue.\n"
-      ".Llauncher1:                                                   \n"
-      "{ r0 = memw(r16+#12)         // mc + offsetof(data)            \n"
-      "  r1 = memw(r16+#16)       }                                   \n"
-      "{ r2 = memw(r16+#20)                                           \n"
-      "  r3 = memw(r16+#24)       }                                   \n"
-      "{ r4 = memw(r16+#28)                                           \n"
-      "  r5 = memw(r16+#32)       }                                   \n"
-
-      "// Call.                                                       \n"
-      "{ r6 = memw(r16+#0)                                            \n"
-      "  r21:20 = upcycle         }                                   \n"
-      "{ callr r6                 }                                   \n"
-
-      "// Restore stack pointer (free up r18), calculate cycle count. \n"
-      "{ r29 = r18                                                    \n"
-      "  r19:18 = upcycle         }                                   \n"
-      "{ r19:18 = sub(r19:18, r21:20) }                               \n"
-
-      "// Store pcount, restore non-volatile registers, and return.   \n"
-      "{ memd(r17+#0) = r19:18                                        \n"
-      "  r21:20 = memd(r29+#16)   }                                   \n"
-      "{ r19:18 = memd(r29+#8)                                        \n"
-      "  r17:16 = memd(r29+#0)    }                                   \n"
-      "{ dealloc_return           } // implicit-use r1:0              \n");
-}
-
-int dispatch(Environment* env) {
-  uint32_t code = message_buffer.code;
-  // Special handling of MsgReq.
-  if (code == kMsgReq) {
-    assert(message_buffer.len <= sizeof(payload_buffer));
-    setMsg(kMsgAck, sizeof(payload_buffer), va(payload_buffer));
-    return 0;
-  }
-
-  switch (code) {
-    case kAlloc: {
-      LOG("device: {kAlloc, %lu, %lx}", message_buffer.len, message_buffer.va);
-      assert(message_buffer.len == sizeof(MsgAlloc));
-      auto* ma = reinterpret_cast<volatile MsgAlloc*>(message_buffer.va);
-      void* p = env->alloc.alloc(ma->size, ma->align);
-      reinterpret_cast<volatile MsgPointer*>(payload_buffer)->va = va(p);
-      setMsg(kNone, sizeof(MsgPointer), va(payload_buffer));
-      break;
-    }
-    case kFree: {
-      LOG("device: {kFree, %lu, %lx}", message_buffer.len, message_buffer.va);
-      assert(message_buffer.len == sizeof(MsgPointer));
-      auto* mp = reinterpret_cast<volatile MsgPointer*>(message_buffer.va);
-      env->alloc.free(pointer(mp->va));
-      setMsg(kNone, 0u, 0u);
-      break;
-    }
-    case kAllocVtcm: {
-      LOG("device: {kAllocVtcm, %lu, %lx}", message_buffer.len, message_buffer.va);
-      assert(message_buffer.len == sizeof(MsgAlloc));
-      auto* ma = reinterpret_cast<volatile MsgAlloc*>(message_buffer.va);
-      void* p = env->alloc.vtcm_alloc(ma->size, ma->align);
-      reinterpret_cast<volatile MsgPointer*>(payload_buffer)->va = va(p);
-      setMsg(kNone, sizeof(MsgPointer), va(payload_buffer));
-      break;
-    }
-    case kCopy: {
-      LOG("device: {kCopy, %lu, %lx}", message_buffer.len, message_buffer.va);
-      assert(message_buffer.len == sizeof(MsgCopy));
-      auto* mc = reinterpret_cast<volatile MsgCopy*>(message_buffer.va);
-      memcpy(pointer(mc->dst), pointer(mc->src), mc->len);
-      setMsg(kNone, 0u, 0u);
-      break;
-    }
-    case kLoad: {
-      if (env->dl_handle != nullptr) dlclose(env->dl_handle);
-      const char* name = static_cast<const char*>(pointer(message_buffer.va));
-      // LOG(stderr, "device: dlopen(%s)", name);
-      env->dl_handle = dlopen(name, RTLD_LAZY);
-      if (env->dl_handle == nullptr) LOG("dlopen: %s\n", dlerror());
-      assert(env->dl_handle != nullptr);
-      reinterpret_cast<volatile MsgPointer*>(payload_buffer)->va = va(env->dl_handle);
-      setMsg(kNone, sizeof(MsgPointer), va(payload_buffer));
-      break;
-    }
-    case kUnload: {
-      assert(env->dl_handle != nullptr);
-      assert(message_buffer.len == sizeof(MsgPointer));
-      auto* mp = reinterpret_cast<volatile MsgPointer*>(message_buffer.va);
-      assert(pointer(mp->va) == env->dl_handle);
-      dlclose(env->dl_handle);
-      env->dl_handle = nullptr;
-      setMsg(kNone, 0u, 0u);
-      break;
-    }
-    case kResolve: {
-      LOG("device: {kResolve, %lu, %lx}", message_buffer.len, message_buffer.va);
-      assert(env->dl_handle != nullptr);
-      dlerror();
-      const char* name = static_cast<const char*>(pointer(message_buffer.va));
-      void* s = dlsym(env->dl_handle, name);
-      reinterpret_cast<volatile MsgPointer*>(payload_buffer)->va = va(s);
-      setMsg(kNone, sizeof(MsgPointer), va(payload_buffer));
-      break;
-    }
-    case kCall: {
-      LOG("device: {kCall, %lu, %lx}", message_buffer.len, message_buffer.va);
-      // Add the task to the queue.
-      auto* mc = reinterpret_cast<MsgCall*>(message_buffer.va);
-      uint32_t size = 4 * (3 + mc->scalar_num + mc->stack_num);
-      MsgCall* t = static_cast<MsgCall*>(malloc(size));
-      memcpy(t, mc, size);
-      task_queue.push_back(t);
-      // Return 0.
-      *reinterpret_cast<volatile uint32_t*>(payload_buffer) = 0;
-      setMsg(kNone, sizeof(uint32_t), va(payload_buffer));
-      break;
-    }
-    case kFlush: {
-      LOG("device: {kFlush}");
-      LOG("device: %d tasks in the queue", task_queue.size());
-      // Execute all tasks from the queue and release memory buffers
-      // for as long as the return values are 0. Upon receiving a non-zero
-      // return value, continue freeing memory but no longer execute
-      // any tasks. The task queue will be cleared in any case.
-      uint32_t rv = 0;
-      uint64_t pcc;  // Pcycle counter, will be 0 under simulator (upcycle).
-      for (MsgCall* t : task_queue) {
-        if (rv == 0) {
-          printMsgCall(*t);
-          rv = launcher(t, &pcc);
-          LOG("device: execution took %lld pcycles", pcc);
-        }
-        free(t);
-      }
-      task_queue.clear();
-      *reinterpret_cast<volatile uint32_t*>(payload_buffer) = rv;
-      setMsg(kNone, sizeof(uint32_t), va(payload_buffer));
-      break;
-    }
-    default:
-      LOG("device: unknown code: %lu", message_buffer.code);
-      abort();
-      break;
-  }
-  return 0;
-}
-
-extern "C" {
-int acquire_vector_unit(int);
-void release_vector_unit();
-}
-
-static void makePathList(const std::string& arg, std::vector<std::string>* list) {
-  size_t p = 0, e = arg.size();
-  std::vector<char> tmp;
-
-  while (p < e) {
-    tmp.clear();
-    bool check_next = true;
-    size_t i = p;
-    for (; i != e; ++i) {
-      char c = arg[i];
-      if (check_next) {
-        if (c == '\\') {
-          check_next = false;
-          continue;
-        } else if (c == ':') {
-          break;
-        }
-      }
-      check_next = true;
-      tmp.push_back(c);
-    }
-    if (!tmp.empty()) list->emplace_back(tmp.begin(), tmp.end());
-    p = i + 1;
-  }
-}
-
-static std::string findInPaths(const std::string& filename, const std::string& paths) {
-  std::vector<std::string> path_list;
-  makePathList(paths, &path_list);
-
-  for (const auto& p : path_list) {
-    std::string pf = p + '/' + filename;
-    if (access(pf.c_str(), X_OK) == 0) return std::move(pf);
-  }
-  // If the search failed, try bare filename. If it cannot be loaded,
-  // dlerror will print a meaningful message.
-  return filename;
-}
-
-// Presence of this function indicates that sim_dev is running.
-extern "C" int running_in_sim_dev_17bc90206f6cf5a7();
-int running_in_sim_dev_17bc90206f6cf5a7() { return 0; }
-
-int main(int argc, char* argv[]) {
-  int opt;
-  std::string ld_path;
-  while ((opt = getopt(argc, argv, "L:")) != -1) {
-    switch (opt) {
-      case 'L':
-        ld_path += ':' + std::string(optarg);
-        break;
-      case '?':
-        LOG("Usage %s: [-L path1[:path2...]]", argv[0]);
-        return 1;
-    }
-  }
-
-  std::string rt_path = findInPaths("libtvm_runtime.so", ld_path);
-  LOG("TVM runtime path: %s", rt_path.c_str());
-
-  Environment env;
-  acquire_vector_unit(0);
-
-  const char* builtin[] = {
-      "libgcc.so",    "libc.so",     "libc++.so",
-      "libc++abi.so", "libc++.so.1", "libc++abi.so.1"  // Alternative names.
-  };
-  dlinit(sizeof(builtin) / sizeof(builtin[0]), const_cast<char**>(builtin));
-  void* rt_handle = dlopen(rt_path.c_str(), RTLD_GLOBAL);
-  if (rt_handle == nullptr) {
-    LOG("error loading TVM runtime: %s", dlerror());
-    return 1;
-  }
-
-  // When running TVM runtime on Hexagon there is no longer a device
-  // for Hexagon, but standalone ops can still refer to it. All of
-  // required DeviceAPI's functionality is adequately implemented
-  // via the CPU device, so remap device_api.hexagon to device_api.cpu.
-  auto* get_global =
-      reinterpret_cast<decltype(&TVMFuncGetGlobal)>(dlsym(rt_handle, "TVMFuncGetGlobal"));
-  assert(get_global != nullptr);
-  auto* register_global =
-      reinterpret_cast<decltype(&TVMFuncRegisterGlobal)>(dlsym(rt_handle, "TVMFuncRegisterGlobal"));
-  assert(register_global != nullptr);
-
-  TVMFunctionHandle cpu_api;
-  if (get_global("device_api.cpu", &cpu_api) != 0 ||
-      register_global("device_api.hexagon", cpu_api, true) != 0) {
-    LOG("error setting device_api.hexagon");
-    return 1;
-  }
-
-  while (!dispatch(&env)) {
-  }
-
-  dlclose(rt_handle);
-  release_vector_unit();
-  return 0;
-}
diff --git a/src/runtime/hexagon/android/sim/hexagon_device_sim.cc b/src/runtime/hexagon/android/sim/hexagon_device_sim.cc
deleted file mode 100644
index 05559a1d1a..0000000000
--- a/src/runtime/hexagon/android/sim/hexagon_device_sim.cc
+++ /dev/null
@@ -1,1468 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include <dmlc/optional.h>
-#include <stdlib.h>
-#include <tvm/runtime/logging.h>
-#include <unistd.h>
-
-#include <algorithm>
-#include <deque>
-#include <iomanip>
-#include <iterator>
-#include <locale>
-#include <memory>
-#include <sstream>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "../hexagon_device.h"
-#include "HexagonWrapper.h"
-#include "hexagon_sim_proto.h"
-
-namespace tvm {
-namespace runtime {
-namespace hexagon {
-
-static_assert(sizeof(HEX_VA_t) == sizeof(uint32_t), "Hexagon VA must be uint32");
-
-template <typename T>
-struct unalign {
-  using type = struct { T value; } __attribute__((aligned(1), packed));
-};
-
-template <unsigned N>
-struct uint {
-  using type = void;
-};
-
-template <>
-struct uint<8> {
-  using type = uint64_t;
-};
-template <>
-struct uint<4> {
-  using type = uint32_t;
-};
-template <>
-struct uint<2> {
-  using type = uint16_t;
-};
-template <>
-struct uint<1> {
-  using type = uint8_t;
-};
-
-using string_list = std::deque<std::string>;
-
-namespace detail {
-
-template <typename T, typename... Args>
-std::unique_ptr<T> make_unique(Args... args) {
-  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
-}
-template <typename T>
-std::unique_ptr<T> make_unique(size_t size) {
-  using U = typename std::remove_extent<T>::type;
-  return std::unique_ptr<T>(new U[size]());
-}
-
-// An "Optional" class, originally a replacement for llvm::Optional, then an
-// extension of dmlc::optional to make it compatible with C++17's std::optional.
-template <typename T>
-struct Optional : public dmlc::optional<T> {
-  using dmlc::optional<T>::optional;
-  using dmlc::optional<T>::operator=;
-  Optional(const T& val) : dmlc::optional<T>(val) {}  // NOLINT(*)
-
-  T* operator->() { return &this->operator*(); }
-  const T* operator->() const { return &this->operator*(); }
-};
-
-// Converter class to translate vector<string> to char**. This relieves the
-// user from memory reallocation and copying.
-struct non_const_str {
-  non_const_str() {}
-  explicit non_const_str(const std::string& str) : non_const_str(std::vector<std::string>{str}) {}
-  explicit non_const_str(const std::vector<std::string>& vec) {
-    for (const std::string& s : vec) {
-      auto c = detail::make_unique<char[]>(s.size() + 1);
-      std::strncpy(c.get(), s.c_str(), s.size() + 1);
-      storage_.push_back(std::move(c));
-      pointers_.push_back(storage_.back().get());
-    }
-  }
-  non_const_str(non_const_str&& ncs) { *this = std::move(ncs); }
-  non_const_str& operator=(non_const_str&& ncs) {
-    if (this != &ncs) {
-      for (auto& s : ncs.storage_) storage_.push_back(std::move(s));
-      for (auto& s : storage_) pointers_.push_back(s.get());
-    }
-    return *this;
-  }
-  size_t size() const { return pointers_.size(); }
-  operator char*() {
-    ICHECK_EQ(pointers_.size(), 1);
-    return pointers_[0];
-  }
-  operator char**() { return pointers_.data(); }
-
- private:
-  std::vector<char*> pointers_;
-  std::vector<std::unique_ptr<char[]>> storage_;
-};
-
-using MaybeString = Optional<std::string>;
-
-MaybeString front(const string_list& deq) {
-  return !deq.empty() ? MaybeString(deq.front()) : MaybeString();
-}
-
-MaybeString pop_front(string_list& deq) {  // NOLINT(*)
-  if (deq.empty()) return MaybeString();
-  std::string f = deq.front();
-  deq.pop_front();
-  return MaybeString(f);
-}
-
-Optional<int64_t> to_int(const MaybeString& str) {
-  auto none = Optional<int64_t>();
-  if (str.has_value()) {
-    try {
-      size_t pos;
-      int64_t val = std::stoll(*str, &pos, 0);
-      return pos == str->size() ? Optional<int64_t>(val) : none;
-    } catch (std::invalid_argument) {
-    }
-  }
-  return none;
-}
-
-Optional<uint64_t> to_uint(const MaybeString& str) {
-  auto none = Optional<uint64_t>();
-  if (str.has_value()) {
-    try {
-      size_t pos;
-      uint64_t val = std::stoull(*str, &pos, 0);
-      return pos == str->size() ? Optional<uint64_t>(val) : none;
-    } catch (std::invalid_argument) {
-    }
-  }
-  return none;
-}
-
-Optional<float> to_float(const MaybeString& str) {
-  auto none = Optional<float>();
-  if (str.has_value()) {
-    try {
-      size_t pos;
-      float val = std::stof(*str, &pos);
-      return pos == str->size() ? Optional<float>(val) : none;
-    } catch (std::invalid_argument) {
-    }
-  }
-  return none;
-}
-
-Optional<bool> to_bool(const MaybeString& str) {
-  auto none = Optional<bool>();
-  if (auto num = to_int(str)) {
-    if (*num == 0) return false;
-    if (*num == 1) return true;
-    return none;
-  }
-  if (str) {
-    if (*str == "true" || *str == "TRUE") return true;
-    if (*str == "false" || *str == "FALSE") return false;
-  }
-  return none;
-}
-
-template <typename T>
-using MaybeRange = Optional<std::pair<T, T>>;
-
-template <typename T, Optional<T> Parse(const MaybeString&)>
-MaybeRange<T> to_range(const MaybeString& str) {
-  auto none = MaybeRange<T>();
-  if (str && !str->empty()) {
-    auto n = str->find('-', 1);
-    if (n != std::string::npos) {
-      auto begin = Parse(str->substr(0, n));
-      auto end = Parse(str->substr(n + 1, str->size() - n - 1));
-      if (begin && end) {
-        return std::make_pair(*begin, *end);
-      }
-    }
-  }
-  return none;
-}
-
-// Replacement for llvm::StringSwitch.
-template <typename T>
-class StringSwitch {
- public:
-  explicit StringSwitch(const std::string& key) : key(key) {}
-  operator T() const {
-    auto f = map.find(key);
-    if (f != map.end()) {
-      return f->second;
-    }
-    ICHECK(static_cast<bool>(def_val)) << "default value not set";
-    return *def_val;
-  }
-  StringSwitch& Case(const std::string& key, T val) {
-    map.insert(std::make_pair(key, val));
-    return *this;
-  }
-  StringSwitch& Default(T val) {
-    ICHECK(!static_cast<bool>(def_val)) << "default value already set";
-    def_val = val;
-    return *this;
-  }
-
- private:
-  const std::string key;
-  std::map<std::string, T> map;
-  Optional<T> def_val;
-};
-
-// Replacement for llvm::sys::fs::access with AccessMode = Execute.
-bool FileExists(const std::string& file) { return access(file.c_str(), X_OK) == 0; }
-
-// Replacement for llvm::sys::Process::FindInEnvPath.
-MaybeString FindInEnvPath(const std::string& env_var, const std::string& file) {
-  auto none = MaybeString();
-  if (file.empty() || file[0] == '/') {
-    return none;
-  }
-
-  const char* e = getenv(env_var.c_str());
-  std::string env_val = e != nullptr ? std::string(e) : std::string();
-
-  std::vector<std::string> paths;
-  // Split the environment variable into individual paths.
-  size_t first = 0, env_size = env_val.size();
-  for (size_t last = 0; last != env_size; ++last) {
-    if (env_val[last] == ':') {
-      if (last > first) {
-        paths.emplace_back(env_val, first, last - first);
-      }
-      first = last + 1;
-    }
-  }
-  if (first < env_size) {
-    paths.emplace_back(env_val, first, env_size - first);
-  }
-
-  // Search for the file.
-  for (const std::string& dir : paths) {
-    std::string full = dir + '/' + file;
-    if (FileExists(full)) {
-      return full;
-    }
-  }
-  return none;
-}
-}  // namespace detail
-
-class HexagonSimulator final : public tvm::runtime::hexagon::Device {
- public:
-  explicit HexagonSimulator(bool enable_queuing);
-  ~HexagonSimulator() final {}
-  void* Alloc(unsigned size, unsigned align) final;
-  void Free(void* ptr) final;
-  void* AllocVtcm(unsigned size, unsigned align) final;
-  void FreeVtcm(void* ptr) final;
-  void CopyDeviceToDevice(void* dst, const void* src, unsigned len) final;
-  void CopyDeviceToHost(void* host_dst, const void* src, unsigned len) final;
-  void CopyHostToDevice(void* dst, const void* host_src, unsigned len) final;
-  void* Load(const std::string& data, const std::string& fmt) final;
-  void Unload(void* mod) final;
-  void* Resolve(const std::string& sym) final;
-  void Call(void* func, uint32_t* scalar, unsigned sc_num, uint32_t* stack, unsigned st_num) final;
-
-  static std::string to_string(HEXAPI_Status status);
-
- private:
-  static HEX_VA_t p2va(const void* p);
-  static void* va2p(HEX_VA_t va);
-
-  void CopyFromV(void* host_dst, HEX_VA_t src, unsigned len);
-  void CopyToV(HEX_VA_t dst, const void* host_src, unsigned len);
-
-  template <unsigned N>
-  void CopyNToV(HEX_VA_t dst, const void* host_src);
-  template <unsigned N>
-  void CopyNFromV(void* host_dst, HEX_VA_t src);
-
-  // NOLINTNEXTLINE(runtime/references)
-  void SendMsg(Message& m, const void* data, bool show_dbg);
-
-  std::string arch_;
-  std::unique_ptr<HexagonWrapper> sim_;
-  HEX_VA_t dispatch_v_, message_buffer_v_;
-  bool task_queuing_;
-
-  // Sim configuration routines.
-  bool Configure(string_list& opts);  // NOLINT(*)
-
-  bool HandleAHBBusPenalty(string_list& rest);      // NOLINT(*)
-  bool HandleAHBBusRatio(string_list& rest);        // NOLINT(*)
-  bool HandleAHBHighAddr(string_list& rest);        // NOLINT(*)
-  bool HandleAHBLowAddr(string_list& rest);         // NOLINT(*)
-  bool HandleAXI2BusPenalty(string_list& rest);     // NOLINT(*)
-  bool HandleAXI2BusRatio(string_list& rest);       // NOLINT(*)
-  bool HandleAXI2HighAddr(string_list& rest);       // NOLINT(*)
-  bool HandleAXI2LowAddr(string_list& rest);        // NOLINT(*)
-  bool HandleBuildTag(string_list& rest);           // NOLINT(*)
-  bool HandleBusPenalty(string_list& rest);         // NOLINT(*)
-  bool HandleBusRatio(string_list& rest);           // NOLINT(*)
-  bool HandleBusTrace(string_list& rest);           // NOLINT(*)
-  bool HandleBypassIdle(string_list& rest);         // NOLINT(*)
-  bool HandleConnectionTimeout(string_list& rest);  // NOLINT(*)
-  bool HandleCoprocTrace(string_list& rest);        // NOLINT(*)
-  bool HandleCoreDump(string_list& rest);           // NOLINT(*)
-  bool HandleCosimFile(string_list& rest);          // NOLINT(*)
-  bool HandleDCacheTrace(string_list& rest);        // NOLINT(*)
-  bool HandleDSPClock(string_list& rest);           // NOLINT(*)
-  bool HandleETMCFGBase(string_list& rest);         // NOLINT(*)
-  bool HandleGDBServ(string_list& rest);            // NOLINT(*)
-  bool HandleHVXLength(string_list& rest);          // NOLINT(*)
-  bool HandleICacheTrace(string_list& rest);        // NOLINT(*)
-  bool HandleL2CacheTrace(string_list& rest);       // NOLINT(*)
-  bool HandleL2CFGBase(string_list& rest);          // NOLINT(*)
-  bool HandleL2TCMBase(string_list& rest);          // NOLINT(*)
-  bool HandleMemFillRand(string_list& rest);        // NOLINT(*)
-  bool HandleMemFill(string_list& rest);            // NOLINT(*)
-  bool HandleMemTrace(string_list& rest);           // NOLINT(*)
-  bool HandleNullPtr(string_list& rest);            // NOLINT(*)
-  bool HandlePacketAnalyze(string_list& rest);      // NOLINT(*)
-  bool HandlePCFilter(string_list& rest);           // NOLINT(*)
-  bool HandlePCTraceMin(string_list& rest);         // NOLINT(*)
-  bool HandlePCTraceNano(string_list& rest);        // NOLINT(*)
-  bool HandlePCTrace(string_list& rest);            // NOLINT(*)
-  bool HandlePMUStatsFile(string_list& rest);       // NOLINT(*)
-  bool HandleProfile(string_list& rest);            // NOLINT(*)
-  bool HandleProfileTimeZero(string_list& rest);    // NOLINT(*)
-  bool HandleQuiet(string_list& rest);              // NOLINT(*)
-  bool HandleReconnect(string_list& rest);          // NOLINT(*)
-  bool HandleRTOS(string_list& rest);               // NOLINT(*)
-  bool HandleSimErr(string_list& rest);             // NOLINT(*)
-  bool HandleSimIn(string_list& rest);              // NOLINT(*)
-  bool HandleSimOut(string_list& rest);             // NOLINT(*)
-  bool HandleStackStart(string_list& rest);         // NOLINT(*)
-  bool HandleStallTrace(string_list& rest);         // NOLINT(*)
-  bool HandleStatsFile(string_list& rest);          // NOLINT(*)
-  bool HandleSubsystemBase(string_list& rest);      // NOLINT(*)
-  bool HandleSymFile(string_list& rest);            // NOLINT(*)
-  bool HandleTCM(string_list& rest);                // NOLINT(*)
-  bool HandleTCMHighAddr(string_list& rest);        // NOLINT(*)
-  bool HandleTCMLowAddr(string_list& rest);         // NOLINT(*)
-  bool HandleTimeFilterNS(string_list& rest);       // NOLINT(*)
-  bool HandleTiming(string_list& rest);             // NOLINT(*)
-  bool HandleUArchTrace(string_list& rest);         // NOLINT(*)
-  bool HandleUseFS(string_list& rest);              // NOLINT(*)
-  bool HandleV2PTranslation(string_list& rest);     // NOLINT(*)
-  bool HandleVerbose(string_list& rest);            // NOLINT(*)
-
-  using MaybeUInt64 = detail::Optional<uint64_t>;
-  using MaybeUIntRange = std::pair<MaybeUInt64, MaybeUInt64>;
-
-  bool should_parse_next(const string_list& rest);
-  detail::Optional<HEXAPI_Interval> to_interval(const detail::MaybeString& str);
-  detail::Optional<HEXAPI_TimingMode> to_timingmode(const detail::MaybeString& str);
-  detail::Optional<HEXAPI_VerboseMode> to_verbosemode(const detail::MaybeString& str);
-  detail::Optional<HEXAPI_Nullptr> to_nullptr(const detail::MaybeString& str);
-
-  MaybeUIntRange ahb_, axi2_;
-  detail::Optional<uint32_t> debug_port_;
-  detail::non_const_str sim_dev_args_;
-
-  using OptionHandler = bool (HexagonSimulator::*)(string_list&);
-  static std::map<std::string, OptionHandler> opt_map_;
-};
-
-decltype(HexagonSimulator::opt_map_) HexagonSimulator::opt_map_ = {
-    {"--ahbbuspenalty", &HexagonSimulator::HandleAHBBusPenalty},
-    {"--ahbbusratio", &HexagonSimulator::HandleAHBBusRatio},
-    {"--ahb:highaddr", &HexagonSimulator::HandleAHBHighAddr},
-    {"--ahb:lowaddr", &HexagonSimulator::HandleAHBLowAddr},
-    {"--axi2buspenalty", &HexagonSimulator::HandleAXI2BusPenalty},
-    {"--axi2busratio", &HexagonSimulator::HandleAXI2BusRatio},
-    {"--axi2:highaddr", &HexagonSimulator::HandleAXI2HighAddr},
-    {"--axi2:lowaddr", &HexagonSimulator::HandleAXI2LowAddr},
-    {"-b", &HexagonSimulator::HandleBusTrace},
-    {"--build_tag", &HexagonSimulator::HandleBuildTag},
-    {"--buspenalty", &HexagonSimulator::HandleBusPenalty},
-    {"--busratio", &HexagonSimulator::HandleBusRatio},
-    {"--bustrace", &HexagonSimulator::HandleBusTrace},
-    {"--bypass_idle", &HexagonSimulator::HandleBypassIdle},
-    {"--connection_timeout", &HexagonSimulator::HandleConnectionTimeout},
-    {"--coproctrace", &HexagonSimulator::HandleCoprocTrace},
-    {"--coredump", &HexagonSimulator::HandleCoreDump},
-    {"--cosim_file", &HexagonSimulator::HandleCosimFile},
-    {"--dcachetrace", &HexagonSimulator::HandleDCacheTrace},
-    {"--dsp_clock", &HexagonSimulator::HandleDSPClock},
-    {"-E", &HexagonSimulator::HandleSimErr},
-    {"--etm_base", &HexagonSimulator::HandleETMCFGBase},
-    {"--etmcfg_base", &HexagonSimulator::HandleETMCFGBase},
-    {"--gdbserv", &HexagonSimulator::HandleGDBServ},
-    {"-G", &HexagonSimulator::HandleGDBServ},
-    {"--hvx_length", &HexagonSimulator::HandleHVXLength},
-    {"--icachetrace", &HexagonSimulator::HandleICacheTrace},
-    {"-I", &HexagonSimulator::HandleSimIn},
-    {"--l2cachetrace", &HexagonSimulator::HandleL2CacheTrace},
-    {"--l2cfg_base", &HexagonSimulator::HandleL2CFGBase},
-    {"--l2tcm_base", &HexagonSimulator::HandleL2TCMBase},
-    {"--memfill", &HexagonSimulator::HandleMemFill},
-    {"--memfill_rand", &HexagonSimulator::HandleMemFillRand},
-    {"--memtrace", &HexagonSimulator::HandleMemTrace},
-    {"-m", &HexagonSimulator::HandleMemTrace},
-    {"--nullptr", &HexagonSimulator::HandleNullPtr},
-    {"-O", &HexagonSimulator::HandleSimOut},
-    {"--packet_analyze", &HexagonSimulator::HandlePacketAnalyze},
-    {"--pcfilter", &HexagonSimulator::HandlePCFilter},
-    {"--pctrace", &HexagonSimulator::HandlePCTrace},
-    {"--pctrace_min", &HexagonSimulator::HandlePCTraceMin},
-    {"--pctrace_nano", &HexagonSimulator::HandlePCTraceNano},
-    {"-p", &HexagonSimulator::HandleProfile},
-    {"--pmu_statsfile", &HexagonSimulator::HandlePMUStatsFile},
-    {"--profile", &HexagonSimulator::HandleProfile},
-    {"--profile_timezero", &HexagonSimulator::HandleProfileTimeZero},
-    {"-q", &HexagonSimulator::HandleQuiet},
-    {"--quiet", &HexagonSimulator::HandleQuiet},
-    {"--reconnect", &HexagonSimulator::HandleReconnect},
-    {"--rtos", &HexagonSimulator::HandleRTOS},
-    {"-S", &HexagonSimulator::HandleStatsFile},
-    {"--sim_err", &HexagonSimulator::HandleSimErr},
-    {"--sim_in", &HexagonSimulator::HandleSimIn},
-    {"--sim_out", &HexagonSimulator::HandleSimOut},
-    {"--stackstart", &HexagonSimulator::HandleStackStart},
-    {"--stalltrace", &HexagonSimulator::HandleStallTrace},
-    {"--statsfile", &HexagonSimulator::HandleStatsFile},
-    {"--subsystem_base", &HexagonSimulator::HandleSubsystemBase},
-    {"--symfile", &HexagonSimulator::HandleSymFile},
-    {"--tcm", &HexagonSimulator::HandleTCM},
-    {"--tcm:highaddr", &HexagonSimulator::HandleTCMHighAddr},
-    {"--tcm:lowaddr", &HexagonSimulator::HandleTCMLowAddr},
-    {"-t", &HexagonSimulator::HandlePCTrace},
-    {"--timefilter_ns", &HexagonSimulator::HandleTimeFilterNS},
-    {"--timing", &HexagonSimulator::HandleTiming},
-    {"--uarchtrace", &HexagonSimulator::HandleUArchTrace},
-    {"-u", &HexagonSimulator::HandlePCTraceMin},
-    {"--usefs", &HexagonSimulator::HandleUseFS},
-    {"--v2p_translation", &HexagonSimulator::HandleV2PTranslation},
-    {"--verbose", &HexagonSimulator::HandleVerbose},
-};
-
-#define CHECKED_CALL(func, ...)                                                               \
-  do {                                                                                        \
-    HEXAPI_Status s = sim_->func(__VA_ARGS__);                                                \
-    ICHECK_EQ(s, HEX_STAT_SUCCESS)                                                            \
-        << "HexagonSimulator: " #func " failed with code " << HexagonSimulator::to_string(s); \
-  } while (false)
-
-inline HEX_VA_t HexagonSimulator::p2va(const void* p) {
-  uintptr_t u = reinterpret_cast<uintptr_t>(p);
-  HEX_VA_t va = static_cast<HEX_VA_t>(u);
-  ICHECK_EQ(static_cast<uintptr_t>(va), u);
-  return va;
-}
-
-inline void* HexagonSimulator::va2p(HEX_VA_t va) {
-  return reinterpret_cast<void*>(static_cast<uintptr_t>(va));
-}
-
-template <unsigned N, unsigned A>
-constexpr bool is_multiple_of() {
-  return (N / A) * A == N;
-}
-
-std::shared_ptr<Device> CreateHexagonSimulator() {
-  return std::make_shared<HexagonSimulator>(/*enable_queuing=*/true);
-}
-
-template <unsigned N>
-void HexagonSimulator::CopyNToV(HEX_VA_t dst, const void* host_src) {
-  using src_uint_t = typename unalign<typename uint<N>::type>::type;
-  auto* ps = reinterpret_cast<const src_uint_t*>(host_src);
-  ICHECK_EQ(sim_->WriteVirtual(dst, -1u, N, ps->value), HEX_STAT_SUCCESS);
-}
-
-template <unsigned N>
-void HexagonSimulator::CopyNFromV(void* host_dst, HEX_VA_t src) {
-  typename uint<N>::type v;
-  ICHECK_EQ(sim_->ReadVirtual(src, -1u, N, &v), HEX_STAT_SUCCESS);
-
-  using dst_uint_t = typename unalign<typename uint<N>::type>::type;
-  auto* pd = reinterpret_cast<dst_uint_t*>(host_dst);
-  pd->value = v;
-}
-
-void HexagonSimulator::CopyToV(HEX_VA_t dst, const void* host_src, unsigned len) {
-  const uint8_t* src = static_cast<const uint8_t*>(host_src);
-
-  while (len >= 8) {
-    CopyNToV<8>(dst, src);
-    dst += 8;
-    src += 8;
-    len -= 8;
-  }
-  if (len >= 4) {
-    CopyNToV<4>(dst, src);
-    dst += 4;
-    src += 4;
-    len -= 4;
-  }
-  if (len >= 2) {
-    CopyNToV<2>(dst, src);
-    dst += 2;
-    src += 2;
-    len -= 2;
-  }
-  if (len >= 1) {
-    CopyNToV<1>(dst, src);
-    dst++;
-    src++;
-    len--;
-  }
-  ICHECK_EQ(len, 0);
-}
-
-void HexagonSimulator::CopyFromV(void* host_dst, HEX_VA_t src, unsigned len) {
-  uint8_t* dst = static_cast<uint8_t*>(host_dst);
-
-  while (len >= 8) {
-    CopyNFromV<8>(dst, src);
-    dst += 8;
-    src += 8;
-    len -= 8;
-  }
-  if (len >= 4) {
-    CopyNFromV<4>(dst, src);
-    dst += 4;
-    src += 4;
-    len -= 4;
-  }
-  if (len >= 2) {
-    CopyNFromV<2>(dst, src);
-    dst += 2;
-    src += 2;
-    len -= 2;
-  }
-  if (len >= 1) {
-    CopyNFromV<1>(dst, src);
-    dst++;
-    src++;
-    len--;
-  }
-  ICHECK_EQ(len, 0);
-}
-
-void HexagonSimulator::SendMsg(Message& m, const void* data, bool show_dbg) {
-  auto run = [this](bool report_cycles) {
-    HEXAPI_CoreState core = HEX_CORE_RESET;
-    HEX_4u_t result;
-    HEX_8u_t cycles0, cycles1;
-    if (report_cycles) {
-      ICHECK_EQ(sim_->GetSimulatedCycleCount(&cycles0), HEX_STAT_SUCCESS);
-    }
-
-    core = sim_->Run(&result);
-    ICHECK_EQ(core, HEX_CORE_BREAKPOINT);
-    if (report_cycles) {
-      ICHECK_EQ(sim_->GetSimulatedCycleCount(&cycles1), HEX_STAT_SUCCESS);
-      LOG(INFO) << "host: execution took " << (cycles1 - cycles0) << " cycles";
-    }
-  };
-
-  // Send the message request.
-  Message r = {kMsgReq, m.len, 0u};
-  CopyToV(message_buffer_v_, &r, sizeof(r));
-  run(false);
-
-  // Receive the acknowledgement with the address for the payload.
-  CopyFromV(&r, message_buffer_v_, sizeof(r));
-  ICHECK_EQ(r.code, kMsgAck);
-  ICHECK_GE(r.len, m.len);
-
-  // Send the actual message.
-  m.va = r.va;
-  CopyToV(message_buffer_v_, &m, sizeof(m));
-  if (m.len > 0) CopyToV(r.va, data, m.len);
-  run(show_dbg);
-
-  // Receive the return data.
-  CopyFromV(&m, message_buffer_v_, sizeof(m));
-  ICHECK_EQ(m.code, kNone);
-}
-
-HexagonSimulator::HexagonSimulator(bool enable_queuing) {
-  task_queuing_ = enable_queuing;
-
-  // The simulator argument string is in the form:
-  //   <cpu_ver> <optional_arguments>
-  // The optional arguments are seperated with spaces:
-  // Ex: --hvx_length 128 --memfill 0 --timing -m output.txt
-  const char* sim_args_env = std::getenv("HEXAGON_SIM_ARGS");
-  if (sim_args_env == nullptr) sim_args_env = "";
-  auto sim_args_iss = std::istringstream(std::string(sim_args_env));
-  using iterator = std::istream_iterator<std::string>;
-  auto sim_args = string_list(iterator(sim_args_iss), iterator());
-
-  std::string target_str = !sim_args.empty() ? *detail::pop_front(sim_args) : std::string("v66");
-
-  arch_ = target_str;
-  sim_ = detail::make_unique<HexagonWrapper>(detail::non_const_str(target_str));
-  LOG(INFO) << "HexagonSimulator: Core version: " << arch_;
-
-  // Locate the sim_dev binary in PATH, or in the current working directory.
-  std::string sim_dev = "sim_dev";
-  detail::MaybeString path_sim_dev = detail::FindInEnvPath("PATH", sim_dev);
-  if (!path_sim_dev) {
-    if (!detail::FileExists(sim_dev)) {
-      LOG(FATAL) << "Cannot find sim_dev in PATH.";
-    }
-    path_sim_dev = sim_dev;
-  }
-
-  CHECKED_CALL(ConfigureExecutableBinary, path_sim_dev->c_str());
-
-  std::vector<std::string> app_args = {*path_sim_dev};
-  if (char* ev = getenv("ADSP_LIBRARY_PATH")) {
-    app_args.push_back("-L");
-    app_args.push_back(ev);
-  }
-  sim_dev_args_ = detail::non_const_str(app_args);
-  CHECKED_CALL(ConfigureAppCommandLine, sim_dev_args_.size(), sim_dev_args_);
-
-  Configure(sim_args);
-
-  CHECKED_CALL(EndOfConfiguration);
-  CHECKED_CALL(LoadExecutableBinary);
-  CHECKED_CALL(ReadSymbolValue, "dispatch", &dispatch_v_);
-  CHECKED_CALL(ReadSymbolValue, "message_buffer", &message_buffer_v_);
-  CHECKED_CALL(SetBreakpoint, dispatch_v_);
-
-  HEXAPI_CoreState core = HEX_CORE_RESET;
-
-  HEX_4u_t result;
-  core = sim_->Run(&result);
-  if (core != HEX_CORE_BREAKPOINT) {
-    LOG(FATAL) << "HexagonSimulator: Run not stopped on breakpoint, "
-                  "code="
-               << static_cast<int>(core);
-  }
-
-  // At this point the simulator has executed the executable's initialization
-  // code that could have written to the SSR register.
-  // Enable UPCYCLE register.
-  HEX_4u_t thread_num;
-  CHECKED_CALL(GetCurrentHWThreadNum, &thread_num);
-  HEX_4u_t thread_ssr;
-  CHECKED_CALL(ReadThreadRegister, thread_num, TH_REG_SSR, &thread_ssr);
-  thread_ssr |= (1 << 23);
-  CHECKED_CALL(WriteThreadRegister, thread_num, TH_REG_SSR, thread_ssr);
-}
-
-void* HexagonSimulator::Alloc(unsigned size, unsigned align) {
-  LOG(INFO) << "HexagonSimulator::Alloc(size=" << size << ", align=" << align << ')';
-  Message m = {kAlloc, sizeof(MsgAlloc), 0u};
-  MsgAlloc ma = {size, align};
-  SendMsg(m, &ma, true);
-
-  ICHECK_EQ(sizeof(MsgPointer), m.len);
-  MsgPointer mp;
-  CopyFromV(&mp, m.va, m.len);
-
-  LOG(INFO) << "HexagonSimulator::Alloc -> " << std::hex << mp.va << std::dec;
-  ICHECK_NE(mp.va, 0);
-  return va2p(mp.va);
-}
-
-void HexagonSimulator::Free(void* ptr) {
-  LOG(INFO) << "HexagonSimulator::Free(ptr=" << std::hex << ptr << std::dec << ')';
-  if (task_queuing_) {
-    Message mf = {kFlush, 0, 0};
-    SendMsg(mf, nullptr, true);
-  }
-  Message m = {kFree, sizeof(MsgPointer), 0u};
-  MsgPointer mp = {p2va(ptr)};
-  SendMsg(m, &mp, true);
-}
-
-void* HexagonSimulator::AllocVtcm(unsigned size, unsigned align) {
-  LOG(INFO) << "HexagonSimulator::AllocVtcm(size=" << size << ", align=" << align << ')';
-  Message m = {kAllocVtcm, sizeof(MsgAlloc), 0u};
-  MsgAlloc ma = {size, align};
-  SendMsg(m, &ma, true);
-
-  ICHECK_EQ(sizeof(MsgPointer), m.len);
-  MsgPointer mp;
-  CopyFromV(&mp, m.va, m.len);
-
-  LOG(INFO) << "HexagonSimulator::AllocVtcm -> " << std::hex << mp.va << std::dec;
-  ICHECK_NE(mp.va, 0);
-  return va2p(mp.va);
-}
-
-void HexagonSimulator::FreeVtcm(void* ptr) {}
-
-void HexagonSimulator::CopyDeviceToDevice(void* dst, const void* src, unsigned len) {
-  LOG(INFO) << "HexagonSimulator::CopyDeviceToDevice(dst=" << std::hex << dst << ", src=" << src
-            << ", len=" << std::dec << len << ')';
-  ICHECK(dst != nullptr && src != nullptr);
-  Message m = {kCopy, sizeof(MsgCopy), 0u};
-  MsgCopy mc = {p2va(dst), p2va(src), len};
-  SendMsg(m, &mc, true);
-}
-
-void HexagonSimulator::CopyDeviceToHost(void* host_dst, const void* src, unsigned len) {
-  LOG(INFO) << "HexagonSimulator::CopyDeviceToHost(host_dst=" << host_dst << ", src=" << src
-            << ", len=" << len << ')';
-  if (task_queuing_) {
-    Message mf = {kFlush, 0, 0};
-    SendMsg(mf, nullptr, true);
-  }
-  CopyFromV(host_dst, p2va(src), len);
-}
-
-void HexagonSimulator::CopyHostToDevice(void* dst, const void* host_src, unsigned len) {
-  LOG(INFO) << "HexagonSimulator::CopyHostToDevice(dst=" << dst << ", host_src=" << host_src
-            << ", len=" << len << ')';
-  CopyToV(p2va(dst), host_src, len);
-}
-
-void* HexagonSimulator::Load(const std::string& data, const std::string& fmt) {
-  // Load the shared library.
-  Message m = {kLoad, static_cast<uint32_t>(data.size() + 1), 0u};
-  SendMsg(m, data.c_str(), false);
-
-  ICHECK_EQ(sizeof(MsgPointer), m.len);
-  MsgPointer mp;
-  CopyFromV(&mp, m.va, sizeof(mp));
-
-  return va2p(mp.va);
-}
-
-void HexagonSimulator::Unload(void* mod) {
-  ICHECK(mod);
-  Message m = {kUnload, sizeof(MsgPointer), 0u};
-  MsgPointer mp = {p2va(mod)};
-  SendMsg(m, &mp, false);
-}
-
-void* HexagonSimulator::Resolve(const std::string& sym) {
-  LOG(INFO) << "HexagonSimulator::Resolve(sym=" << sym << ')';
-  Message m = {kResolve, static_cast<uint32_t>(sym.size() + 1), 0u};
-  SendMsg(m, sym.c_str(), true);
-
-  ICHECK_EQ(sizeof(MsgPointer), m.len);
-  MsgPointer mp;
-  CopyFromV(&mp, m.va, sizeof(mp));
-
-  LOG(INFO) << "HexagonSimulator::Resolve -> " << std::hex << mp.va << std::dec;
-  return va2p(mp.va);
-}
-
-void HexagonSimulator::Call(void* func, uint32_t* scalar, unsigned sc_num, uint32_t* stack,
-                            unsigned st_num) {
-  LOG(INFO) << "HexagonSimulator::Call(func=" << std::hex << func << ", scalar=" << scalar
-            << ", sc_num=" << std::dec
-            << sc_num
-            // NOLINTNEXTLINE(build/include_what_you_use)
-            << ", stack=" << std::hex << stack << ", st_num=" << std::dec << st_num;
-
-  std::vector<uint32_t> data;
-
-  // Copy the MsgCall contents into the data vector as a sequence of uints.
-  MsgCall me = {p2va(func), sc_num, st_num};
-
-  ICHECK((is_multiple_of<sizeof(MsgCall), sizeof(uint32_t)>()));
-  for (unsigned i = 0, e = sizeof(me) / sizeof(uint32_t); i != e; ++i)
-    data.push_back(reinterpret_cast<uint32_t*>(&me)[i]);
-
-  // Append the scalar (register) arguments.
-  for (unsigned i = 0; i != sc_num; ++i) data.push_back(scalar[i]);
-  // Append the stack contents.
-  for (unsigned i = 0; i != st_num; ++i) data.push_back(stack[i]);
-
-  std::ostringstream log_data;
-  log_data << "data: {" << std::hex;
-  for (unsigned i = 0, e = static_cast<uint32_t>(data.size()); i != e; ++i) {
-    log_data << ' ' << reinterpret_cast<uint32_t*>(data.data())[i];
-  }
-  log_data << std::dec << " }" << std::flush;
-  LOG(INFO) << log_data.str();
-
-  Message m = {kCall, static_cast<uint32_t>(data.size() * sizeof(uint32_t)), 0u};
-  SendMsg(m, data.data(), true);
-
-  if (!task_queuing_) {
-    Message mf = {kFlush, 0, 0};
-    SendMsg(mf, nullptr, true);
-  }
-
-  std::vector<uint8_t> rv(m.len);
-  CopyFromV(rv.data(), m.va, m.len);
-
-  std::ostringstream log_rv;
-  log_rv << "HexagonSimulator::Call -> {" << std::hex;
-  for (unsigned i = 0, e = std::min<unsigned>(rv.size(), 4u); i != e; ++i) {
-    log_rv << ' ' << std::setw(2) << std::setfill('0') << static_cast<uint32_t>(rv[i]);
-  }
-  if (rv.size() > 4) log_rv << "...";
-  log_rv << std::dec << " }";
-  LOG(INFO) << log_rv.str();
-}
-
-bool HexagonSimulator::Configure(string_list& opts) {
-  while (!opts.empty()) {
-    std::string key = *detail::pop_front(opts);
-    auto f = opt_map_.find(key);
-    if (f == opt_map_.end()) {
-      LOG(FATAL) << "Unrecognized simulator option: " << key;
-      // unreachable
-    }
-    ICHECK((this->*f->second)(opts)) << "error handling option: " << key;
-  }
-
-  // Check AHB.
-  if (ahb_.first.has_value() && ahb_.second.has_value()) {
-    CHECKED_CALL(ConfigureAHB, *ahb_.first, *ahb_.second);
-  } else {
-    ICHECK(!ahb_.first.has_value() && !ahb_.second.has_value())
-        << "HexagonSimulator: please specify both low and high addresses "
-           "for AHB";
-  }
-
-  // Check AXI2.
-  if (axi2_.first.has_value() && axi2_.second.has_value()) {
-    CHECKED_CALL(ConfigureAXI2, *axi2_.first, *axi2_.second);
-  } else {
-    ICHECK(!axi2_.first.has_value() && !axi2_.second.has_value())
-        << "HexagonSimulator: please specify both low and high addresses "
-           "for AXI2";
-  }
-
-  return true;
-}
-
-bool HexagonSimulator::HandleAHBBusPenalty(string_list& rest) {
-  auto penalty = detail::to_uint(detail::pop_front(rest));
-  auto interval = to_interval(detail::pop_front(rest));
-  if (penalty && interval) {
-    CHECKED_CALL(ConfigureAHBBusPenalty, *penalty, *interval);
-  }
-  return static_cast<bool>(penalty) && static_cast<bool>(interval);
-}
-
-bool HexagonSimulator::HandleAHBBusRatio(string_list& rest) {
-  auto ratio = detail::to_float(detail::pop_front(rest));
-  if (ratio) {
-    CHECKED_CALL(ConfigureAHBBusRatio, *ratio);
-  }
-  return static_cast<bool>(ratio);
-}
-
-bool HexagonSimulator::HandleAHBHighAddr(string_list& rest) {
-  auto addr = detail::to_uint(detail::pop_front(rest));
-  ICHECK(addr) << "HexagonSimulator: invalid value for AHB high adddress";
-  if (addr) {
-    ahb_.second = *addr;
-  }
-  return static_cast<bool>(addr);
-}
-
-bool HexagonSimulator::HandleAHBLowAddr(string_list& rest) {
-  auto addr = detail::to_uint(detail::pop_front(rest));
-  ICHECK(addr) << "HexagonSimulator: invalid value for AHB low adddress";
-  if (addr) {
-    ahb_.first = *addr;
-  }
-  return static_cast<bool>(addr);
-}
-
-bool HexagonSimulator::HandleAXI2BusPenalty(string_list& rest) {
-  auto penalty = detail::to_uint(detail::pop_front(rest));
-  auto interval = to_interval(detail::pop_front(rest));
-  if (penalty && interval) {
-    CHECKED_CALL(ConfigureAXI2BusPenalty, *penalty, *interval);
-  }
-  return static_cast<bool>(penalty) && static_cast<bool>(interval);
-}
-
-bool HexagonSimulator::HandleAXI2BusRatio(string_list& rest) {
-  auto ratio = detail::to_float(detail::pop_front(rest));
-  if (ratio) {
-    CHECKED_CALL(ConfigureAXI2BusRatio, *ratio);
-  }
-  return static_cast<bool>(ratio);
-}
-
-bool HexagonSimulator::HandleAXI2HighAddr(string_list& rest) {
-  auto addr = detail::to_uint(detail::pop_front(rest));
-  ICHECK(addr) << "HexagonSimulator: invalid value for AXI2 high adddress";
-  if (addr) {
-    axi2_.second = *addr;
-  }
-  return static_cast<bool>(addr);
-}
-
-bool HexagonSimulator::HandleAXI2LowAddr(string_list& rest) {
-  auto addr = detail::to_uint(detail::pop_front(rest));
-  ICHECK(addr) << "HexagonSimulator: invalid value for AXI2 low adddress";
-  if (addr) {
-    axi2_.first = *addr;
-  }
-  return static_cast<bool>(addr);
-}
-
-bool HexagonSimulator::HandleBuildTag(string_list& rest) {
-  sim_->PrintBuildTag();
-  return true;
-}
-
-bool HexagonSimulator::HandleBusPenalty(string_list& rest) {
-  auto penalty = detail::to_uint(detail::pop_front(rest));
-  auto interval = to_interval(detail::pop_front(rest));
-  if (penalty && interval) {
-    CHECKED_CALL(ConfigureBusPenalty, *penalty, *interval);
-  }
-  return static_cast<bool>(penalty) && static_cast<bool>(interval);
-}
-
-bool HexagonSimulator::HandleBusRatio(string_list& rest) {
-  auto ratio = detail::to_float(detail::pop_front(rest));
-  if (ratio) {
-    CHECKED_CALL(ConfigureBusRatio, *ratio);
-  }
-  return static_cast<bool>(ratio);
-}
-
-bool HexagonSimulator::HandleBusTrace(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(SetTracing, HEX_TRACE_BUS, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleBypassIdle(string_list& rest) {
-  CHECKED_CALL(ConfigureBypassIdle, true);
-  return true;
-}
-
-bool HexagonSimulator::HandleConnectionTimeout(string_list& rest) {
-  auto time = detail::to_int(detail::pop_front(rest));
-  if (time) {
-    CHECKED_CALL(ConfigureConnectionTimeout, *time);
-  }
-  return static_cast<bool>(time);
-}
-
-bool HexagonSimulator::HandleCoprocTrace(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(SetTracing, HEX_TRACE_COPROC, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleCoreDump(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(ConfigureCoreDump, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleCosimFile(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(ConfigureCosim, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleDCacheTrace(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(SetTracing, HEX_TRACE_DCACHE, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleDSPClock(string_list& rest) {
-  auto freq = detail::to_uint(detail::pop_front(rest));
-  if (freq) {
-    CHECKED_CALL(ConfigureCoreFrequency, *freq);
-  }
-  return static_cast<bool>(freq);
-}
-
-bool HexagonSimulator::HandleETMCFGBase(string_list& rest) {
-  auto base = detail::to_uint(detail::pop_front(rest));
-  if (base) {
-    CHECKED_CALL(ConfigureEtmcfgBase, *base);
-  }
-  return static_cast<bool>(base);
-}
-
-bool HexagonSimulator::HandleGDBServ(string_list& rest) {
-  auto port = detail::to_uint(detail::pop_front(rest));
-  if (port) {
-    CHECKED_CALL(ConfigureRemoteDebug, *port);
-    debug_port_ = *port;
-  }
-  return static_cast<bool>(port);
-}
-
-bool HexagonSimulator::HandleHVXLength(string_list& rest) {
-  auto len = detail::to_int(detail::pop_front(rest));
-  if (len) {
-    CHECKED_CALL(ConfigureHVXLength, *len);
-  }
-  return static_cast<bool>(len);
-}
-
-bool HexagonSimulator::HandleICacheTrace(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(SetTracing, HEX_TRACE_ICACHE, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleL2CacheTrace(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(SetTracing, HEX_TRACE_L2CACHE, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleL2CFGBase(string_list& rest) {
-  auto base = detail::to_uint(detail::pop_front(rest));
-  if (base) {
-    CHECKED_CALL(ConfigureL2cfgBase, *base);
-  }
-  return static_cast<bool>(base);
-}
-
-bool HexagonSimulator::HandleL2TCMBase(string_list& rest) {
-  auto base = detail::to_uint(detail::pop_front(rest));
-  if (base) {
-    CHECKED_CALL(ConfigureL2tcmBase, *base);
-  }
-  return static_cast<bool>(base);
-}
-
-bool HexagonSimulator::HandleMemFillRand(string_list& rest) {
-  auto seed = detail::to_uint(detail::pop_front(rest));
-  if (seed) {
-    CHECKED_CALL(ConfigureMemFillRandom, *seed);
-  }
-  return static_cast<bool>(seed);
-}
-
-bool HexagonSimulator::HandleMemFill(string_list& rest) {
-  auto val = detail::to_uint(detail::pop_front(rest));
-  if (val) {
-    CHECKED_CALL(ConfigureMemFill, *val);
-  }
-  return static_cast<bool>(val);
-}
-
-bool HexagonSimulator::HandleMemTrace(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(SetTracing, HEX_TRACE_MEM, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleNullPtr(string_list& rest) {
-  auto behavior = to_nullptr(detail::pop_front(rest));
-  if (behavior) {
-    CHECKED_CALL(ConfigureNULLPointerBehavior, *behavior);
-  }
-  return static_cast<bool>(behavior);
-}
-
-bool HexagonSimulator::HandlePacketAnalyze(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(ConfigurePacketAnalysis, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandlePCFilter(string_list& rest) {
-  auto range = detail::to_range<uint64_t, detail::to_uint>(detail::pop_front(rest));
-  if (range) {
-    CHECKED_CALL(ConfigurePCRangeFilter, range->first, range->second);
-  }
-  return static_cast<bool>(range);
-}
-
-bool HexagonSimulator::HandlePCTraceMin(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(SetTracing, HEX_TRACE_PC_MIN, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandlePCTraceNano(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(SetTracing, HEX_TRACE_PC_NANO, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandlePCTrace(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(SetTracing, HEX_TRACE_PC, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandlePMUStatsFile(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(ConfigurePmuStatisticsFile, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleProfile(string_list& rest) {
-  auto path = detail::pop_front(rest);
-  if (path) {
-    CHECKED_CALL(ConfigureGProf, path->c_str());
-  }
-  return static_cast<bool>(path);
-}
-
-bool HexagonSimulator::HandleProfileTimeZero(string_list& rest) {
-  auto timezero = detail::to_bool(detail::pop_front(rest));
-  if (timezero) {
-    CHECKED_CALL(ConfigureProfileMode, *timezero);
-  }
-  return static_cast<bool>(timezero);
-}
-
-bool HexagonSimulator::HandleQuiet(string_list& rest) {
-  sim_->VerboseMode(HEX_QUIET);
-  return true;
-}
-
-bool HexagonSimulator::HandleReconnect(string_list& rest) {
-  if (!debug_port_) {
-    LOG(FATAL) << "Reconnect error: --reconnect must be specified "
-                  "AFTER --gdbserv <port_num>";
-  }
-  CHECKED_CALL(ConfigureRemoteDebug, *debug_port_, true);
-  return true;
-}
-
-bool HexagonSimulator::HandleRTOS(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(ConfigureOSAwareness, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleSimErr(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(ConfigureSimStderr, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleSimIn(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(ConfigureSimStdin, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleSimOut(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(ConfigureSimStdout, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleStackStart(string_list& rest) {
-  auto base = detail::to_uint(detail::pop_front(rest));
-  auto size = detail::to_uint(detail::pop_front(rest));
-  if (base && size) {
-    CHECKED_CALL(ConfigureStackInfo, *base, *size);
-  }
-  return static_cast<bool>(base) && static_cast<bool>(size);
-}
-
-bool HexagonSimulator::HandleStallTrace(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(SetTracing, HEX_TRACE_STALL, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleStatsFile(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(ConfigureStatisticsFile, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleSubsystemBase(string_list& rest) {
-  auto base = detail::to_uint(detail::pop_front(rest));
-  if (base) {
-    CHECKED_CALL(ConfigureSubsystemBase, *base);
-  }
-  return static_cast<bool>(base);
-}
-
-bool HexagonSimulator::HandleSymFile(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(AddSymbolFile, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleTCM(string_list& rest) {
-  CHECKED_CALL(ConfigureTimingMode, HEX_TIMING);
-  return true;
-}
-
-bool HexagonSimulator::HandleTCMHighAddr(string_list& rest) {
-  // This option takes an argument, but (the option) is ignored.
-  auto addr = detail::to_uint(detail::pop_front(rest));
-  return static_cast<bool>(addr);
-}
-
-bool HexagonSimulator::HandleTCMLowAddr(string_list& rest) {
-  auto addr = detail::to_uint(detail::pop_front(rest));
-  if (addr) {
-    CHECKED_CALL(ConfigureTCM, *addr);
-  }
-  return static_cast<bool>(addr);
-}
-
-bool HexagonSimulator::HandleTimeFilterNS(string_list& rest) {
-  auto range = detail::to_range<uint64_t, detail::to_uint>(detail::pop_front(rest));
-  if (range) {
-    CHECKED_CALL(ConfigureTimeRangeFilter, range->first, HEX_NANOSEC, range->second, HEX_NANOSEC);
-  }
-  return static_cast<bool>(range);
-}
-
-bool HexagonSimulator::HandleTiming(string_list& rest) {
-  HEXAPI_TimingMode timing_mode = HEX_TIMING;
-  // The argument to --timing is optional.
-  if (should_parse_next(rest)) {
-    if (auto mode = to_timingmode(detail::pop_front(rest))) {
-      timing_mode = *mode;
-    } else {
-      return false;
-    }
-  }
-  CHECKED_CALL(ConfigureTimingMode, timing_mode);
-  return true;
-}
-
-bool HexagonSimulator::HandleUArchTrace(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(SetTracing, HEX_TRACE_UARCH, file->c_str());
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleUseFS(string_list& rest) {
-  auto file = detail::pop_front(rest);
-  if (file) {
-    CHECKED_CALL(ConfigureARFilesystem, detail::non_const_str(*file));
-  }
-  return static_cast<bool>(file);
-}
-
-bool HexagonSimulator::HandleV2PTranslation(string_list& rest) {
-  auto enable = detail::to_bool(detail::pop_front(rest));
-  if (enable) {
-    CHECKED_CALL(EnableVirtualToPhysicalTranslation, *enable);
-  }
-  return static_cast<bool>(enable);
-}
-
-bool HexagonSimulator::HandleVerbose(string_list& rest) {
-  auto mode = to_verbosemode(detail::pop_front(rest));
-  if (mode) {
-    sim_->VerboseMode(*mode);
-  }
-  return static_cast<bool>(mode);
-}
-
-bool HexagonSimulator::should_parse_next(const string_list& rest) {
-  if (auto str = detail::front(rest)) {
-    return str->empty() || str->front() != '-';
-  }
-  return false;
-}
-
-detail::Optional<HEXAPI_Interval> HexagonSimulator::to_interval(const detail::MaybeString& str) {
-  auto none = detail::Optional<HEXAPI_Interval>();
-  if (!str) return none;
-
-  if (auto val = detail::to_int(*str)) {
-    switch (*val) {
-      case HEX_MILLISEC:
-      case HEX_MICROSEC:
-      case HEX_NANOSEC:
-      case HEX_PICOSEC:
-      case HEX_PCYCLE:
-        return static_cast<HEXAPI_Interval>(*val);
-    }
-  }
-
-  return detail::StringSwitch<detail::Optional<HEXAPI_Interval>>(*str)
-      .Case("MILLISEC", HEX_MILLISEC)
-      .Case("MICROSEC", HEX_MICROSEC)
-      .Case("NANOSEC", HEX_NANOSEC)
-      .Case("PICOSEC", HEX_PICOSEC)
-      .Case("PCYCLE", HEX_PCYCLE)
-      .Default(none);
-}
-
-detail::Optional<HEXAPI_TimingMode> HexagonSimulator::to_timingmode(
-    const detail::MaybeString& str) {
-  auto none = detail::Optional<HEXAPI_TimingMode>();
-  if (!str) return none;
-
-  if (auto val = detail::to_int(*str)) {
-    switch (*val) {
-      case HEX_NOTIMING:
-      case HEX_TIMING_NODBC:
-      case HEX_TIMING:
-      case HEX_TIMING_COHERENCY:
-        return static_cast<HEXAPI_TimingMode>(*val);
-    }
-  }
-
-  return detail::StringSwitch<detail::Optional<HEXAPI_TimingMode>>(*str)
-      .Case("NOTIMING", HEX_NOTIMING)
-      .Case("TIMING_NODBC", HEX_TIMING_NODBC)
-      .Case("TIMING", HEX_TIMING)
-      .Case("TIMING_COHERENCY", HEX_TIMING_COHERENCY)
-      .Default(none);
-}
-
-detail::Optional<HEXAPI_VerboseMode> HexagonSimulator::to_verbosemode(
-    const detail::MaybeString& str) {
-  auto none = detail::Optional<HEXAPI_VerboseMode>();
-  if (!str) return none;
-
-  if (auto val = detail::to_int(*str)) {
-    switch (*val) {
-      case HEX_SILENT:
-      case HEX_QUIET:
-      case HEX_NORMAL:
-      case HEX_VERBOSE:
-      case HEX_REALLY_VERBOSE:
-        return static_cast<HEXAPI_VerboseMode>(*val);
-    }
-  }
-
-  return detail::StringSwitch<detail::Optional<HEXAPI_VerboseMode>>(*str)
-      .Case("SILENT", HEX_SILENT)
-      .Case("QUIET", HEX_QUIET)
-      .Case("NORMAL", HEX_NORMAL)
-      .Case("VERBOSE", HEX_VERBOSE)
-      .Case("REALLY_VERBOSE", HEX_REALLY_VERBOSE)
-      .Default(none);
-}
-
-detail::Optional<HEXAPI_Nullptr> HexagonSimulator::to_nullptr(const detail::MaybeString& str) {
-  auto none = detail::Optional<HEXAPI_Nullptr>();
-  if (!str) return none;
-
-  if (auto val = detail::to_int(*str)) {
-    switch (*val) {
-      case HEX_NULLPTR_IGNORE:
-      case HEX_NULLPTR_WARN:
-      case HEX_NULLPTR_FATAL:
-      case HEX_NULLPTR_PCZERO:
-        return static_cast<HEXAPI_Nullptr>(*val);
-    }
-  }
-
-  return detail::StringSwitch<detail::Optional<HEXAPI_Nullptr>>(*str)
-      .Case("IGNORE", HEX_NULLPTR_IGNORE)
-      .Case("WARN", HEX_NULLPTR_WARN)
-      .Case("FATAL", HEX_NULLPTR_FATAL)
-      .Case("PCZERO", HEX_NULLPTR_PCZERO)
-      .Default(none);
-}
-
-std::string HexagonSimulator::to_string(HEXAPI_Status status) {
-  switch (status) {
-    case HEX_STAT_ERROR:
-      return "ERROR";
-    case HEX_STAT_SUCCESS:
-      return "SUCCESS";
-    case HEX_STAT_CANNOT_CONFIG:
-      return "CANNOT_CONFIG";
-    case HEX_STAT_INVALID_ARGS:
-      return "INVALID_ARGS";
-    case HEX_STAT_RANGE_ERROR:
-      return "RANGE_ERROR";
-    case HEX_STAT_FILE_ACCESS_ERROR:
-      return "FILE_ACCESS_ERROR";
-    case HEX_STAT_DEVICE_NOT_FOUND:
-      return "DEVICE_NOT_FOUND";
-    case HEX_STAT_MEM_ACCESS_ERROR:
-      return "MEM_ACCESS_ERROR";
-    case HEX_STAT_CANNOT_TRANSLATE:
-      return "CANNOT_TRANSLATE";
-    case HEX_STAT_NO_ACTIVE_THREADS:
-      return "NO_ACTIVE_THREADS";
-    case HEX_STAT_LOAD_ELF_ERROR:
-      return "LOAD_ELF_ERROR";
-    case HEX_STAT_CORE_RESET:
-      return "CORE_RESET";
-    default:
-      return "unknown";
-  }
-}
-
-}  // namespace hexagon
-}  // namespace runtime
-}  // namespace tvm
diff --git a/src/runtime/hexagon/android/sim/hexagon_sim_proto.h b/src/runtime/hexagon/android/sim/hexagon_sim_proto.h
deleted file mode 100644
index 8887526232..0000000000
--- a/src/runtime/hexagon/android/sim/hexagon_sim_proto.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef TVM_RUNTIME_HEXAGON_ANDROID_SIM_HEXAGON_SIM_PROTO_H_
-#define TVM_RUNTIME_HEXAGON_ANDROID_SIM_HEXAGON_SIM_PROTO_H_
-
-// Protocol:
-
-// Host >-- [ code:MsgReq,  len:amount requested, va:_       ] --> Remote
-// Host <-- [ code:MsqAck,  len:amount provided,  va:address ] --< Remote
-// Host >-- [ code:message, len:payload length,   va:address ] --> Remote
-// Host <-- [ code:None,    len:response length,  va:address ] --< Remote
-
-enum : uint32_t {
-  kNone,
-  kMsgReq,
-  kMsgAck,
-  kAlloc,
-  kFree,
-  kCopy,
-  kLoad,
-  kUnload,
-  kResolve,
-  kCall,
-  kFlush,
-  kAllocVtcm
-};
-
-struct Message {
-  uint32_t code;
-  uint32_t len;
-  uint32_t va;
-} __attribute__((packed));
-
-struct MsgAlloc {
-  uint32_t size;
-  uint32_t align;
-} __attribute__((packed));
-
-struct MsgPointer {
-  uint32_t va;
-} __attribute__((packed));
-
-struct MsgCopy {
-  uint32_t dst;
-  uint32_t src;
-  uint32_t len;
-} __attribute__((packed));
-
-struct MsgCall {
-  uint32_t func_va;     // offset:  0
-  uint32_t scalar_num;  //          4
-  uint32_t stack_num;   //          8
-  uint32_t data[];      //         12
-} __attribute__((packed));
-
-#endif  // TVM_RUNTIME_HEXAGON_ANDROID_SIM_HEXAGON_SIM_PROTO_H_
diff --git a/src/runtime/hexagon/android/target/fastrpc/CMakeLists.txt b/src/runtime/hexagon/android/target/fastrpc/CMakeLists.txt
deleted file mode 100644
index 2c9a09f149..0000000000
--- a/src/runtime/hexagon/android/target/fastrpc/CMakeLists.txt
+++ /dev/null
@@ -1,173 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-cmake_minimum_required(VERSION 3.2)
-project(HexagonIDL C CXX)
-
-if(NOT "${FASTRPC_LIBS}" STREQUAL "SKEL" AND
-   NOT "${FASTRPC_LIBS}" STREQUAL "STUB")
-  message(SEND_ERROR "Please set FASTRPC_LIBS to either SKEL or STUB")
-endif()
-
-include(../../../../../../cmake/utils/Utils.cmake)
-include(../../../../../../cmake/modules/HexagonSDK.cmake)
-
-get_hexagon_sdk_property("${HEXAGON_SDK_ROOT}" "${HEXAGON_ARCH}"
-  SDK_INCLUDE  SDK_INCLUDE_DIRS
-  QURT_INCLUDE QURT_INCLUDE_DIRS
-  DSPRPC_LIB   DSPRPC_LIB_DIRS
-  RPCMEM_ROOT  RPCMEM_ROOT_DIR
-  QAIC_EXE     QAIC_EXE_PATH
-)
-if(NOT SDK_INCLUDE_DIRS OR NOT QURT_INCLUDE_DIRS OR NOT DSPRPC_LIB_DIRS OR
-   NOT RPCMEM_ROOT_DIR OR NOT QAIC_EXE_PATH)
-  message(WARNING "Could not locate some Hexagon SDK components")
-endif()
-
-include_directories(include)
-include_directories(SYSTEM ${SDK_INCLUDE_DIRS})
-
-foreach(INCDIR IN LISTS SDK_INCLUDE_DIRS)
-  list(APPEND QAIC_FLAGS "-I${INCDIR}")
-endforeach()
-
-set(FASTRPC_SRC "${CMAKE_CURRENT_SOURCE_DIR}")
-set(CMAKE_SKIP_RPATH TRUE)
-
-# Qaic for the non-domain header.
-#
-# Don't add paths to these filenames, or otherwise cmake may spontaneously
-# add -o option to the qaic invocation (with an undesirable path).
-set(TVM_REMOTE_ND_IDL "tvm_remote_nd.idl")
-set(TVM_REMOTE_ND_H "tvm_remote_nd.h")
-set(TVM_REMOTE_ND_SKEL_C "tvm_remote_nd_skel.c")
-set(TVM_REMOTE_ND_STUB_C "tvm_remote_nd_stub.c")
-
-add_custom_command(
-  OUTPUT ${TVM_REMOTE_ND_SKEL_C} ${TVM_REMOTE_ND_STUB_C}
-         "${FASTRPC_SRC}/include/${TVM_REMOTE_ND_H}"
-  COMMAND ${QAIC_EXE_PATH} ${QAIC_FLAGS}
-          "${FASTRPC_SRC}/include/${TVM_REMOTE_ND_IDL}"
-  COMMAND ${CMAKE_COMMAND} -E rename "${TVM_REMOTE_ND_H}"
-          "${FASTRPC_SRC}/include/${TVM_REMOTE_ND_H}"
-  MAIN_DEPENDENCY "${FASTRPC_SRC}/include/${TVM_REMOTE_ND_IDL}"
-)
-
-# Qaic for the domain header.
-#
-# Don't add paths to these filenames, or otherwise cmake may spontaneously
-# add -o option to the qaic invocation (with an undesirable path).
-set(TVM_REMOTE_D_IDL "tvm_remote.idl")
-set(TVM_REMOTE_D_H "tvm_remote.h")
-set(TVM_REMOTE_D_SKEL_C "tvm_remote_skel.c")
-set(TVM_REMOTE_D_STUB_C "tvm_remote_stub.c")
-
-add_custom_command(
-  OUTPUT ${TVM_REMOTE_D_SKEL_C} ${TVM_REMOTE_D_STUB_C}
-         "${FASTRPC_SRC}/include/${TVM_REMOTE_D_H}"
-  COMMAND ${QAIC_EXE_PATH} ${QAIC_FLAGS}
-          "${FASTRPC_SRC}/include/${TVM_REMOTE_D_IDL}"
-  COMMAND ${CMAKE_COMMAND} -E rename "${TVM_REMOTE_D_H}"
-          "${FASTRPC_SRC}/include/${TVM_REMOTE_D_H}"
-  MAIN_DEPENDENCY "${FASTRPC_SRC}/include/${TVM_REMOTE_D_IDL}"
-)
-
-
-if("${FASTRPC_LIBS}" STREQUAL "SKEL")
-  # Skel libraries.
-  #
-  include_directories(SYSTEM ${QURT_INCLUDE_DIRS})
-
-  # Extra compile flags (both C and C++).
-  set(EXTRA_COMP_FLAGS
-    "-O3"
-    "-m${HEXAGON_ARCH}"
-  )
-  string(REGEX REPLACE ";" " " EXTRA_COMP_FLAGS_STR "${EXTRA_COMP_FLAGS}")
-  set(CMAKE_C_FLAGS "${EXTRA_COMP_FLAGS_STR} ${CMAKE_C_FLAGS}")
-  set(CMAKE_CXX_FLAGS "${EXTRA_COMP_FLAGS_STR} ${CMAKE_CXX_FLAGS}")
-
-  set(EXTRA_LINK_FLAGS
-    "-Wl,--no-threads"
-    "-Wl,--wrap=malloc"
-    "-Wl,--wrap=calloc"
-    "-Wl,--wrap=free"
-    "-Wl,--wrap=realloc"
-    "-Wl,--wrap=memalign"
-    "-Wl,--wrap=posix_memalign"
-    "-Wl,--wrap=__stack_chk_fail"
-  )
-  string(REGEX REPLACE ";" " " EXTRA_LINK_FLAGS_STR "${EXTRA_LINK_FLAGS}")
-
-  set(SKEL_ND_SRCS
-    "src/tvm_hvx.cc"
-    "src/tvm_remote_nd_imp.cc"
-  )
-  add_library(tvm_remote_nd_skel SHARED
-    "${FASTRPC_SRC}/include/${TVM_REMOTE_ND_H}"
-    "${TVM_REMOTE_ND_SKEL_C}"
-    "${SKEL_ND_SRCS}"
-  )
-
-  set(SKEL_D_SRCS
-    # Also includes src/tvm_remote_nd_imp.cc
-    "${SKEL_ND_SRCS}"
-    "src/tvm_remote_imp.cc"
-  )
-  add_library(tvm_remote_skel SHARED
-    "${FASTRPC_SRC}/include/${TVM_REMOTE_D_H}"
-    "${TVM_REMOTE_D_SKEL_C}"
-    "${SKEL_D_SRCS}"
-  )
-
-  # Separate shared library with __wrap_pthread_create.
-  # It is necessary to have it as a separate library because it defines
-  # a function that libtvm_runtime.so will call. Because of that, this
-  # function needs to be in the global dynamic symbol table, but the
-  # skel libraries are loaded as private by FastRPC.
-  set(WRAP_PTHREAD_SRCS "src/tvm_wrap_pthread.cc")
-  add_library(tvm_wrap_pthread SHARED ${WRAP_PTHREAD_SRCS})
-
-  # Extra linker flags for linking shared libraries.
-  set_target_properties(tvm_remote_nd_skel PROPERTIES LINK_FLAGS ${EXTRA_LINK_FLAGS_STR})
-  set_target_properties(tvm_remote_skel PROPERTIES LINK_FLAGS ${EXTRA_LINK_FLAGS_STR})
-  set_target_properties(tvm_wrap_pthread PROPERTIES LINK_FLAGS ${EXTRA_LINK_FLAGS_STR})
-else()
-  # Stub libraries.
-  #
-  include_directories(SYSTEM
-    ${SDK_INCLUDE_DIRS}
-    "${RPCMEM_ROOT_DIR}/inc"
-  )
-  link_directories(${DSPRPC_LIB_DIRS})
-
-  if(RPCMEM_ROOT_DIR)
-    set(RPCMEM_ANDROID_C "${RPCMEM_ROOT_DIR}/src/rpcmem_android.c")
-  endif()
-  add_library(tvm_remote_nd_stub SHARED
-    "${FASTRPC_SRC}/include/${TVM_REMOTE_ND_H}"
-    "${RPCMEM_ANDROID_C}"
-    "${TVM_REMOTE_ND_STUB_C}"
-  )
-  add_library(tvm_remote_stub SHARED
-    "${FASTRPC_SRC}/include/${TVM_REMOTE_D_H}"
-    "${RPCMEM_ANDROID_C}"
-    "${TVM_REMOTE_D_STUB_C}"
-  )
-  target_link_libraries(tvm_remote_nd_stub adsprpc)
-  target_link_libraries(tvm_remote_stub adsprpc)
-endif()
diff --git a/src/runtime/hexagon/android/target/fastrpc/README.md b/src/runtime/hexagon/android/target/fastrpc/README.md
deleted file mode 100644
index 2d85679bdc..0000000000
--- a/src/runtime/hexagon/android/target/fastrpc/README.md
+++ /dev/null
@@ -1,56 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# Hexagon IDL libraries
-
-This directory hosts IDL files and their implementations to offload TVM kernels to Hexagon via FastRPC. The implementations can be used to generate stub and skel libraries.
-
-### Prerequisites
-
-1. Android NDK version r19c or later.
-2. Hexagon SDK version 3.5.0 or later.
-
-Android NDK can be downloaded from https://developer.android.com/ndk.
-Hexagon SDK is available at //developer.qualcomm.com/software/hexagon-dsp-sdk.
-
-### Configuring
-
-Skel and stub libraries need to be configured and built separately. Please use different subdirectories for each. Otherwise the cmake cache from one configuration can interfere with the next.
-
-For skel libraries, set
-```
-FASTRPC_LIBS=SKEL
-HEXAGON_SDK_ROOT=/path/to/sdk
-CMAKE_C_COMPILER=hexagon-clang
-CMAKE_CXX_COMPILER=hexagon-clang++
-HEXAGON_ARCH= one of v60, v62, v65, v66
-```
-
-Please note that support for older versions of the Hexagon processor may be removed from the future versions of the Hexagon toolchain.
-
-
-For stub libraries, set
-```
-FASTRPC_LIBS=STUB
-HEXAGON_SDK_ROOT=/path/to/sdk
-CMAKE_C_COMPILER=aarch64-linux-android28-clang      # or later
-CMAKE_CXX_COMPILER=aarch64-linux-android28-clang++  # or later
-```
-
-### Building
-
-In each instance, simple `make` command will create header files `fastrpc/include/tvm_remote.h` and `fastrpc/include/tvm_remote_nd.h`. These headers are needed to compile the TVM runtime for Android (and the stub/skel libraries themselves).
diff --git a/src/runtime/hexagon/android/target/fastrpc/include/tvm_remote.idl b/src/runtime/hexagon/android/target/fastrpc/include/tvm_remote.idl
deleted file mode 100644
index bb7d8a2955..0000000000
--- a/src/runtime/hexagon/android/target/fastrpc/include/tvm_remote.idl
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * IDL to offload TVM kernels to Hexagon from APPS for multi-domains.
- */
-#include "remote.idl"
-#include "AEEStdDef.idl"
-
-interface tvm_remote : remote_handle64 {
-   typedef sequence<octet> buffer;
-   typedef unsigned long handle_t;
-
-   long load_library(in sequence<char> soname,
-                     rout handle_t mod_ptr);
-   long get_symbol(in handle_t mod,
-                   in sequence<char> name,
-                   rout handle_t sym_ptr);
-   long kernel(in handle_t mod,
-               in handle_t symbol,
-               in sequence <long> scalar,
-               in sequence <long> stack,
-               in sequence<buffer> scalar_in_octet,
-               rout sequence<buffer> scalar_out_octet,
-               in sequence<buffer> stack_in_octet,
-               rout sequence<buffer> stack_out_octet,
-               rout unsigned long long pcycles,
-               rout unsigned long long time_usec);
-   long release_library(in handle_t mod);
-   long alloc_vtcm(in unsigned long size,
-                   in unsigned long align,
-                   rout unsigned long dsp_va);
-   long free_vtcm(in unsigned long dsp_va);
-   long call_mmap64();
-};
diff --git a/src/runtime/hexagon/android/target/fastrpc/include/tvm_remote_nd.idl b/src/runtime/hexagon/android/target/fastrpc/include/tvm_remote_nd.idl
deleted file mode 100644
index 845ddeffa2..0000000000
--- a/src/runtime/hexagon/android/target/fastrpc/include/tvm_remote_nd.idl
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * IDL to offload TVM kernels to Hexagon from APPS for non-domains.
- */
-#include "remote.idl"
-#include "AEEStdDef.idl"
-
-interface tvm_remote_nd {
-   typedef sequence<octet> buffer;
-   typedef unsigned long handle_t;
-
-   long open();
-   long close();
-   long load_library(in sequence<char> soname,
-                     rout handle_t mod_ptr);
-   long get_symbol(in handle_t mod,
-                   in sequence<char> name,
-                   rout handle_t sym_ptr);
-   long kernel(in handle_t mod,
-               in handle_t symbol,
-               in sequence <long> scalar,
-               in sequence <long> stack,
-               in sequence<buffer> scalar_in_octet,
-               rout sequence<buffer> scalar_out_octet,
-               in sequence<buffer> stack_in_octet,
-               rout sequence<buffer> stack_out_octet,
-               rout unsigned long long pcycles,
-               rout unsigned long long time_usec);
-   long release_library(in handle_t mod);
-   long call_mmap64();
-};
diff --git a/src/runtime/hexagon/android/target/fastrpc/src/tvm_hvx.cc b/src/runtime/hexagon/android/target/fastrpc/src/tvm_hvx.cc
deleted file mode 100644
index 54c06e1024..0000000000
--- a/src/runtime/hexagon/android/target/fastrpc/src/tvm_hvx.cc
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include "tvm_hvx.h"
-
-#include "AEEStdErr.h"
-#include "HAP_farf.h"
-#include "HAP_power.h"
-
-extern "C" {
-#include "qurt_error.h"
-#include "qurt_hvx.h"
-}
-
-namespace hvx {
-
-#if __HEXAGON_ARCH__ >= 65
-#define DEFAULT_HVX_MODE MODE_128B
-#else
-#define DEFAULT_HVX_MODE MODE_DONT_CARE
-#endif
-
-static constexpr mode_t default_hvx_mode = DEFAULT_HVX_MODE;
-
-int reserve(unsigned num_units) {
-  if (qurt_hvx_get_units() <= 0) {
-    return -1;  // HVX not supported in this target.
-  }
-
-  if (num_units == 0) num_units = QURT_HVX_RESERVE_ALL_AVAILABLE;
-  int ret_val = qurt_hvx_reserve(num_units);
-  switch (ret_val) {
-    case QURT_HVX_RESERVE_ALREADY_MADE:
-    case QURT_HVX_RESERVE_NOT_SUPPORTED:
-    case QURT_HVX_RESERVE_NOT_SUCCESSFUL:
-      return 0;
-
-    default:
-      if (ret_val < 0) {
-        return -1;
-      }
-      break;
-  }
-  return ret_val;
-}
-
-int unreserve() {
-  int ret_val = qurt_hvx_cancel_reserve();
-  if (ret_val != QURT_EOK) {
-    return -1;
-  }
-  return 0;
-}
-
-int power_on() {
-  HAP_power_request_t request;
-  request.type = HAP_power_set_HVX;
-  request.hvx.power_up = 1;
-  int rc = HAP_power_set(nullptr, &request);
-  if (rc != AEE_SUCCESS) {
-    FARF(ERROR, "%s: unable to power on HVX, rc=%08x", rc);
-    return -1;
-  }
-  return 0;
-}
-
-int power_off() {
-  HAP_power_request_t request;
-  request.type = HAP_power_set_HVX;
-  request.hvx.power_up = 0;
-  int rc = HAP_power_set(nullptr, &request);
-  if (rc != AEE_SUCCESS) {
-    FARF(ERROR, "%s: unable to power off HVX, rc=%08x", rc);
-    return -1;
-  }
-  return 0;
-}
-
-int lock(mode_t mode) {
-  qurt_hvx_mode_t qurt_mode;
-  int vlen;
-
-  if (MODE_DONT_CARE == mode) mode = default_hvx_mode;
-
-  switch (mode) {
-    case MODE_DONT_CARE: {
-      int ret_val = qurt_hvx_get_mode();
-      if (ret_val < 0) {
-        FARF(HIGH, "%s: unknown HVX mode %d", __func__, qurt_mode);
-        return -1;
-      }
-      qurt_mode = static_cast<qurt_hvx_mode_t>(ret_val);
-      switch (qurt_mode) {
-        case QURT_HVX_MODE_64B:
-          vlen = 64;
-          break;
-        case QURT_HVX_MODE_128B:
-          vlen = 128;
-          break;
-      }
-      break;
-    }
-
-    case MODE_64B:
-      qurt_mode = QURT_HVX_MODE_64B;
-      vlen = 64;
-      break;
-
-    case MODE_128B:
-      qurt_mode = QURT_HVX_MODE_128B;
-      vlen = 128;
-      break;
-
-    default:
-      FARF(HIGH, "%s: unknown HVX mode %d", __func__, qurt_mode);
-      return -3;
-  }
-
-  // Starting with v65, the RTOS supports HVX context switching.
-  // Treat all hvx locks as blocking now, so they can succeed, and
-  // be scheduled according to RTOS scheduler via thread priority.
-  // Nonblocking call: qurt_hvx_try_lock(qurt_mode).
-  int ret_val = qurt_hvx_lock(qurt_mode);
-
-  if (ret_val != QURT_EOK) {
-    return -1;
-  }
-  return vlen;
-}
-
-int unlock() {
-  int ret_val = qurt_hvx_unlock();
-  if (ret_val != QURT_EOK) {
-    return -1;
-  }
-  return 0;
-}
-
-int prepare_mt_job(config_t* hvx_config) {
-  int num_units = qurt_hvx_get_units();
-  if (num_units <= 0) {
-    return -1;
-  }
-
-  // Check whether HVX is reserved for this protection domain. If not,
-  // see if we can temporarily reserve them for this invocation only.
-  hvx_config->temp_reserve = false;
-  if (hvx_config->num_reserved == 0) {
-    hvx_config->num_reserved = reserve(0);  // Reserve all units.
-    if (hvx_config->num_reserved <= 0) {
-      return -1;
-    }
-    hvx_config->temp_reserve = true;
-  }
-
-  // If client doesn't specify required mode, fallback to default.
-  if (hvx_config->mode == MODE_DONT_CARE) hvx_config->mode = default_hvx_mode;
-
-  // Choose 64 byte or 128 byte mode, based on whether there are odd or even
-  // number of units
-  if (hvx_config->mode == MODE_64B ||
-      (hvx_config->mode == MODE_DONT_CARE && (hvx_config->num_reserved & 1))) {
-    hvx_config->vlen = 64;
-    hvx_config->mode = MODE_64B;
-    hvx_config->num_threads = hvx_config->num_reserved;
-  } else {
-    hvx_config->vlen = 128;
-    hvx_config->mode = MODE_128B;
-    hvx_config->num_threads = (num_units >> 8) & 0xFF;
-    // Handle case where only 1 64-byte unit was available.
-    if (hvx_config->num_threads == 0) {
-      if (hvx_config->temp_reserve) unreserve();
-      return -1;
-    }
-  }
-
-  // If using HVX, make sure it turns on properly.
-  if (hvx_config->num_reserved > 0 && power_on() != 0) {
-    return -1;
-  }
-  return 0;
-}
-
-int cleanup_mt_job(const config_t* hvx_config) {
-  // If HVX was used, indicate it can be turned off.
-  if (hvx_config->num_reserved > 0) power_off();
-  // If HVX was temporarily reserved, unreserve it.
-  if (hvx_config->temp_reserve) unreserve();
-  return 0;
-}
-
-}  // namespace hvx
diff --git a/src/runtime/hexagon/android/target/fastrpc/src/tvm_hvx.h b/src/runtime/hexagon/android/target/fastrpc/src/tvm_hvx.h
deleted file mode 100644
index 3d14252ad6..0000000000
--- a/src/runtime/hexagon/android/target/fastrpc/src/tvm_hvx.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef TVM_RUNTIME_HEXAGON_ANDROID_TARGET_FASTRPC_SRC_TVM_HVX_H_
-#define TVM_RUNTIME_HEXAGON_ANDROID_TARGET_FASTRPC_SRC_TVM_HVX_H_
-
-// Utility providing functions for accessing the Hexagon Vector Extensions
-// (HVX) hardware.
-
-#include <cstdint>
-
-namespace hvx {
-
-enum mode_t : uint32_t {
-  MODE_DONT_CARE = 0, /*!< Don't-care, just use whatever current mode is. */
-  MODE_64B,           /*!< 64 byte HVX vector width.                      */
-  MODE_128B           /*!< 128 byte HVX vector width.                     */
-};
-
-/*!
- * \brief HVX configuration data.
- */
-struct config_t {
-  int num_reserved;  /*!< Number of reserved HVX units.                  */
-  bool temp_reserve; /*!< Indicates that HVX pool reservation is         */
-                     /*!< temporary and needs to be released after use.  */
-  mode_t mode;       /*!< Configured HVX mode.                           */
-  int vlen;          /*!< Configured HVX vector width (64 or 128 bytes). */
-  int num_threads;   /*!< Number of threads that can lock HVX units.     */
-};
-
-/*!
- * \brief
- *   This function reserves HVX units for the protection domain to which
- *   the caller belongs. Reservation is optional before locking HVX units.
- *   Typically it would be called by applications that want to guarantee
- *   up front that the requested number of HVX units will be available
- *   for the duration of the application.
- *
- * \param num_units
- *   Number of HVX units to reserve. 0 indicates to reserve all the units
- *   present in the given target. > 0 indicates the number of single HVX
- *   units to reserve. Mode (64 byte vs. 128 byte) is not specified.
- *
- * \return
- *   The number of HVX units (in terms of 64 byte single units) successfully
- *   reserved. The return value of -1 indicates no HVX hardware is available
- *   on the target.
- */
-int reserve(unsigned num_units);
-
-/*!
- * \brief
- *   This function releases all HVX unit from reservation. A call to this
- *   function nullifies all previous calls to reserve HVX units from within
- *   this worker pool's protection domain.
- *
- * \return
- *   0 on success, -1 if there was an error.
- */
-int unreserve();
-
-/*!
- * \brief
- *   This function turns on the HVX hardware. It must be called sometime
- *   before (possibly multiple) software threads lock HVX units.
- *
- * \return
- *   0 on success, -1 if there was an error.
- */
-int power_on();
-
-/*!
- * \brief
- *   This function turns off the HVX hardware. It must be called sometime
- *   after all threads have unlocked their HVX units.
- *
- * \return
- *   0 on success, -1 if there was an error.
- */
-int power_off();
-
-/*!
- * \brief
- *   This function locks the HVX units for the calling threads.
- *
- * \param mode
- *   The HVX mode.
- *
- * \return
- *   0 on success, -1 if there was an error.
- */
-int lock(mode_t mode);
-
-/*!
- * \brief
- *   This function unlocks the HVX units for the calling threads.
- *
- * \return
- *   0 on success, -1 if there was an error.
- */
-int unlock();
-
-/*!
- * \brief
- *   This function performs preparations for multithreaded job.
- *   It does so by filling out data members in the configuration
- *   structure passed as a parameter, and by setting up the hardware:
- *   - it performs a temporary reservation of HVX units, if no units
- *     have yet been reserved,
- *   - it powers on the HVX hardware.
- *
- * \param hvx_config
- *   Structure describing the HVX configuration. Two data members
- *   must be set prior to calling \ref prepare_mt_job:
- *   \ref num_reserved, indicating the number of previously reserved
- *   HVX units (can be 0), and \ref mode indicating the HVX mode.
- *
- * \return
- *   0 on success, -1 if there was an error.
- */
-int prepare_mt_job(config_t* hvx_config);
-
-/*!
- * \brief
- *   This function cleans up after \ref prepare_mt_job, in particular
- *   it releases temporarily reserved HVX units and turns the HVX
- *   hardware off.
- *
- * \return
- *   0 on success, -1 if there was an error.
- */
-int cleanup_mt_job(const config_t* hvx_config);
-
-}  // namespace hvx
-
-#endif  // TVM_RUNTIME_HEXAGON_ANDROID_TARGET_FASTRPC_SRC_TVM_HVX_H_
diff --git a/src/runtime/hexagon/android/target/fastrpc/src/tvm_remote_imp.cc b/src/runtime/hexagon/android/target/fastrpc/src/tvm_remote_imp.cc
deleted file mode 100644
index c9e3332d59..0000000000
--- a/src/runtime/hexagon/android/target/fastrpc/src/tvm_remote_imp.cc
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include <assert.h>
-#include <stdlib.h>
-
-#define FARF_ERROR 1
-#include "AEEStdErr.h"
-#include "HAP_farf.h"
-#include "HAP_perf.h"
-#include "apps_mem.h"
-#include "qurt.h"
-#include "tvm_remote.h"
-#include "tvm_remote_nd.h"
-
-#if __HEXAGON_ARCH__ >= 65
-#include "HAP_vtcm_mgr.h"
-#else
-// Stub functions for targets that don't support VTCM.
-static void* HAP_request_VTCM(int a, int b) { return 0; }
-static int HAP_release_VTCM(void* a) { return 0; }
-static int HAP_query_avail_VTCM(unsigned* avail_block_size, unsigned* max_page_size,
-                                unsigned* num_pages) {
-  FARF(ALWAYS, "%s: running on architecture V62 or less", __func__);
-  return AEE_ENOMEMORY;
-}
-#endif  // __HEXAGON_ARCH__
-
-#define MIN_GATHER_SCATTER_SZ (32 * 1024)
-#define MAX_GATHER_SCATTER_SZ (64 * 1024)
-#define MIN_VTCM_SZ (64 * 1024)
-
-/*!
- *  \brief Open a domain channel.
- *
- *  \param uri          URI of the channel description.
- *  \param handle_ptr   Where to store the channel handle.
- *
- *  \return 0 on success, negative value on error.
- */
-int tvm_remote_open(const char* uri, remote_handle64* handle_ptr) {
-  FARF(ALWAYS, "%s, uri=%s", __func__, uri);
-  int rc = tvm_remote_nd_open();
-  if (rc != AEE_SUCCESS) {
-    FARF(ERROR, "%s: tvm_remote_nd_open failed rc=%08x", __func__, rc);
-    return rc;
-  }
-
-  *handle_ptr = static_cast<remote_handle64>(reinterpret_cast<uintptr_t>(malloc(1)));
-  if (!*handle_ptr) {
-    FARF(ERROR, "%s: cannot allocate memory", __func__);
-    return AEE_ENOMEMORY;
-  }
-  return AEE_SUCCESS;
-}
-
-/*!
- *  \brief Close domain channel.
- *
- *  \param handle   Domain channel handle to close.
- *
- *  \return 0 on success, negative value on error.
- */
-int tvm_remote_close(remote_handle64 handle) {
-  FARF(ALWAYS, "%s", __func__);
-  if (handle) free(reinterpret_cast<void*>(static_cast<uintptr_t>(handle)));
-  int rc = tvm_remote_nd_close();
-  if (rc != AEE_SUCCESS) {
-    FARF(ERROR, "%s: tvm_remote_nd_close failed rc=%08x", __func__, rc);
-  }
-  return rc;
-}
-
-/*!
- *  \brief Dummy function.
- *
- *  \param handle   Domain channel handle.
- *
- *  \return This function always returns 0.
- *
- * This function is present as a workaround. See comment at the call site
- * in hexagon_device_target.cc.
- */
-int tvm_remote_call_mmap64(remote_handle64 handle) { return AEE_SUCCESS; }
-
-/*!
- *  \brief  Load a shared library.
- *
- *  \param handle       Domain channel handle.
- *  \param soname       Name of the shared library.
- *  \param soname_len   Length of the name.
- *  \param lib_ptr      Where to store the handle of the loaded libarary.
- *
- *  \return 0 on success, negative value on error.
- */
-int tvm_remote_load_library(remote_handle64 handle, const char* soname, int soname_len,
-                            tvm_remote_handle_t* lib_ptr) {
-  return tvm_remote_nd_load_library(soname, soname_len, lib_ptr);
-}
-
-/*!
- *  \brief  Resolve symbol name to an address.
- *
- *  \param handle       Domain channel handle.
- *  \param lib          Handle of the shared library with the symbol.
- *  \param name         Symbol name.
- *  \param name_len     Length of the name.
- *  \param sym_ptr      Where to store the resolved address.
- *
- *  \return 0 on success, negative value on error.
- */
-int tvm_remote_get_symbol(remote_handle64 handle, tvm_remote_handle_t lib, const char* name,
-                          int name_len, tvm_remote_handle_t* sym_ptr) {
-  return tvm_remote_nd_get_symbol(lib, name, name_len, sym_ptr);
-}
-
-/*!
- *  \brief Call the specified function (FastRPC entry point).
- *
- *  Drops the domain channel handle and forwards everything else to
- *  tvm_remote_nd_kernel. The four buffer arrays are reinterpreted as arrays
- *  of tvm_remote_nd_buffer.
- *  NOTE(review): this presumes tvm_remote_buffer and tvm_remote_nd_buffer
- *  have identical layout -- confirm against the generated headers.
- *
- *  \param handle                 Domain channel handle (unused).
- *  \param lib                    Handle of the library containing
- *                                the function to call.
- *  \param symbol                 Address of the function to call.
- *  \param scalar                 Address of values to pass in registers.
- *  \param scalar_len             Number of values to pass in registers.
- *  \param stack                  Address of values to pass on stack.
- *  \param stack_len              Number of values to pass on stack.
- *
- *  \param scalar_in_octet        Address of the incoming scalar buffer.
- *  \param scalar_in_octet_len    Length of the incoming scalar buffer.
- *  \param scalar_out_octet       Address of the outgoing scalar buffer.
- *  \param scalar_out_octet_len   Length of the outgoing scalar buffer.
- *  \param stack_in_octet         Address of the incoming stack buffer.
- *  \param stack_in_octet_len     Length of the incoming stack buffer.
- *  \param stack_out_octet        Address of the outgoing stack buffer.
- *  \param stack_out_octet_len    Length of the outgoing stack buffer.
- *
- *  \param pcycles                Pointer to where to store cycle count.
- *  \param time_usec              Pointer to where to store time in usec.
- *
- *  \return The value returned by tvm_remote_nd_kernel.
- *
- * The 8 "octet" arguments in this function are used for cache operations
- * only. They are not used for processing.
- */
-int tvm_remote_kernel(remote_handle64 handle, tvm_remote_handle_t lib, tvm_remote_handle_t symbol,
-                      const int* scalar, int scalar_len, const int* stack, int stack_len,
-                      const tvm_remote_buffer* scalar_in_octet, int scalar_in_octet_len,
-                      tvm_remote_buffer* scalar_out_octet, int scalar_out_octet_len,
-                      const tvm_remote_buffer* stack_in_octet, int stack_in_octet_len,
-                      tvm_remote_buffer* stack_out_octet, int stack_out_octet_len, uint64* pcycles,
-                      uint64* time_usec) {
-  // View the FastRPC buffer descriptors through the non-domain buffer type.
-  const auto* nd_scalar_in = reinterpret_cast<const tvm_remote_nd_buffer*>(scalar_in_octet);
-  auto* nd_scalar_out = reinterpret_cast<tvm_remote_nd_buffer*>(scalar_out_octet);
-  const auto* nd_stack_in = reinterpret_cast<const tvm_remote_nd_buffer*>(stack_in_octet);
-  auto* nd_stack_out = reinterpret_cast<tvm_remote_nd_buffer*>(stack_out_octet);
-
-  return tvm_remote_nd_kernel(lib, symbol, scalar, scalar_len, stack, stack_len, nd_scalar_in,
-                              scalar_in_octet_len, nd_scalar_out, scalar_out_octet_len,
-                              nd_stack_in, stack_in_octet_len, nd_stack_out, stack_out_octet_len,
-                              pcycles, time_usec);
-}
-
-/*!
- *  \brief Release a previously loaded shared object (FastRPC entry point).
- *
- *  The domain channel handle is not used by the operation itself; the
- *  request is forwarded to tvm_remote_nd_release_library.
- *
- *  \param handle       Domain channel handle (unused).
- *  \param lib          Handle of shared library to release.
- *
- *  \return The value returned by tvm_remote_nd_release_library.
- */
-int tvm_remote_release_library(remote_handle64 handle, tvm_remote_handle_t lib) {
-  const int status = tvm_remote_nd_release_library(lib);
-  return status;
-}
-
-/*!
- *  \brief Allocate VTCM memory.
- *
- *  Queries the available VTCM, rejects the request when the largest
- *  available page is smaller than MIN_VTCM_SZ, and otherwise requests a
- *  single-page allocation of \p size bytes.
- *  NOTE(review): \p align is only logged; it is not passed to
- *  HAP_request_VTCM -- confirm the returned address meets callers' needs.
- *
- *  \param handle   Domain channel handle (unused).
- *  \param size     Number of bytes to allocate.
- *  \param align    Requested alignment.
- *  \param dsp_va   Address of variable to store the allocated VTCM
- *                  address to.
- *
- *  \return AEE_SUCCESS on success; the error from HAP_query_avail_VTCM if
- *          the query fails; AEE_ENOMEMORY if too little VTCM is available
- *          or the allocation fails.
- */
-int tvm_remote_alloc_vtcm(remote_handle64 handle, unsigned size, unsigned align, unsigned* dsp_va) {
-  FARF(ALWAYS, "%s: size=%u, align=%u", __func__, size, align);
-
-  // Ask the platform what is currently available.
-  unsigned free_block_bytes, largest_page_bytes, page_count;
-  const int query_rc = HAP_query_avail_VTCM(&free_block_bytes, &largest_page_bytes, &page_count);
-  if (query_rc != AEE_SUCCESS) {
-    FARF(ERROR, "%s: HAP_query_avail_VTCM failed, rc=%08x", __func__, query_rc);
-    return query_rc;
-  }
-  FARF(ALWAYS, "%s: avail_block_size=%u, max_page_size=%u, num_pages=%u", __func__,
-       free_block_bytes, largest_page_bytes, page_count);
-
-  // Give up early when even the largest page is below the required minimum.
-  if (largest_page_bytes < MIN_VTCM_SZ) {
-    FARF(ERROR, "%s: available VTCM size less than %d KB, aborting", __func__, MIN_VTCM_SZ / 1024);
-    return AEE_ENOMEMORY;
-  }
-
-  void* const vtcm_ptr = HAP_request_VTCM(size, /*single_page_flag=*/1);
-  if (vtcm_ptr == nullptr) {
-    FARF(ERROR, "%s: error allocating VTCM", __func__);
-    return AEE_ENOMEMORY;
-  }
-
-  // Hand the address back as an integer.
-  const uintptr_t vtcm_bits = reinterpret_cast<uintptr_t>(vtcm_ptr);
-  *dsp_va = static_cast<unsigned>(vtcm_bits);
-  FARF(ALWAYS, "%s: allocated VTCM addr=0x%p", __func__, vtcm_ptr);
-  return AEE_SUCCESS;
-}
-
-/*!
- *  \brief Free VTCM memory.
- *
- *  \param handle   Domain channel handle (unused).
- *  \param dsp_va   VTCM address to free.
- *
- *  \return The value returned by HAP_release_VTCM (AEE_SUCCESS on success).
- */
-int tvm_remote_free_vtcm(remote_handle64 handle, unsigned dsp_va) {
-  FARF(ALWAYS, "%s: dsp_va=0x%08x", __func__, dsp_va);
-  // Turn the integer address back into a pointer for the HAP API.
-  const int release_rc = HAP_release_VTCM(reinterpret_cast<void*>(dsp_va));
-  if (release_rc == AEE_SUCCESS) {
-    return release_rc;
-  }
-  FARF(ERROR, "%s: error freeing VTCM, rc=%08x", __func__, release_rc);
-  return release_rc;
-}
diff --git a/src/runtime/hexagon/android/target/fastrpc/src/tvm_remote_nd_imp.cc b/src/runtime/hexagon/android/target/fastrpc/src/tvm_remote_nd_imp.cc
deleted file mode 100644
index c0f6f22172..0000000000
--- a/src/runtime/hexagon/android/target/fastrpc/src/tvm_remote_nd_imp.cc
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include <assert.h>
-#include <dlfcn.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <vector>
-
-#define FARF_ERROR 1
-#include "AEEStdDef.h"
-#include "AEEStdErr.h"
-#include "HAP_farf.h"
-#include "HAP_mem.h"
-#include "HAP_perf.h"
-#include "qurt.h"
-#include "tvm_hvx.h"
-#include "tvm_remote_nd.h"
-
-// Argument block consumed by launcher(): a three-word header followed by the
-// argument words. tvm_remote_nd_kernel writes the scalar words first and the
-// stack words directly after them.
-struct msg_call {
-  uint32_t func_va;     // Address of the function to call (launcher reads offset 0).
-  uint32_t scalar_num;  // Number of scalar words in data (launcher reads offset 4).
-  uint32_t stack_num;   // Number of stack words in data (launcher reads offset 8).
-  uint32_t data[];      // Scalar words, then stack words (starts at offset 12).
-} __attribute__((packed));
-
-// Hand-written trampoline that calls mc->func_va with arguments taken from
-// mc->data and measures the call with the 'upcycle' counter.
-//
-// Steps performed by the assembly below:
-//  - saves r16-r21 in a new frame and keeps mc/pcc in r16/r17;
-//  - if stack_num != 0, reserves stack space (rounded up to a multiple of 8
-//    bytes) and copies the stack_num words that follow the scalar words;
-//  - always loads data[0..5] into r0-r5, whatever scalar_num is, so the
-//    message must provide at least six readable data words;
-//  - calls the function and stores the difference of the two 'upcycle'
-//    readings to *pcc;
-//  - restores the saved registers and returns; r0 is left as the callee set
-//    it (see the "implicit-use r1:0" note on the last packet).
-__attribute__((naked)) uint32_t launcher(volatile msg_call* mc, uint64_t* pcc) {
-  __asm__(
-      "// This function is intentionally written to be readable,      \n"
-      "// rather than fast.                                           \n"
-      "// r0 = value of 'volatile msg_call *mc'                       \n"
-      "// r1 = address where to store the program cycle count         \n"
-
-      "// In this packet the store happens before the allocframe so   \n"
-      "// the offset added to r29 must reflect that the r29 has not   \n"
-      "// yet been updated (stack grows towards decreasing addresses):\n"
-      "//                    r29 before allocframe --.                \n"
-      "//   [ r17:16 ] [ r19:18 ] [ r21:20 ] [ FP/LR ]                \n"
-      "//   `-- r29 after allocframe      increasing addresses -->    \n"
-      "{ memd(r29+#-16) = r21:20                                      \n"
-      "  allocframe(#24)          }                                   \n"
-      "{ memd(r29+#0) = r17:16                                        \n"
-      "  memd(r29+#8) = r19:18    }                                   \n"
-      "{ r17:16 = combine(r1,r0)                                      \n"
-      "  r18 = r29                                                    \n"
-      "  r1 = memw(r0+#4)            // scalar_num                    \n"
-      "  r2 = memw(r0+#8)         }  // stack_num                     \n"
-      "// If there are no stack values, skip the stack setup.         \n"
-      "{ p0 = cmp.eq(r2,#0)                                           \n"
-      "  if (p0.new) jump:t .Llauncher1 }                             \n"
-
-      "// Allocate space on the stack. Let r2 = needed space          \n"
-      "// rounded up to a multiple of 8.                              \n"
-      "{ loop0(.Llauncher0,r2)                                        \n"
-      "  r2 = asl(r2,#2)          }                                   \n"
-      "{ r2 = add(r2,#4)          }                                   \n"
-      "{ r2 = clrbit(r2,#2)       }                                   \n"
-      "{ r29 = sub(r29,r2)        }                                   \n"
-
-      "// Copy stack contents onto the stack. Stack contents start    \n"
-      "// at r3 = r0 + offsetof(data) + scalar_num*4                  \n"
-      "{ r3 = addasl(r0,r1,#2)                                        \n"
-      "  r4 = r29                 }                                   \n"
-      "{ r3 = add(r3,#12)         } // offsetof(data)                 \n"
-      ".Llauncher0:                                                   \n"
-      "{ r5 = memw(r3++#4)                                            \n"
-      "  memw(r4++#4) = r5.new    } :endloop0                         \n"
-
-      "// Load registers. Some of the loaded data may actually be     \n"
-      "// values from the stack part of 'data', but it's not an issue.\n"
-      ".Llauncher1:                                                   \n"
-      "{ r0 = memw(r16+#12)         // mc + offsetof(data)            \n"
-      "  r1 = memw(r16+#16)       }                                   \n"
-      "{ r2 = memw(r16+#20)                                           \n"
-      "  r3 = memw(r16+#24)       }                                   \n"
-      "{ r4 = memw(r16+#28)                                           \n"
-      "  r5 = memw(r16+#32)       }                                   \n"
-
-      "// Call.                                                       \n"
-      "{ r6 = memw(r16+#0)                                            \n"
-      "  r21:20 = upcycle         }                                   \n"
-      "{ callr r6                 }                                   \n"
-
-      "// Restore stack pointer (free up r18), calculate cycle count. \n"
-      "{ r29 = r18                                                    \n"
-      "  r19:18 = upcycle         }                                   \n"
-      "{ r19:18 = sub(r19:18, r21:20) }                               \n"
-
-      "// Store pcount, restore non-volatile registers, and return.   \n"
-      "{ memd(r17+#0) = r19:18                                        \n"
-      "  r21:20 = memd(r29+#16)   }                                   \n"
-      "{ r19:18 = memd(r29+#8)                                        \n"
-      "  r17:16 = memd(r29+#0)    }                                   \n"
-      "{ dealloc_return           } // implicit-use r1:0              \n");
-}
-
-// Weak fallback definition of __wrap_pthread_create. It only logs an error
-// and aborts, so it must never be the definition that actually runs.
-// NOTE(review): presumably the strong definition from libtvm_wrap_pthread.so
-// (loaded in tvm_remote_nd_open) is expected to take precedence -- confirm.
-extern "C" {
-#pragma weak __wrap_pthread_create
-int __wrap_pthread_create(pthread_t* restrict thread, const pthread_attr_t* restrict attr,
-                          void* (*start)(void*), void* restrict arg) {
-  FARF(ERROR, "Wrong %s called", __func__);
-  abort();
-}
-}
-
-// dlopen handles owned by this file: set in tvm_remote_nd_open, released and
-// reset to nullptr in tvm_remote_nd_close.
-static void* lib_rt = nullptr;      // libtvm_runtime.so
-static void* lib_thread = nullptr;  // libtvm_wrap_pthread.so
-
-/*!
- *  \brief Perform initialization.
- *
- *  Loads libtvm_wrap_pthread.so first and libtvm_runtime.so second, both
- *  with RTLD_NOW | RTLD_GLOBAL. If the second load fails, the first library
- *  is unloaded again so that a failed open leaves nothing loaded.
- *
- *  \return AEE_SUCCESS on success, AEE_EUNABLETOLOAD if either library
- *          cannot be loaded.
- */
-int tvm_remote_nd_open() {
-  lib_thread = dlopen("libtvm_wrap_pthread.so", RTLD_NOW | RTLD_GLOBAL);
-  if (lib_thread == nullptr) {
-    FARF(ERROR, "%s: dlopen failed for libtvm_wrap_pthread.so: %s", __func__, dlerror());
-    return AEE_EUNABLETOLOAD;
-  }
-
-  lib_rt = dlopen("libtvm_runtime.so", RTLD_NOW | RTLD_GLOBAL);
-  if (lib_rt == nullptr) {
-    // Report the dlopen error before dlclose can overwrite the dlerror state.
-    FARF(ERROR, "%s: dlopen failed for libtvm_runtime.so: %s", __func__, dlerror());
-    // Roll back the first load: the caller sees a failure and may never call
-    // tvm_remote_nd_close, which would otherwise leave the library loaded.
-    dlclose(lib_thread);
-    lib_thread = nullptr;
-    return AEE_EUNABLETOLOAD;
-  }
-  return AEE_SUCCESS;
-}
-
-/*!
- *  \brief Perform cleanup.
- *
- *  Unloads the libraries in the reverse order of tvm_remote_nd_open:
- *  libtvm_runtime.so first, then libtvm_wrap_pthread.so. The runtime is
- *  linked with --wrap=pthread_create and therefore refers to
- *  __wrap_pthread_create, which the wrapper library provides, so the wrapper
- *  has to stay loaded for as long as the runtime is.
- *
- *  \return Always AEE_SUCCESS.
- */
-int tvm_remote_nd_close() {
-  if (lib_rt != nullptr) {
-    dlclose(lib_rt);
-    lib_rt = nullptr;
-  }
-  if (lib_thread != nullptr) {
-    dlclose(lib_thread);
-    lib_thread = nullptr;
-  }
-  return AEE_SUCCESS;
-}
-
-/*!
- *  \brief Dummy function.
- *
- *  Takes no arguments and does no work.
- *
- *  \return This function always returns AEE_SUCCESS.
- *
- * This function is present as a workaround. See comment at the call site
- * in hexagon_device_target.cc.
- */
-int tvm_remote_nd_call_mmap64() {
-  // Nothing to do: only the successful return matters.
-  return AEE_SUCCESS;
-}
-
-/*!
- *  \brief  Load a shared library.
- *
- *  \param soname       Name of the shared library; passed to dlopen and
- *                      logged with %s.
- *  \param soname_len   Length of the name (not used here).
- *  \param lib_ptr      Where to store the handle of the loaded library.
- *
- *  \return AEE_SUCCESS on success, AEE_EUNKNOWN if dlopen fails.
- */
-int tvm_remote_nd_load_library(const char* soname, int soname_len,
-                               tvm_remote_nd_handle_t* lib_ptr) {
-  FARF(ALWAYS, "%s: %s", __func__, soname);
-  // RTLD_NOW is required: the libraries built for Hexagon offloading do not
-  // support lazy binding.
-  void* const handle = dlopen(soname, RTLD_GLOBAL | RTLD_NOW);
-  if (handle == nullptr) {
-    FARF(ERROR, "%s: dlopen failed: %s", __func__, dlerror());
-    return AEE_EUNKNOWN;
-  }
-  *lib_ptr = reinterpret_cast<tvm_remote_nd_handle_t>(handle);
-  return AEE_SUCCESS;
-}
-
-/*!
- *  \brief  Resolve symbol name to an address.
- *
- *  \param lib          Handle of the shared library with the symbol.
- *  \param name         Symbol name; passed to dlsym and logged with %s.
- *  \param name_len     Length of the name (not used here).
- *  \param sym_ptr      Where to store the resolved address.
- *
- *  \return AEE_SUCCESS on success, AEE_EUNKNOWN if dlsym returns null.
- */
-int tvm_remote_nd_get_symbol(tvm_remote_nd_handle_t lib, const char* name, int name_len,
-                             tvm_remote_nd_handle_t* sym_ptr) {
-  FARF(ALWAYS, "%s: name=%s", __func__, name);
-  void* const address = dlsym(reinterpret_cast<void*>(lib), name);
-  if (address == nullptr) {
-    FARF(ERROR, "%s: dlsym failed: %s", __func__, dlerror());
-    return AEE_EUNKNOWN;
-  }
-  *sym_ptr = reinterpret_cast<tvm_remote_nd_handle_t>(address);
-  return AEE_SUCCESS;
-}
-
-// Log the header of a call message, then every scalar word, then every
-// stack word (the stack words follow the scalar words in mc.data).
-static void print_msg_call(const msg_call& mc) {
-  FARF(ALWAYS, "device: launching %x scalar_num:%d stack_num:%d", mc.func_va, mc.scalar_num,
-       mc.stack_num);
-  unsigned scalar_idx = 0;
-  while (scalar_idx != mc.scalar_num) {
-    FARF(ALWAYS, "scalar_data[%d]  %x", scalar_idx, mc.data[scalar_idx]);
-    ++scalar_idx;
-  }
-  const unsigned stack_base = mc.scalar_num;
-  unsigned stack_idx = 0;
-  while (stack_idx != mc.stack_num) {
-    FARF(ALWAYS, "stack_data[%d]   %x", stack_idx, mc.data[stack_base + stack_idx]);
-    ++stack_idx;
-  }
-}
-
-/*!
- *  \brief Call the specified function.
- *
- *  Packs the scalar and stack words into a msg_call, tries to lock HVX in
- *  128-byte mode when HVX units are reserved, runs the function through
- *  launcher() and reports the cycle count and wall-clock time.
- *
- *  \param lib                    Handle of the library containing
- *                                the function to call (not used here).
- *  \param symbol                 Address of the function to call.
- *  \param scalar                 Address of values to pass in registers.
- *  \param scalar_len             Number of values to pass in registers.
- *  \param stack                  Address of values to pass on stack.
- *  \param stack_len              Number of values to pass on stack.
- *
- *  \param scalar_in_octet        Address of the incoming scalar buffer.
- *  \param scalar_in_octet_len    Length of the incoming scalar buffer.
- *  \param scalar_out_octet       Address of the outgoing scalar buffer.
- *  \param scalar_out_octet_len   Length of the outgoing scalar buffer.
- *  \param stack_in_octet         Address of the incoming stack buffer.
- *  \param stack_in_octet_len     Length of the incoming stack buffer.
- *  \param stack_out_octet        Address of the outgoing stack buffer.
- *  \param stack_out_octet_len    Length of the outgoing stack buffer.
- *
- *  \param pcycles                Pointer to where to store cycle count.
- *  \param time_usec              Pointer to where to store time in usec.
- *
- *  \return The value returned through launcher() by the called function;
- *          AEE_EUNKNOWN if a length is negative; AEE_ENOMEMORY if the call
- *          message cannot be allocated.
- *
- * The 8 "octet" arguments in this function are used for cache operations
- * only. They are not used for processing.
- */
-int tvm_remote_nd_kernel(tvm_remote_nd_handle_t lib, tvm_remote_nd_handle_t symbol,
-                         const int* scalar, int scalar_len, const int* stack, int stack_len,
-                         const tvm_remote_nd_buffer* scalar_in_octet, int scalar_in_octet_len,
-                         tvm_remote_nd_buffer* scalar_out_octet, int scalar_out_octet_len,
-                         const tvm_remote_nd_buffer* stack_in_octet, int stack_in_octet_len,
-                         tvm_remote_nd_buffer* stack_out_octet, int stack_out_octet_len,
-                         uint64* pcycles, uint64* time_usec) {
-  // Number of header words in msg_call (func_va, scalar_num, stack_num).
-  static constexpr size_t kHeaderWords = 3;
-  // launcher() always loads data[0..5] into r0-r5, so the message must hold
-  // at least this many data words even when fewer arguments are passed.
-  static constexpr size_t kMinDataWords = 6;
-
-  // Reject negative counts before they reach the size computation.
-  if (scalar_len < 0 || stack_len < 0) {
-    FARF(ERROR, "%s: invalid lengths scalar_len=%d stack_len=%d", __func__, scalar_len, stack_len);
-    return AEE_EUNKNOWN;
-  }
-
-  hvx::config_t hvx_info = {0};
-  hvx::prepare_mt_job(&hvx_info);
-
-  // Starts out as "not locked" so the unlock check below is well defined
-  // when no HVX units are reserved and hvx::lock is never called.
-  int lock_result = 0;
-  // Check if HVX units are available
-  if (hvx_info.num_reserved > 0) {
-    lock_result = hvx::lock(hvx::MODE_128B);
-    if (lock_result < 0) {
-      FARF(ERROR, "%s: HVX locking failed lock_result=%d num_reserved=%d", __func__, lock_result,
-           hvx_info.num_reserved);
-    } else {
-      FARF(ALWAYS, "%s: HVX lock successful lock_result=%d", __func__, lock_result);
-    }
-  } else {
-    FARF(ERROR, "%s: there are no HVX units available", __func__);
-  }
-
-  const size_t arg_words = static_cast<size_t>(scalar_len) + static_cast<size_t>(stack_len);
-  const size_t data_words = arg_words < kMinDataWords ? kMinDataWords : arg_words;
-
-  int result = AEE_ENOMEMORY;
-  // calloc zero-fills the padding words that launcher() reads but nobody set.
-  msg_call* mc = static_cast<msg_call*>(calloc(kHeaderWords + data_words, sizeof(uint32_t)));
-  if (mc == nullptr) {
-    FARF(ERROR, "%s: failed to allocate memory for mc", __func__);
-  } else {
-    uint32_t* words = reinterpret_cast<uint32_t*>(mc);
-    size_t k = kHeaderWords;
-    // Scalar buffers come first.
-    for (int i = 0; i < scalar_len; i++, k++) {
-      words[k] = static_cast<uint32_t>(scalar[i]);
-    }
-    for (int i = 0; i < stack_len; i++, k++) {
-      words[k] = static_cast<uint32_t>(stack[i]);
-    }
-
-    mc->scalar_num = scalar_len;
-    mc->stack_num = stack_len;
-    mc->func_va = symbol;
-    print_msg_call(*mc);
-    uint64_t start_time = HAP_perf_get_time_us();
-    result = launcher(mc, pcycles);
-    *time_usec = HAP_perf_get_time_us() - start_time;
-    FARF(ALWAYS, "kernel execution: %llu pcycles  %llu usec", *pcycles, *time_usec);
-    free(mc);
-  }
-
-  // Runs on every path past the HVX setup, including allocation failure.
-  if (lock_result > 0) hvx::unlock();
-  hvx::cleanup_mt_job(&hvx_info);
-  return result;
-}
-
-/*!
- *  \brief Release previously loaded shared object.
- *
- *  \param lib          Handle of shared library to release.
- *
- *  \return AEE_SUCCESS on success, AEE_EUNKNOWN if dlclose reports an error.
- */
-int tvm_remote_nd_release_library(tvm_remote_nd_handle_t lib) {
-  // dlclose returns nonzero on failure; report it instead of claiming success.
-  if (dlclose(reinterpret_cast<void*>(lib)) != 0) {
-    FARF(ERROR, "%s: dlclose failed: %s", __func__, dlerror());
-    return AEE_EUNKNOWN;
-  }
-  FARF(ALWAYS, "tvm_remote_nd_release_library done ");
-  return AEE_SUCCESS;
-}
diff --git a/src/runtime/hexagon/android/target/fastrpc/src/tvm_wrap_pthread.cc b/src/runtime/hexagon/android/target/fastrpc/src/tvm_wrap_pthread.cc
deleted file mode 100644
index d26073af8a..0000000000
--- a/src/runtime/hexagon/android/target/fastrpc/src/tvm_wrap_pthread.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Implement a wrapper around pthread_create that sets the thread stack
- * size to a chosen value.
- *
- * TVM runtime uses std::thread, but the C++ standard does not provide
- * any means of controlling thread attributes (like stack size). Because
- * of that, any thread created by the std::thread constructor will use
- * default attributes. The default stack size for a thread in QuRT is 16kB.
- * This has proven to be insufficient in the past, so we need to increase
- * it.
- * When libtvm_runtime.so is linked, the linker flag --wrap=pthread_create
- * is used, which causes the linker to redirect all references to
- * pthread_create to __wrap_pthread_create. This file implements the
- * __wrap function to set the larger stack size and call the actual
- * pthread_create. The call to pthread_create here must not be renamed,
- * so this function cannot be included in the TVM runtime binary.
- * Instead, it's implemented in a separate shared library.
- */
-
-#include <pthread.h>
-
-#include "HAP_farf.h"
-
-// Stack size given to threads that are created without explicit attributes.
-static constexpr size_t kThreadStackSize = 128 * 1024;  // 128kB
-
-// Make sure the function has C linkage.
-// The declaration below gives the definition that follows C linkage, so its
-// symbol name is exactly "__wrap_pthread_create".
-extern "C" {
-int __wrap_pthread_create(pthread_t* restrict thread, const pthread_attr_t* restrict attr,
-                          void* (*start)(void*), void* restrict arg);
-}
-
-/*!
- * \brief Replacement for pthread_create that enlarges the default stack.
- *
- * When the caller passes no attributes, a temporary attribute object with a
- * stack size of kThreadStackSize is used; caller-supplied attributes are
- * passed through untouched. The temporary object is destroyed on every path
- * on which it was initialized, and never otherwise.
- *
- * \param thread  Where to store the id of the new thread.
- * \param attr    Thread attributes, or nullptr to use the enlarged default.
- * \param start   Thread entry point.
- * \param arg     Argument passed to \p start.
- *
- * \return 0 on success; otherwise the error code of the first pthread call
- *         that failed (attribute setup or pthread_create itself).
- */
-int __wrap_pthread_create(pthread_t* restrict thread, const pthread_attr_t* restrict attr,
-                          void* (*start)(void*), void* restrict arg) {
-  pthread_attr_t def_attr;
-  // True only after def_attr has been initialized here; it then has to be
-  // destroyed before returning.
-  bool owns_def_attr = false;
-  int rc = 0;
-
-  if (attr == nullptr) {
-    rc = pthread_attr_init(&def_attr);
-    if (rc != 0) {
-      FARF(ERROR, "pthread_attr_init failed: rc=%08x", rc);
-      return rc;
-    }
-    owns_def_attr = true;
-    rc = pthread_attr_setstacksize(&def_attr, kThreadStackSize);
-    if (rc != 0) {
-      FARF(ERROR, "pthread_attr_setstacksize failed: rc=%08x", rc);
-    }
-    attr = &def_attr;
-  }
-
-  if (rc == 0) {
-    size_t stack_size = 0;
-    rc = pthread_attr_getstacksize(attr, &stack_size);
-    if (rc != 0) {
-      FARF(ERROR, "pthread_attr_getstacksize failed: rc=%08x", rc);
-    } else {
-      FARF(ALWAYS, "launching thread with stack_size=%zu", stack_size);
-      rc = pthread_create(thread, attr, start, arg);
-    }
-  }
-
-  // Only destroy what was initialized in this function.
-  if (owns_def_attr) {
-    if (int destroy_rc = pthread_attr_destroy(&def_attr)) {
-      FARF(ERROR, "pthread_attr_destroy failed: rc=%08x", destroy_rc);
-    }
-  }
-  return rc;
-}
diff --git a/src/runtime/hexagon/android/target/hexagon_device_target.cc b/src/runtime/hexagon/android/target/hexagon_device_target.cc
deleted file mode 100644
index a542c5a3e3..0000000000
--- a/src/runtime/hexagon/android/target/hexagon_device_target.cc
+++ /dev/null
@@ -1,521 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifdef __ANDROID__
-
-#include <unistd.h>
-
-#include <algorithm>
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-
-#include "../hexagon_device.h"
-#include "AEEStdErr.h"
-#include "fastrpc/include/tvm_remote.h"
-#include "hexagon_dsprpcapi.h"
-#include "hexagon_stubapi.h"
-#include "hexagon_target_log.h"
-#include "remote64.h"
-#include "rpcmem.h"
-
-// Declared weak so that the symbol may be absent at link/load time;
-// OpenDomainChannel handles a null remote_session_control pointer.
-#pragma weak remote_session_control
-
-// Heap id passed to rpcmem_alloc in HexagonTarget::Alloc.
-// NOTE(review): presumably the rpcmem system heap id -- confirm in rpcmem.h.
-#define RPCMEM_HEAP 25
-
-// All log messages start with "HexagonTarget::%s", where %s is replaced
-// with the function name, so create macros that add that to avoid repetition.
-// The downside is that the format string must be given as a string literal,
-// but it seems to be a minor issue.
-// VA_EXPANDER emits a leading comma followed by its arguments; the ## drops
-// that comma when no arguments are given.
-#define VA_EXPANDER(...) , ##__VA_ARGS__
-#define TVM_LOGD_HT(fmt, ...) TVM_LOGD("HexagonTarget::%s: " fmt, __func__ VA_EXPANDER(__VA_ARGS__))
-#define TVM_LOGE_HT(fmt, ...) TVM_LOGE("HexagonTarget::%s: " fmt, __func__ VA_EXPANDER(__VA_ARGS__))
-
-namespace tvm {
-namespace runtime {
-namespace hexagon {
-
-// Stack size requested for the remote thread in OpenDomainChannel.
-static constexpr int kStackSize = 128 * 1024;  // 128kB stack
-
-// Device implementation that talks to the DSP through the FastRPC stub API
-// (StubAPI) and the DSP RPC API (DspRpcAPI). It keeps a map from DSP
-// addresses to the matching apps-side buffers.
-class HexagonTarget : public tvm::runtime::hexagon::Device {
- public:
-  HexagonTarget() {}
-  ~HexagonTarget() final {}
-  // Allocate a buffer with rpcmem, map it to the DSP, return the DSP address.
-  void* Alloc(unsigned size, unsigned align) final;
-  // Release a buffer returned by Alloc or AllocVtcm.
-  void Free(void* ptr) final;
-  // Allocate VTCM on the DSP; the mapping is tagged with vtcm_mark_.
-  void* AllocVtcm(unsigned size, unsigned align) final;
-  void FreeVtcm(void* ptr) final;
-  void CopyDeviceToDevice(void* dst, const void* src, unsigned len) final;
-  void CopyDeviceToHost(void* host_dst, const void* src, unsigned len) final;
-  void CopyHostToDevice(void* dst, const void* host_src, unsigned len) final;
-  void* Load(const std::string& data, const std::string& fmt) final;
-  void Unload(void* mod) final;
-  void* Resolve(const std::string& sym) final;
-  void Call(void* func, uint32_t* scalar, unsigned scalar_num, uint32_t* stack,
-            unsigned stack_num) final;
-
- private:
-  // Record dsp_addr -> (apps_addr, size); returns {nullptr, 0} if the DSP
-  // address is already present.
-  std::pair<void*, size_t> AddAddrMapping(const void* dsp_addr, void* apps_addr, size_t size);
-  // Look up the apps-side address for dsp_addr. With exact == false, an
-  // address inside a mapped buffer is accepted as well.
-  std::pair<void*, size_t> GetAppsAddr(const void* dsp_addr, bool exact) const;
-  void RemoveAddrMapping(const void* dsp_addr);
-  // Note: the definition names this parameter use_unsigned_pd.
-  int OpenDomainChannel(bool set_unsigned_pd);
-  int CloseDomainChannel();
-  void ReleaseLibrary();
-  void FreeMemoryBeforeChannelClose();
-
-  // Mapping from a DSP address to a pair <apps address, buffer size>.
-  // Using void* pointers is ok, since DSP pointers will always fit
-  // in apps's pointers, i.e. sizeof_dsp(void*) <= sizeof_apps(void*).
-  std::map<const void*, std::pair<void*, size_t>> dsp_to_apps_;
-  // AEE_EUNKNOWN is used as the "channel not open" value.
-  remote_handle64 domain_channel_handle_ = AEE_EUNKNOWN;
-  // AEE_EUNKNOWN is used as the "no library loaded" value.
-  tvm_remote_handle_t module_pointer_ = AEE_EUNKNOWN;
-  // Incremented each time OpenDomainChannel opens the channel successfully.
-  uint64_t count_channel_open_ = 0;
-  // Global lock, used for all critical sections. This can be refined
-  // in the future.
-  mutable std::mutex crit_section_;
-
-  // Don't use unsigned PDs by default. Change this to "true" to enable.
-  static constexpr bool unsigned_pd = false;
-
-  // Sentinel stored as the apps address of VTCM mappings (see AllocVtcm).
-  static void* const vtcm_mark_;
-};
-
-// All-ones pointer value used to tag VTCM entries in dsp_to_apps_.
-void* const HexagonTarget::vtcm_mark_ = reinterpret_cast<void*>(~0);
-
-// Factory: returns a new HexagonTarget behind the Device interface.
-std::shared_ptr<Device> CreateHexagonTarget() { return std::make_shared<HexagonTarget>(); }
-
-/*!
- * \brief Record that DSP address \p dsp_addr corresponds to \p apps_addr.
- *
- * The map is modified, and the stored entry copied out, while holding
- * crit_section_, so the result does not depend on an iterator that another
- * thread could invalidate after the lock is released.
- *
- * \param dsp_addr   DSP-side address used as the key.
- * \param apps_addr  Apps-side address of the buffer.
- * \param size       Size of the buffer in bytes.
- *
- * \return The stored <apps address, size> pair, or {nullptr, 0} if an entry
- *         for \p dsp_addr already existed.
- */
-std::pair<void*, size_t> HexagonTarget::AddAddrMapping(const void* dsp_addr, void* apps_addr,
-                                                       size_t size) {
-  bool inserted = false;
-  std::pair<void*, size_t> stored(nullptr, 0);
-  {
-    std::lock_guard<std::mutex> lock(crit_section_);
-    auto p = dsp_to_apps_.insert({dsp_addr, {apps_addr, size}});
-    inserted = p.second;
-    if (inserted) stored = p.first->second;
-  }
-  // Logging is done outside the critical section, as before.
-  if (!inserted) {
-    TVM_LOGE_HT("failed to insert address mapping: dsp:%p -> apps:%p, size:%zu", dsp_addr,
-                apps_addr, size);
-    return stored;
-  }
-  TVM_LOGD_HT("added address mapping: dsp:%p -> apps:%p, size:%zu", dsp_addr, apps_addr, size);
-  return stored;
-}
-
-// Drop the map entry for dsp_addr. A missing entry is logged as an error.
-// The whole operation, including the log call, runs under crit_section_.
-void HexagonTarget::RemoveAddrMapping(const void* dsp_addr) {
-  std::lock_guard<std::mutex> guard(crit_section_);
-  const auto erased = dsp_to_apps_.erase(dsp_addr);
-  if (erased == 0) {
-    TVM_LOGE_HT("failed to remove address mapping for dsp:%p", dsp_addr);
-  }
-}
-
-/*!
- * \brief Translate a DSP address into the matching apps-side address.
- *
- * \param dsp_addr  DSP address to look up.
- * \param exact     If true, only an address that is a key of the map is
- *                  accepted. If false, an address inside a mapped buffer is
- *                  translated as well.
- *
- * \return <apps address, number of bytes from that address to the end of
- *         the buffer>, or {nullptr, 0} if nothing matches. A failed
- *         non-exact lookup is logged as an error.
- */
-std::pair<void*, size_t> HexagonTarget::GetAppsAddr(const void* dsp_addr, bool exact) const {
-  // Held until the function returns, whichever return statement is taken.
-  std::lock_guard<std::mutex> guard(crit_section_);
-
-  // Direct hit on a buffer start address.
-  const auto direct = dsp_to_apps_.find(dsp_addr);
-  if (direct != dsp_to_apps_.end()) return direct->second;
-  // Nothing else qualifies for an exact lookup.
-  if (exact) return std::make_pair(nullptr, 0);
-
-  // Otherwise search for a buffer that contains the address.
-  const uintptr_t wanted = reinterpret_cast<uintptr_t>(dsp_addr);
-  for (auto it = dsp_to_apps_.begin(); it != dsp_to_apps_.end(); ++it) {
-    const uintptr_t base = reinterpret_cast<uintptr_t>(it->first);
-    const size_t length = it->second.second;
-    if (wanted < base || wanted >= base + length) continue;
-
-    const size_t offset = wanted - base;
-    const uintptr_t apps_base = reinterpret_cast<uintptr_t>(it->second.first);
-    return std::make_pair(reinterpret_cast<void*>(apps_base + offset), length - offset);
-  }
-  TVM_LOGE_HT("failed to locate apps address for dsp:%p", dsp_addr);
-  return std::make_pair(nullptr, 0);
-}
-
-/*!
- * \brief Open the FastRPC domain channel to the CDSP if it is not open yet.
- *
- * Initializes rpcmem, configures the remote session when
- * remote_session_control is available (thread stack size, optionally
- * unsigned PD), and opens the tvm_remote channel.
- *
- * \param use_unsigned_pd  Whether to request an unsigned protection domain.
- *
- * \return AEE_SUCCESS if the channel is already open or was opened;
- *         otherwise the error code returned by tvm_remote_open.
- */
-int HexagonTarget::OpenDomainChannel(bool use_unsigned_pd) {
-  // AEE_EUNKNOWN marks "not open"; anything else means there is nothing to do.
-  if (domain_channel_handle_ != AEE_EUNKNOWN) return AEE_SUCCESS;
-
-  const DspRpcAPI* dsp_api = DspRpcAPI::Global();
-  const StubAPI* stub_api = StubAPI::Global();
-
-  stub_api->rpcmem_init_ptr()();
-
-  if (auto* rsc_ptr = dsp_api->remote_session_control_ptr(true)) {
-    // Request kStackSize bytes of stack for the remote thread.
-    remote_rpc_thread_params th_data;
-    th_data.domain = CDSP_DOMAIN_ID;
-    th_data.stack_size = kStackSize;
-    th_data.prio = -1;  // Default priority.
-    int rc = rsc_ptr(FASTRPC_THREAD_PARAMS, &th_data, sizeof(th_data));
-    if (rc != AEE_SUCCESS) {
-      // Failure is only logged; channel setup continues.
-      TVM_LOGE_HT("remote_session_control failed rc=%08x for stack size", rc);
-    }
-    if (use_unsigned_pd) {
-      remote_rpc_control_unsigned_module data;
-      data.enable = 1;
-      data.domain = CDSP_DOMAIN_ID;
-      // This rc shadows the one declared above.
-      int rc = rsc_ptr(DSPRPC_CONTROL_UNSIGNED_MODULE, &data, sizeof(data));
-      if (rc != AEE_SUCCESS) {
-        // Failure is only logged; channel setup continues.
-        TVM_LOGE_HT("remote_session_control failed rc=%08x for unsigned PD", rc);
-      }
-    }
-  } else {
-    TVM_LOGD_HT("remote_session_control not available");
-  }
-
-  int rc = stub_api->tvm_remote_open(tvm_remote_URI "&_dom=cdsp", &domain_channel_handle_);
-  if (rc != AEE_SUCCESS) {
-    TVM_LOGE_HT("failed to open channel rc=0x%x", rc);
-  } else {
-    count_channel_open_++;
-    TVM_LOGD_HT("channel open success and rpcmem_init done");
-  }
-  return rc;
-}
-
-/*!
- * \brief Close the FastRPC domain channel if it is open.
- *
- * On success the handle is reset to AEE_EUNKNOWN and rpcmem is
- * deinitialized; on failure the handle is left untouched.
- *
- * \return AEE_SUCCESS if the channel was not open or was closed; otherwise
- *         the error code returned by tvm_remote_close.
- */
-int HexagonTarget::CloseDomainChannel() {
-  const bool channel_is_open = domain_channel_handle_ != AEE_EUNKNOWN;
-  if (!channel_is_open) return AEE_SUCCESS;
-
-  const StubAPI* api = StubAPI::Global();
-  const int status = api->tvm_remote_close(domain_channel_handle_);
-  if (status != AEE_SUCCESS) {
-    TVM_LOGE_HT("failed to close domain channel rc=0x%x", status);
-    return status;
-  }
-
-  domain_channel_handle_ = AEE_EUNKNOWN;
-  api->rpcmem_deinit_ptr()();
-  TVM_LOGD_HT("channel close success and rpcmem_deinit done");
-  return status;
-}
-
-// Unload the device library, if one is loaded, while holding crit_section_.
-// module_pointer_ is reset to AEE_EUNKNOWN only when the release succeeds;
-// a failure is logged and the handle is kept.
-void HexagonTarget::ReleaseLibrary() {
-  std::lock_guard<std::mutex> guard(crit_section_);
-  if (module_pointer_ == AEE_EUNKNOWN) return;
-
-  const StubAPI* api = StubAPI::Global();
-  const int status = api->tvm_remote_release_library(domain_channel_handle_, module_pointer_);
-  if (status == AEE_SUCCESS) {
-    module_pointer_ = AEE_EUNKNOWN;
-    return;
-  }
-  TVM_LOGE_HT("failed to unload device library rc=0x%x", status);
-}
-
-// Free every buffer that is still recorded in dsp_to_apps_. Each pass takes
-// the first entry of the map and hands its DSP address to Free, which is
-// what removes the entry.
-void HexagonTarget::FreeMemoryBeforeChannelClose() {
-  for (auto first = dsp_to_apps_.begin(); first != dsp_to_apps_.end();
-       first = dsp_to_apps_.begin()) {
-    void* dsp_addr = const_cast<void*>(first->first);
-    TVM_LOGD_HT("Freeing up dsp_addr %p", dsp_addr);
-    HexagonTarget::Free(dsp_addr);
-  }
-}
-
-/*!
- * \brief Allocate a buffer that is visible to the DSP.
- *
- * Opens the domain channel if needed, allocates \p size bytes with rpcmem,
- * maps the buffer to the DSP with remote_mmap64 and records the
- * DSP -> apps address mapping. If the mapping step fails, the rpcmem buffer
- * is released again instead of being leaked.
- *
- * \param size   Number of bytes to allocate.
- * \param align  Requested alignment (only used in log messages here).
- *
- * \return The DSP address of the buffer, or nullptr on any failure.
- */
-void* HexagonTarget::Alloc(unsigned size, unsigned align) {
-  const DspRpcAPI* dsp_api = DspRpcAPI::Global();
-  const StubAPI* stub_api = StubAPI::Global();
-
-  // Opening the domain channel should be done once.
-  int rc_oc;
-  {
-    std::lock_guard<std::mutex> lock(crit_section_);
-    rc_oc = OpenDomainChannel(/*use_unsigned_pd*/ unsigned_pd);
-  }
-  if (rc_oc != AEE_SUCCESS) {
-    TVM_LOGE_HT("mem alloc failed: unable to open domain channel");
-    return nullptr;
-  }
-
-  // This is a workaround. If HexagonTarget::Alloc is called from a different
-  // thread then remote_mmap64 fails. FastRPC expects one call to be made to
-  // DSP before calling remote_map64. Hence this call is needed for now until
-  // FastRPC comes up with a fix.
-  int rc_call_mmap_64 = stub_api->tvm_remote_call_mmap64(domain_channel_handle_);
-  if (rc_call_mmap_64 != AEE_SUCCESS) {
-    TVM_LOGE_HT("mmap64 failed for domain channel %lu", domain_channel_handle_);
-    return nullptr;
-  }
-
-  void* mem = stub_api->rpcmem_alloc_ptr()(RPCMEM_HEAP, RPCMEM_DEFAULT_FLAGS, size);
-  if (mem == nullptr) {
-    TVM_LOGE_HT("mem alloc failed for size=0x%x alignment=0x%x", size, align);
-    return nullptr;
-  }
-  int mem_fd = stub_api->rpcmem_to_fd_ptr()(mem);
-  uintptr_t dsp_va = 0;
-  int rc = dsp_api->remote_mmap64_ptr()(mem_fd, 0, reinterpret_cast<uintptr_t>(mem), size, &dsp_va);
-  if (rc != AEE_SUCCESS) {
-    TVM_LOGE_HT(
-        "buffer mapping failed for remote_map64 fd=0x%x rc=0x%x "
-        "apps_addr=0x%lx",
-        mem_fd, rc, reinterpret_cast<uintptr_t>(mem));
-    // The buffer was never handed out, so nobody else can free it.
-    stub_api->rpcmem_free_ptr()(mem);
-    return nullptr;
-  }
-
-  void* dsp_addr = reinterpret_cast<void*>(dsp_va);
-  AddAddrMapping(dsp_addr, mem, size);
-  return dsp_addr;
-}
-
-void HexagonTarget::Free(void* ptr) {
-  const DspRpcAPI* dsp_api = DspRpcAPI::Global();
-  const StubAPI* stub_api = StubAPI::Global();
-  auto bb = GetAppsAddr(ptr, true);
-  if (bb.first == vtcm_mark_) {
-    TVM_LOGD_HT("VTCM mapping found. dsp_addr=0x%p", ptr);
-    RemoveAddrMapping(ptr);
-    FreeVtcm(ptr);
-    return;
-  }
-
-  TVM_LOGD_HT("VTCM mapping not found. dsp_addr=0x%p", ptr);
-  auto aa = GetAppsAddr(ptr, true);
-  if (aa.first == nullptr) return;
-
-  int rc = dsp_api->remote_munmap64_ptr()(reinterpret_cast<uintptr_t>(ptr), aa.second);
-  if (rc != AEE_SUCCESS) {
-    TVM_LOGE_HT("buffer unmapping failed rc=0x%x", rc);
-  }
-  RemoveAddrMapping(ptr);
-  stub_api->rpcmem_free_ptr()(aa.first);
-}
-
-void* HexagonTarget::AllocVtcm(unsigned size, unsigned align) {
-  const StubAPI* stub_api = StubAPI::Global();
-
-  unsigned int dsp_va = 0;
-  int rc = stub_api->tvm_remote_alloc_vtcm(domain_channel_handle_, size, align, &dsp_va);
-  if (rc != AEE_SUCCESS) {
-    TVM_LOGE_HT("VTCM allocation failed size=%u, align=%u", size, align);
-    return nullptr;
-  }
-  void* dsp_addr = reinterpret_cast<void*>(dsp_va);
-  TVM_LOGD_HT("Done vtcm alloc dsp:%p", dsp_addr);
-  AddAddrMapping(dsp_addr, vtcm_mark_, size);
-  return dsp_addr;
-}
-
-void HexagonTarget::FreeVtcm(void* ptr) {
-  const StubAPI* stub_api = StubAPI::Global();
-
-  TVM_LOGD_HT("%s:Calling vtcm free. ptr=%p", __func__, ptr);
-  uintptr_t dsp_va = reinterpret_cast<uintptr_t>(ptr);
-  int rc = stub_api->tvm_remote_free_vtcm(domain_channel_handle_, dsp_va);
-  if (rc != AEE_SUCCESS) {
-    TVM_LOGE_HT("VTCM deallocation failed");
-  }
-  TVM_LOGD_HT("Done VTCM free from HexagonTarget::FreeVtcm");
-}
-
-void HexagonTarget::CopyDeviceToDevice(void* dst, const void* src, unsigned len) {
-  auto aa_src = GetAppsAddr(src, false);
-  auto aa_dst = GetAppsAddr(dst, false);
-  if (aa_src.first == vtcm_mark_ || aa_dst.first == vtcm_mark_) {
-    TVM_LOGE_HT("VTCM address. Copy operation not supported");
-    return;
-  }
-  if (!aa_src.first || !aa_dst.first) {
-    TVM_LOGE_HT("copy failed, dsp:%p -> dsp:%p, len:%u", src, dst, len);
-    return;
-  }
-  if (aa_src.second < len) {
-    TVM_LOGD_HT(
-        "specified length:%u larger than source buffer size:%zu, copy "
-        "truncated",
-        len, aa_src.second);
-  }
-  if (aa_dst.second < len) {
-    TVM_LOGD_HT(
-        "specified length:%u larger than dest buffer size:%zu, copy "
-        "truncated",
-        len, aa_dst.second);
-  }
-  len = std::min({size_t(len), aa_src.second, aa_dst.second});
-  TVM_LOGD_HT("copy, dsp:%p(apps:%p) -> dsp:%p(apps:%p), len:%u", src, aa_src.first, dst,
-              aa_dst.first, len);
-  std::memcpy(aa_dst.first, aa_src.first, len);
-}
-
-void HexagonTarget::CopyDeviceToHost(void* host_dst, const void* src, unsigned len) {
-  auto aa = GetAppsAddr(src, false);
-  if (aa.first == vtcm_mark_) {
-    TVM_LOGE_HT("VTCM address. Copy operation not supported");
-    return;
-  }
-  if (!aa.first) {
-    TVM_LOGE_HT("copy failed, dsp:%p -> apps:%p, len:%u", src, host_dst, len);
-    return;
-  }
-  if (aa.second < len) {
-    TVM_LOGD_HT("specified length:%u larger than buffer size:%zu, copy truncated", len, aa.second);
-    len = aa.second;
-  }
-  TVM_LOGD_HT("copy, dsp:%p(apps:%p) -> apps:%p, len:%u", src, aa.first, host_dst, len);
-  std::memcpy(host_dst, aa.first, len);
-}
-
-void HexagonTarget::CopyHostToDevice(void* dst, const void* host_src, unsigned len) {
-  auto aa = GetAppsAddr(dst, false);
-  if (aa.first == vtcm_mark_) {
-    TVM_LOGE_HT("VTCM address. Copy operation not supported");
-    return;
-  }
-  if (!aa.first) {
-    TVM_LOGE_HT("copy failed, dsp:%p <- apps:%p, len:%u", dst, host_src, len);
-    return;
-  }
-  if (aa.second < len) {
-    TVM_LOGD_HT("specified length:%u larger than buffer size:%zu, copy truncated", len, aa.second);
-    len = aa.second;
-  }
-  TVM_LOGD_HT("copy, dsp:%p(apps:%p) <- apps:%p, len:%u", dst, aa.first, host_src, len);
-  std::memcpy(aa.first, host_src, len);
-}
-
-void* HexagonTarget::Load(const std::string& data, const std::string& fmt) {
-  crit_section_.lock();
-  int rc_oc = OpenDomainChannel(/*use_unsigned_pd*/ unsigned_pd);
-  crit_section_.unlock();
-  if (rc_oc != AEE_SUCCESS) {
-    TVM_LOGE_HT("loading of %s failed: unable to open domain channel", data.c_str());
-    return nullptr;
-  }
-
-  if (domain_channel_handle_ == AEE_EUNKNOWN) return nullptr;
-  ReleaseLibrary();
-
-  crit_section_.lock();
-  TVM_LOGD_HT("loading library %s ", data.c_str());
-  const StubAPI* stub_api = StubAPI::Global();
-  int rc = stub_api->tvm_remote_load_library(domain_channel_handle_, data.c_str(), data.size() + 1,
-                                             &module_pointer_);
-  if (rc != AEE_SUCCESS) {
-    TVM_LOGE_HT("failed to load device library rc=0x%x", rc);
-  }
-  crit_section_.unlock();
-
-  if (module_pointer_ != AEE_EUNKNOWN) {
-    return reinterpret_cast<void*>(module_pointer_);
-  } else {
-    return nullptr;
-  }
-}
-
-void HexagonTarget::Unload(void* mod) {
-  crit_section_.lock();
-  count_channel_open_--;
-  crit_section_.unlock();
-  if (count_channel_open_ == 0) FreeMemoryBeforeChannelClose();
-
-  ReleaseLibrary();
-  if (module_pointer_ != AEE_EUNKNOWN) return;
-
-  crit_section_.lock();
-  if (count_channel_open_ == 0) CloseDomainChannel();
-  crit_section_.unlock();
-}
-
-void* HexagonTarget::Resolve(const std::string& sym) {
-  const StubAPI* stub_api = StubAPI::Global();
-
-  tvm_remote_handle_t pf;
-  TVM_LOGD_HT("resolving symbol %s", sym.c_str());
-  int rc = stub_api->tvm_remote_get_symbol(domain_channel_handle_, module_pointer_, sym.c_str(),
-                                           sym.size() + 1, &pf);
-  if (rc != AEE_SUCCESS) {
-    TVM_LOGE_HT("failed to get symbol from CDSP rc=0x%x", rc);
-    return nullptr;
-  }
-  void* addr = reinterpret_cast<void*>(pf);
-  TVM_LOGD_HT("resolved %s -> %p", sym.c_str(), addr);
-  return addr;
-}
-
-void HexagonTarget::Call(void* func, uint32_t* scalar, unsigned scalar_num, uint32_t* stack,
-                         unsigned stack_num) {
-  uint64 pcycles = 0, execution_time_usec = 0;
-  auto scalar_octet = std::unique_ptr<tvm_remote_buffer[]>(new tvm_remote_buffer[scalar_num]);
-  auto stack_octet = std::unique_ptr<tvm_remote_buffer[]>(new tvm_remote_buffer[stack_num]);
-  TVM_LOGD_HT("scalars=%p, stack=%p", scalar, stack);
-
-  if (scalar_octet == nullptr || stack_octet == nullptr) {
-    TVM_LOGE_HT("mem alloc failed for scalar/stack octets");
-    return;
-  }
-  std::memset(scalar_octet.get(), 0, scalar_num * sizeof(tvm_remote_buffer));
-  std::memset(stack_octet.get(), 0, stack_num * sizeof(tvm_remote_buffer));
-
-  auto ProcessInputs = [this](uint32_t* inputs, tvm_remote_buffer* buffers, unsigned num) {
-    for (unsigned i = 0; i != num; ++i) {
-      void* ptr = reinterpret_cast<void*>(static_cast<uintptr_t>(inputs[i]));
-      auto aa = GetAppsAddr(ptr, false);
-      if (aa.first == vtcm_mark_) {
-        buffers[i].data = nullptr;
-        buffers[i].dataLen = 0;
-      } else if (aa.first) {
-        buffers[i].data = static_cast<unsigned char*>(aa.first);
-        buffers[i].dataLen = aa.second;
-      }
-    }
-  };
-
-  ProcessInputs(scalar, scalar_octet.get(), scalar_num);
-  ProcessInputs(stack, stack_octet.get(), stack_num);
-
-  auto ToString = [](const char* title, uint32_t* data, unsigned num) {
-    std::ostringstream log;
-    log << "  " << title << ':' << num << " {" << std::hex;
-    for (unsigned i = 0; i != num; ++i) log << ' ' << data[i];
-    log << " }";
-    return log.str();
-  };
-
-  TVM_LOGD_HT("%s", ToString("scalars", scalar, scalar_num).c_str());
-  TVM_LOGD_HT("%s", ToString("  stack", stack, stack_num).c_str());
-
-  const StubAPI* stub_api = StubAPI::Global();
-  int rc = stub_api->tvm_remote_kernel(
-      domain_channel_handle_, module_pointer_,
-      static_cast<tvm_remote_handle_t>(reinterpret_cast<uintptr_t>(func)),
-      reinterpret_cast<int*>(scalar), scalar_num, reinterpret_cast<int*>(stack), stack_num,
-      scalar_octet.get(), scalar_num, scalar_octet.get(), scalar_num, stack_octet.get(), stack_num,
-      stack_octet.get(), stack_num, &pcycles, &execution_time_usec);
-
-  if (rc != AEE_SUCCESS) {
-    TVM_LOGE_HT("failed to run kernel on CDSP rc=0x%x", rc);
-  } else {
-    TVM_LOGD_HT("kernel execution: %llu pcycles, %llu usec, scalar_num=%d", pcycles,
-                execution_time_usec, scalar_num);
-  }
-}
-
-}  // namespace hexagon
-}  // namespace runtime
-}  // namespace tvm
-
-#endif  // #ifdef __ANDROID__
diff --git a/src/runtime/hexagon/android/target/hexagon_dsprpcapi.cc b/src/runtime/hexagon/android/target/hexagon_dsprpcapi.cc
deleted file mode 100644
index a089684c41..0000000000
--- a/src/runtime/hexagon/android/target/hexagon_dsprpcapi.cc
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifdef __ANDROID__
-#include "hexagon_dsprpcapi.h"
-
-#include <dlfcn.h>
-#include <stdint.h>
-#include <tvm/runtime/logging.h>
-
-#include "hexagon_target_log.h"
-
-namespace tvm {
-namespace runtime {
-
-namespace hexagon {
-
-DspRpcAPI::DspRpcAPI() {
-  ICHECK(lib_handle_ = dlopen(rpc_lib_name_, RTLD_LAZY | RTLD_LOCAL));
-
-#define RESOLVE(n) n##_ = GetSymbol<n##_t*>(#n)
-  RESOLVE(remote_handle_close);
-  RESOLVE(remote_handle_control);
-  RESOLVE(remote_handle_invoke);
-  RESOLVE(remote_handle_open);
-  RESOLVE(remote_mmap);
-  RESOLVE(remote_munmap);
-
-  RESOLVE(remote_handle64_close);
-  RESOLVE(remote_handle64_control);
-  RESOLVE(remote_handle64_invoke);
-  RESOLVE(remote_handle64_open);
-  RESOLVE(remote_mmap64);
-  RESOLVE(remote_munmap64);
-
-  RESOLVE(remote_register_buf);
-  RESOLVE(remote_register_buf_attr);
-  RESOLVE(remote_register_dma_handle);
-  RESOLVE(remote_register_dma_handle_attr);
-  RESOLVE(remote_register_fd);
-
-  RESOLVE(remote_session_control);
-  RESOLVE(remote_set_mode);
-
-  RESOLVE(rpcmem_init);
-  RESOLVE(rpcmem_deinit);
-  RESOLVE(rpcmem_alloc);
-  RESOLVE(rpcmem_free);
-  RESOLVE(rpcmem_to_fd);
-#undef RESOLVE
-}
-
-DspRpcAPI::~DspRpcAPI() {
-  if (lib_handle_) dlclose(lib_handle_);
-}
-
-template <typename T>
-T DspRpcAPI::GetSymbol(const char* sym) {
-  if (!lib_handle_) {
-    TVM_LOGE("error looking up symbol \"%s\": library not loaded", sym);
-    return nullptr;
-  }
-  dlerror();  // Clear any previous errror conditions.
-  if (T ret = reinterpret_cast<T>(dlsym(lib_handle_, sym))) {
-    return ret;
-  }
-
-  const char* err = dlerror();
-  const char* err_txt = err ? err : "symbol not found";
-  TVM_LOGD("error looking up symbol \"%s\": %s", sym, err_txt);
-  return nullptr;
-}
-
-const DspRpcAPI* DspRpcAPI::Global() {
-  static const DspRpcAPI dsp_api;
-  return &dsp_api;
-}
-
-}  // namespace hexagon
-
-}  // namespace runtime
-}  // namespace tvm
-
-#endif  // __ANDROID__
diff --git a/src/runtime/hexagon/android/target/hexagon_dsprpcapi.h b/src/runtime/hexagon/android/target/hexagon_dsprpcapi.h
deleted file mode 100644
index a3d186e302..0000000000
--- a/src/runtime/hexagon/android/target/hexagon_dsprpcapi.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef TVM_RUNTIME_HEXAGON_ANDROID_TARGET_HEXAGON_DSPRPCAPI_H_
-#define TVM_RUNTIME_HEXAGON_ANDROID_TARGET_HEXAGON_DSPRPCAPI_H_
-
-#ifdef __ANDROID__
-#include <stdint.h>
-#include <tvm/runtime/logging.h>
-
-#include "remote.h"
-#include "remote64.h"
-#include "rpcmem.h"
-
-namespace tvm {
-namespace runtime {
-
-namespace hexagon {
-
-/*!
- * Encapsulation of the API of lib(a|c)dsprpc.so (loaded via dlopen), allowing
- * for having versions of the library that do not implement all of the
- * functions.
- *
- * Functions defined in the DSP RPC library:
- *   remote_handle_close
- *   remote_handle_control
- *   remote_handle_invoke
- *   remote_handle_open
- *   remote_mmap
- *   remote_munmap
- *
- *   remote_handle64_close
- *   remote_handle64_control
- *   remote_handle64_invoke
- *   remote_handle64_open
- *   remote_mmap64
- *   remote_munmap64
- *
- *   remote_register_buf
- *   remote_register_buf_attr
- *   remote_register_dma_handle
- *   remote_register_dma_handle_attr
- *   remote_register_fd
- *
- *   remote_session_control
- *   remote_set_mode
- *
- *   rpcmem_init
- *   rpcmem_deinit
- *   rpcmem_alloc
- *   rpcmem_free
- *   rpcmem_to_fd
- */
-class DspRpcAPI {
- public:
-  DspRpcAPI();
-  ~DspRpcAPI();
-
-  using remote_handle = ::remote_handle;
-  using remote_handle64 = ::remote_handle64;
-
-#define DECLTYPE(ty) using ty##_t = decltype(::ty);
-  DECLTYPE(remote_handle_close)
-  DECLTYPE(remote_handle_control)
-  DECLTYPE(remote_handle_invoke)
-  DECLTYPE(remote_handle_open)
-  DECLTYPE(remote_mmap)
-  DECLTYPE(remote_munmap)
-
-  DECLTYPE(remote_handle64_close)
-  DECLTYPE(remote_handle64_control)
-  DECLTYPE(remote_handle64_invoke)
-  DECLTYPE(remote_handle64_open)
-  DECLTYPE(remote_mmap64)
-  DECLTYPE(remote_munmap64)
-
-  DECLTYPE(remote_register_buf)
-  DECLTYPE(remote_register_buf_attr)
-  DECLTYPE(remote_register_dma_handle)
-  DECLTYPE(remote_register_dma_handle_attr)
-  DECLTYPE(remote_register_fd)
-
-  DECLTYPE(remote_session_control)
-  DECLTYPE(remote_set_mode)
-
-  DECLTYPE(rpcmem_init)
-  DECLTYPE(rpcmem_deinit)
-  DECLTYPE(rpcmem_alloc)
-  DECLTYPE(rpcmem_free)
-  DECLTYPE(rpcmem_to_fd)
-#undef DECLTYPE
-
-#define DECLFUNC(fn)                                   \
-  fn##_t* fn##_ptr(bool allow_nullptr = false) const { \
-    if (!allow_nullptr) ICHECK(fn##_ != nullptr);      \
-    return fn##_;                                      \
-  }
-  DECLFUNC(remote_handle_close)
-  DECLFUNC(remote_handle_control)
-  DECLFUNC(remote_handle_invoke)
-  DECLFUNC(remote_handle_open)
-  DECLFUNC(remote_mmap)
-  DECLFUNC(remote_munmap)
-
-  DECLFUNC(remote_handle64_close)
-  DECLFUNC(remote_handle64_control)
-  DECLFUNC(remote_handle64_invoke)
-  DECLFUNC(remote_handle64_open)
-  DECLFUNC(remote_mmap64)
-  DECLFUNC(remote_munmap64)
-
-  DECLFUNC(remote_register_buf)
-  DECLFUNC(remote_register_buf_attr)
-  DECLFUNC(remote_register_dma_handle)
-  DECLFUNC(remote_register_dma_handle_attr)
-  DECLFUNC(remote_register_fd)
-
-  DECLFUNC(remote_session_control)
-  DECLFUNC(remote_set_mode)
-
-  DECLFUNC(rpcmem_init)
-  DECLFUNC(rpcmem_deinit)
-  DECLFUNC(rpcmem_alloc)
-  DECLFUNC(rpcmem_free)
-  DECLFUNC(rpcmem_to_fd)
-#undef DECLFUNC
-
-  static const DspRpcAPI* Global();
-
- private:
-  static constexpr const char* rpc_lib_name_ = "libadsprpc.so";
-  void* lib_handle_ = nullptr;
-
-#define DECLPTR(p) p##_t* p##_ = nullptr;
-  DECLPTR(remote_handle_close)
-  DECLPTR(remote_handle_control)
-  DECLPTR(remote_handle_invoke)
-  DECLPTR(remote_handle_open)
-  DECLPTR(remote_mmap)
-  DECLPTR(remote_munmap)
-
-  DECLPTR(remote_handle64_close)
-  DECLPTR(remote_handle64_control)
-  DECLPTR(remote_handle64_invoke)
-  DECLPTR(remote_handle64_open)
-  DECLPTR(remote_mmap64)
-  DECLPTR(remote_munmap64)
-
-  DECLPTR(remote_register_buf)
-  DECLPTR(remote_register_buf_attr)
-  DECLPTR(remote_register_dma_handle)
-  DECLPTR(remote_register_dma_handle_attr)
-  DECLPTR(remote_register_fd)
-
-  DECLPTR(remote_session_control)
-  DECLPTR(remote_set_mode)
-
-  DECLPTR(rpcmem_init)
-  DECLPTR(rpcmem_deinit)
-  DECLPTR(rpcmem_alloc)
-  DECLPTR(rpcmem_free)
-  DECLPTR(rpcmem_to_fd)
-#undef DECLPTR
-
-  template <typename T>
-  T GetSymbol(const char* sym);
-};
-
-}  // namespace hexagon
-
-}  // namespace runtime
-}  // namespace tvm
-
-#endif  // __ANDROID__
-#endif  // TVM_RUNTIME_HEXAGON_ANDROID_TARGET_HEXAGON_DSPRPCAPI_H_
diff --git a/src/runtime/hexagon/android/target/hexagon_stubapi.cc b/src/runtime/hexagon/android/target/hexagon_stubapi.cc
deleted file mode 100644
index 1fb7d942e9..0000000000
--- a/src/runtime/hexagon/android/target/hexagon_stubapi.cc
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifdef __ANDROID__
-#include "hexagon_stubapi.h"
-
-#include <dlfcn.h>
-#include <stdint.h>
-#include <sys/stat.h>
-#include <tvm/runtime/logging.h>
-
-#include "hexagon_target_log.h"
-
-namespace tvm {
-namespace runtime {
-namespace hexagon {
-
-StubAPI::StubAPI() {
-  struct stat sb;
-  if (!stat("/dev/subsys_cdsp", &sb)) {
-    enable_domains_ = true;
-    TVM_LOGD("CDSP subsystem present");
-  } else if (!stat("/dev/subsys_adsp", &sb)) {
-    enable_domains_ = false;
-    TVM_LOGD("ADSP subsystem present");
-  }
-
-  constexpr auto domain_lib_name = "libtvm_remote_stub.so";
-  constexpr auto nondomain_lib_name = "libtvm_remote_nd_stub.so";
-
-  const char* lib_name = enable_domains_ ? domain_lib_name : nondomain_lib_name;
-  ICHECK(lib_handle_ = dlopen(lib_name, RTLD_LAZY | RTLD_LOCAL));
-
-#define RESOLVE(fn) p##fn##_ = GetSymbol<fn##_t*>(#fn)
-  if (enable_domains_) {
-    RESOLVE(tvm_remote_load_library);
-    RESOLVE(tvm_remote_release_library);
-    RESOLVE(tvm_remote_get_symbol);
-    RESOLVE(tvm_remote_kernel);
-    RESOLVE(tvm_remote_open);
-    RESOLVE(tvm_remote_close);
-    RESOLVE(tvm_remote_alloc_vtcm);
-    RESOLVE(tvm_remote_free_vtcm);
-    RESOLVE(tvm_remote_call_mmap64);
-  } else {
-    RESOLVE(tvm_remote_nd_load_library);
-    RESOLVE(tvm_remote_nd_release_library);
-    RESOLVE(tvm_remote_nd_get_symbol);
-    RESOLVE(tvm_remote_nd_kernel);
-    RESOLVE(tvm_remote_nd_open);
-    RESOLVE(tvm_remote_nd_call_mmap64);
-  }
-
-  RESOLVE(rpcmem_init);
-  RESOLVE(rpcmem_deinit);
-  RESOLVE(rpcmem_alloc);
-  RESOLVE(rpcmem_free);
-  RESOLVE(rpcmem_to_fd);
-#undef RESOLVE
-}
-
-StubAPI::~StubAPI() {
-  if (lib_handle_) dlclose(lib_handle_);
-}
-
-template <typename T>
-T StubAPI::GetSymbol(const char* sym) {
-  if (!lib_handle_) {
-    TVM_LOGE("error looking up symbol \"%s\": library not loaded", sym);
-    return nullptr;
-  }
-  dlerror();  // Clear any previous errror conditions.
-  if (T ret = reinterpret_cast<T>(dlsym(lib_handle_, sym))) {
-    return ret;
-  }
-
-  const char* err = dlerror();
-  const char* err_txt = err ? err : "symbol not found";
-  TVM_LOGE("error looking up symbol \"%s\": %s", sym, err_txt);
-  return nullptr;
-}
-
-const StubAPI* StubAPI::Global() {
-  static const StubAPI stub_api;
-  return &stub_api;
-}
-
-}  // namespace hexagon
-}  // namespace runtime
-}  // namespace tvm
-
-#endif  // __ANDROID__
diff --git a/src/runtime/hexagon/android/target/hexagon_stubapi.h b/src/runtime/hexagon/android/target/hexagon_stubapi.h
deleted file mode 100644
index feb329f5ce..0000000000
--- a/src/runtime/hexagon/android/target/hexagon_stubapi.h
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef TVM_RUNTIME_HEXAGON_ANDROID_TARGET_HEXAGON_STUBAPI_H_
-#define TVM_RUNTIME_HEXAGON_ANDROID_TARGET_HEXAGON_STUBAPI_H_
-
-#ifdef __ANDROID__
-#include <AEEStdErr.h>
-#include <rpcmem.h>
-#include <stdint.h>
-#include <tvm/runtime/logging.h>
-
-#include <tuple>
-
-#include "fastrpc/include/tvm_remote.h"
-#include "fastrpc/include/tvm_remote_nd.h"
-
-namespace tvm {
-namespace runtime {
-namespace hexagon {
-
-/*!
- * Unify the handling of domain and non-domain functions.
- *
- * In most cases, for a function "foo", the domain version will be called
- * "tvm_remote_foo", and the non-domain version will have "nd_foo".
- * The interfaces will be the same, except:
- * - the domain version will take "remote_handle64" as the first parameter,
- *   while the non-domain version will not:
- *   int tvm_remote_foo     (remote_handle64 h, param1, param2, ...);
- *   int tvm_remote_nd_foo                     (param1, param2, ...);
- * - any parameter of type "buffer" in the IDL, will be converted into a
- *   type "tvm_remote_buffer" for domain functions, and into
- *   "tvm_remote_nd_buffer" for non-domain functions. These two
- *   types are identical, but since they are declared in two different IDLs,
- *   they get different names.
- *
- * For any function, only a pointer to the "buffer" type is passed, but
- * since the pointee types are different, this is enough to create a
- * difference in the function signatures even if the "remote_handle64"
- * parameter is ignored. For this reason, in all function types, the
- * types "tvm_remote_buffer *" and "tvm_remote_nd_buffer *",
- * both const and non-const, are replaced with "void *", with the
- * corresponding const-qualification. This is done by the templates
- * "replace_pointee_type" and "map_tuple_element" below.
- *
- * The following functions are subject to the uniform handling:
- *
- *   tvm_remote_load_library     (remote_handle64 h, p1, p2, ...)
- *   tvm_remote_release_library
- *   tvm_remote_get_symbol
- *   tvm_remote_kernel
- *   tvm_remote_close
- *   tvm_remote_alloc_vtcm
- *   tvm_remote_free_vtcm
- *
- *   tvm_remote_nd_load_library  (p1, p2, ...)
- *   tvm_remote_nd_release_library
- *   tvm_remote_nd_get_symbol
- *   tvm_remote_nd_kernel
- *   tvm_remote_nd_close
- *
- * The "open" functions differ in their parameters in different ways, and
- * need to be handled individually.
- *
- *   tvm_remote_open
- *   tvm_remote_nd_open
- */
-
-namespace {
-/*!
- * replace_pointee_type<T, M, V>
- *
- * If T is a pointer to a potentially const-qualified M, then replace
- * M in T with V. Otherwise, leave T unchanged.
- */
-template <typename T, typename M, typename V>
-struct replace_pointee_type {
-  using type = T;
-};
-
-template <typename M, typename V>
-struct replace_pointee_type<M*, M, V> {
-  using type = V*;
-};
-
-template <typename M, typename V>
-struct replace_pointee_type<const M*, M, V> {
-  using type = const V*;
-};
-
-/*!
- * map_tuple_elements<M, V, std::tuple<As...>>
- *
- * From given tuple <As...>, form another tuple where for each A in As,
- * if A contains a pointer to M, the pointer is replaced with a pointer
- * to V, leaving other types unchanged.
- */
-template <typename...>
-struct map_tuple_elements;
-
-template <typename M, typename V, typename... As>
-struct map_tuple_elements<M, V, std::tuple<As...>> {
-  using type = std::tuple<typename replace_pointee_type<As, M, V>::type...>;
-};
-
-/*!
- * map_func_type<M, V, F>
- *
- * Given function type F = R(As...), form another function type by replacing
- * each pointer to M with a pointer to V.
- */
-template <typename M, typename V, typename F>
-struct map_func_type {
-  template <typename...>
-  struct func_to_tuple;
-  template <typename R, typename... As>
-  struct func_to_tuple<R(As...)> {
-    using args = std::tuple<As...>;
-    using ret = R;
-  };
-
-  template <typename R, typename... As>
-  struct tuple_to_func;
-  template <typename R, typename... As>
-  struct tuple_to_func<R, std::tuple<As...>> {
-    using func = R(As...);
-  };
-
-  using arg_tuple = typename func_to_tuple<F>::args;
-  using ret_type = typename func_to_tuple<F>::ret;
-  using mapped_args = typename map_tuple_elements<M, V, arg_tuple>::type;
-  using type = typename tuple_to_func<ret_type, mapped_args>::func;
-};
-}  // namespace
-
-class StubAPI {
- public:
-  StubAPI();
-  ~StubAPI();
-
- private:
-  // Create types for each remote function. For functions that take
-  // a pointer to tvm_remote_buffer or tvm_remote_nd_buffer,
-  // replace that pointer with pointer to void to make pointers to these
-  // two types identical in the function types created below.
-  // For example, int foo(tvm_remote_buffer*) and
-  // int bar(tvm_remote_nd_buffer*) should both have the same type.
-#define MAPTYPE(fn, ty) using fn##_t = typename map_func_type<ty, void, decltype(::fn)>::type;
-  MAPTYPE(tvm_remote_load_library, tvm_remote_buffer)
-  MAPTYPE(tvm_remote_release_library, tvm_remote_buffer)
-  MAPTYPE(tvm_remote_get_symbol, tvm_remote_buffer)
-  MAPTYPE(tvm_remote_kernel, tvm_remote_buffer)
-  MAPTYPE(tvm_remote_close, tvm_remote_buffer)
-  MAPTYPE(tvm_remote_alloc_vtcm, tvm_remote_buffer)
-  MAPTYPE(tvm_remote_free_vtcm, tvm_remote_buffer)
-  MAPTYPE(tvm_remote_call_mmap64, tvm_remote_buffer)
-
-  MAPTYPE(tvm_remote_nd_load_library, tvm_remote_nd_buffer)
-  MAPTYPE(tvm_remote_nd_release_library, tvm_remote_nd_buffer)
-  MAPTYPE(tvm_remote_nd_get_symbol, tvm_remote_nd_buffer)
-  MAPTYPE(tvm_remote_nd_kernel, tvm_remote_nd_buffer)
-  MAPTYPE(tvm_remote_nd_close, tvm_remote_buffer)
-  MAPTYPE(tvm_remote_nd_call_mmap64, tvm_remote_buffer)
-#undef MAPTYPE
-
-  // For remote functions whose prototypes differ significantly between
-  // the domain and non-domain versions, create the types directly.
-#define DECLTYPE(fn) using fn##_t = decltype(::fn);
-  DECLTYPE(tvm_remote_open)
-  DECLTYPE(tvm_remote_nd_open)
-
-  DECLTYPE(rpcmem_init)
-  DECLTYPE(rpcmem_deinit)
-  DECLTYPE(rpcmem_alloc)
-  DECLTYPE(rpcmem_free)
-  DECLTYPE(rpcmem_to_fd)
-#undef DECLTYPE
-
- public:
-  template <typename Fd, typename Fnd, typename... Ts>
-  int invoke(Fd func_d, Fnd func_nd, remote_handle64 handle, Ts... args) const {
-    if (enable_domains_) {
-      return func_d(handle, args...);
-    }
-    return func_nd(args...);
-  }
-  template <typename Fd, typename... Ts>
-  int invoke_d(Fd func_d, remote_handle64 handle, Ts... args) const {
-    if (enable_domains_) {
-      return func_d(handle, args...);
-    }
-    return 0;
-  }
-
-#define CONCAT_STR_FOR_REAL(a, b) a##b
-#define CONCAT_STR(a, b) CONCAT_STR_FOR_REAL(a, b)
-
-#define FUNC(name) CONCAT_STR(tvm_remote_, name)
-#define FUNC_D(name) CONCAT_STR(tvm_remote_, name)
-#define FUNC_ND(name) CONCAT_STR(tvm_remote_nd_, name)
-#define PTRNAME(fn) CONCAT_STR(p, CONCAT_STR(fn, _))
-
-#define DECLFUNC(name)                                                             \
-  template <typename... Ts>                                                        \
-  int FUNC(name)(remote_handle64 handle, Ts... args) const {                       \
-    return invoke(PTRNAME(FUNC_D(name)), PTRNAME(FUNC_ND(name)), handle, args...); \
-  }
-
-#define DECLFUNC_D(name)                                     \
-  template <typename... Ts>                                  \
-  int FUNC(name)(remote_handle64 handle, Ts... args) const { \
-    return invoke_d(PTRNAME(FUNC_D(name)), handle, args...); \
-  }
-
-  DECLFUNC(load_library)
-  DECLFUNC(release_library)
-  DECLFUNC(get_symbol)
-  DECLFUNC(kernel)
-  DECLFUNC(close)
-  DECLFUNC_D(alloc_vtcm)
-  DECLFUNC_D(free_vtcm)
-  DECLFUNC(call_mmap64)
-#undef DECLFUNC
-
-// Implementations provided here in case the target does not have these
-// in lib[ac]dsprpc.so.
-#define DECLSFUNC(fn) \
-  fn##_t* fn##_ptr() const { return p##fn##_; }
-  DECLSFUNC(rpcmem_init)
-  DECLSFUNC(rpcmem_deinit)
-  DECLSFUNC(rpcmem_alloc)
-  DECLSFUNC(rpcmem_free)
-  DECLSFUNC(rpcmem_to_fd)
-#undef DECLSFUNC
-#undef DECLFUNC_D
-
-  int tvm_remote_open(const char* uri, remote_handle64* handle) const {
-    if (enable_domains_) {
-      return PTRNAME(tvm_remote_open)(uri, handle);
-    }
-    return PTRNAME(tvm_remote_nd_open)();
-  }
-
-  static const StubAPI* Global();
-
- private:
-  bool enable_domains_ = true;
-  void* lib_handle_ = nullptr;
-
-#define DECLPTR(fn) fn##_t* PTRNAME(fn) = nullptr
-  DECLPTR(tvm_remote_load_library);
-  DECLPTR(tvm_remote_release_library);
-  DECLPTR(tvm_remote_get_symbol);
-  DECLPTR(tvm_remote_kernel);
-  DECLPTR(tvm_remote_open);
-  DECLPTR(tvm_remote_close);
-  DECLPTR(tvm_remote_alloc_vtcm);
-  DECLPTR(tvm_remote_free_vtcm);
-  DECLPTR(tvm_remote_call_mmap64);
-
-  DECLPTR(tvm_remote_nd_load_library);
-  DECLPTR(tvm_remote_nd_release_library);
-  DECLPTR(tvm_remote_nd_get_symbol);
-  DECLPTR(tvm_remote_nd_kernel);
-  DECLPTR(tvm_remote_nd_open);
-  DECLPTR(tvm_remote_nd_close);
-  DECLPTR(tvm_remote_nd_call_mmap64);
-#undef DECLPTR
-
-// "System" functions.
-#define DECLSPTR(fn) fn##_t* p##fn##_ = nullptr;
-  // Implementations provided here in case the target does not have these
-  // in lib[ac]dsprpc.so.
-  DECLSPTR(rpcmem_init);
-  DECLSPTR(rpcmem_deinit);
-  DECLSPTR(rpcmem_alloc);
-  DECLSPTR(rpcmem_free);
-  DECLSPTR(rpcmem_to_fd);
-#undef DECLSPTR
-
-#undef PTRNAME
-#undef FUNC_ND
-#undef FUNC_D
-#undef FUNC
-#undef CONCAT_STR
-#undef CONCAT_STR_FOR_REAL
-
-  template <typename T>
-  T GetSymbol(const char* sym);
-};
-
-}  // namespace hexagon
-
-}  // namespace runtime
-}  // namespace tvm
-
-#endif  // __ANDROID__
-#endif  // TVM_RUNTIME_HEXAGON_ANDROID_TARGET_HEXAGON_STUBAPI_H_
diff --git a/src/runtime/hexagon/android/target/hexagon_target_log.h b/src/runtime/hexagon/android/target/hexagon_target_log.h
deleted file mode 100644
index f8ba6a74e3..0000000000
--- a/src/runtime/hexagon/android/target/hexagon_target_log.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef TVM_RUNTIME_HEXAGON_ANDROID_TARGET_HEXAGON_TARGET_LOG_H_
-#define TVM_RUNTIME_HEXAGON_ANDROID_TARGET_HEXAGON_TARGET_LOG_H_
-#ifdef __ANDROID__
-
-#include <android/log.h>
-
-#define TVM_LOGV(...) __android_log_print(ANDROID_LOG_VERBOSE, "TVM", ##__VA_ARGS__)
-#define TVM_LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, "TVM", ##__VA_ARGS__)
-#define TVM_LOGI(...) __android_log_print(ANDROID_LOG_INFO, "TVM", ##__VA_ARGS__)
-#define TVM_LOGW(...) __android_log_print(ANDROID_LOG_WARN, "TVM", ##__VA_ARGS__)
-#define TVM_LOGE(...) __android_log_print(ANDROID_LOG_ERROR, "TVM", ##__VA_ARGS__)
-#define TVM_LOGF(...) __android_log_print(ANDROID_LOG_FATAL, "TVM", ##__VA_ARGS__)
-
-#endif  // __ANDROID__
-#endif  // TVM_RUNTIME_HEXAGON_ANDROID_TARGET_HEXAGON_TARGET_LOG_H_
diff --git a/src/runtime/hexagon/hexagon/hexagon_buffer.cc b/src/runtime/hexagon/hexagon_buffer.cc
similarity index 100%
rename from src/runtime/hexagon/hexagon/hexagon_buffer.cc
rename to src/runtime/hexagon/hexagon_buffer.cc
diff --git a/src/runtime/hexagon/hexagon/hexagon_buffer.h b/src/runtime/hexagon/hexagon_buffer.h
similarity index 97%
rename from src/runtime/hexagon/hexagon/hexagon_buffer.h
rename to src/runtime/hexagon/hexagon_buffer.h
index aa43209501..8cb8a32095 100644
--- a/src/runtime/hexagon/hexagon/hexagon_buffer.h
+++ b/src/runtime/hexagon/hexagon_buffer.h
@@ -17,8 +17,8 @@
  * under the License.
  */
 
-#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_BUFFER_H_
-#define TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_BUFFER_H_
+#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_BUFFER_H_
+#define TVM_RUNTIME_HEXAGON_HEXAGON_BUFFER_H_
 
 #include <tvm/runtime/c_runtime_api.h>
 #include <tvm/runtime/device_api.h>
@@ -199,4 +199,4 @@ struct BufferSet {
 }  // namespace runtime
 }  // namespace tvm
 
-#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_BUFFER_H_
+#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_BUFFER_H_
diff --git a/src/runtime/hexagon/hexagon/hexagon_common.cc b/src/runtime/hexagon/hexagon_common.cc
similarity index 98%
rename from src/runtime/hexagon/hexagon/hexagon_common.cc
rename to src/runtime/hexagon/hexagon_common.cc
index 4fb2af8acd..3a3a32a5cb 100644
--- a/src/runtime/hexagon/hexagon/hexagon_common.cc
+++ b/src/runtime/hexagon/hexagon_common.cc
@@ -31,7 +31,7 @@
 #include <utility>
 #include <vector>
 
-#include "../../library_module.h"
+#include "../library_module.h"
 #include "hexagon_buffer.h"
 
 #if defined(__hexagon__)
@@ -97,5 +97,6 @@ TVM_REGISTER_GLOBAL("runtime.module.loadfile_hexagon").set_body([](TVMArgs args,
   ObjectPtr<Library> n = CreateDSOLibraryObject(args[0]);
   *rv = CreateModuleFromLibrary(n);
 });
+
 }  // namespace runtime
 }  // namespace tvm
diff --git a/src/runtime/hexagon/hexagon/hexagon_common.h b/src/runtime/hexagon/hexagon_common.h
similarity index 91%
rename from src/runtime/hexagon/hexagon/hexagon_common.h
rename to src/runtime/hexagon/hexagon_common.h
index 9e534bdaf1..9f304836fc 100644
--- a/src/runtime/hexagon/hexagon/hexagon_common.h
+++ b/src/runtime/hexagon/hexagon_common.h
@@ -20,8 +20,8 @@
 /*!
  * \file hexagon_utils.h
  */
-#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_COMMON_H_
-#define TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_COMMON_H_
+#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_COMMON_H_
+#define TVM_RUNTIME_HEXAGON_HEXAGON_COMMON_H_
 
 #include <dlpack/dlpack.h>
 #include <tvm/runtime/c_backend_api.h>
@@ -50,4 +50,4 @@ inline bool IsHexagonDevice(DLDevice dev) {
 
 constexpr int kHexagonAllocAlignment = 2048;
 
-#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_COMMON_H_
+#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_COMMON_H_
diff --git a/src/runtime/hexagon/hexagon/hexagon_device_api_v2.cc b/src/runtime/hexagon/hexagon_device_api_v2.cc
similarity index 99%
rename from src/runtime/hexagon/hexagon/hexagon_device_api_v2.cc
rename to src/runtime/hexagon/hexagon_device_api_v2.cc
index ebd826b2c7..8da66ad1d0 100644
--- a/src/runtime/hexagon/hexagon/hexagon_device_api_v2.cc
+++ b/src/runtime/hexagon/hexagon_device_api_v2.cc
@@ -31,7 +31,7 @@
 #include <cstdlib>
 #include <cstring>
 
-#include "../../workspace_pool.h"
+#include "../workspace_pool.h"
 #include "hexagon_buffer.h"
 #include "hexagon_common.h"
 
diff --git a/src/runtime/hexagon/hexagon/hexagon_device_api_v2.h b/src/runtime/hexagon/hexagon_device_api_v2.h
similarity index 96%
rename from src/runtime/hexagon/hexagon/hexagon_device_api_v2.h
rename to src/runtime/hexagon/hexagon_device_api_v2.h
index 96805e55bb..84ff5aee1f 100644
--- a/src/runtime/hexagon/hexagon/hexagon_device_api_v2.h
+++ b/src/runtime/hexagon/hexagon_device_api_v2.h
@@ -17,8 +17,8 @@
  * under the License.
  */
 
-#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_DEVICE_API_V2_H_
-#define TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_DEVICE_API_V2_H_
+#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_DEVICE_API_V2_H_
+#define TVM_RUNTIME_HEXAGON_HEXAGON_DEVICE_API_V2_H_
 
 #include <tvm/runtime/device_api.h>
 
@@ -148,4 +148,4 @@ class HexagonDeviceAPIv2 final : public DeviceAPI {
 }  // namespace hexagon
 }  // namespace runtime
 }  // namespace tvm
-#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_DEVICE_API_V2_H_
+#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_DEVICE_API_V2_H_
diff --git a/src/runtime/hexagon/hexagon_module.cc b/src/runtime/hexagon/hexagon_module.cc
index 46881d9984..3f72070aeb 100644
--- a/src/runtime/hexagon/hexagon_module.cc
+++ b/src/runtime/hexagon/hexagon_module.cc
@@ -19,7 +19,7 @@
 
 /*!
  * \file hexagon_module.cc
- * \brief The HexagonHostModuleNode
+ * \brief The HexagonModuleNode
  */
 #include "hexagon_module.h"
 
@@ -36,27 +36,19 @@
 namespace tvm {
 namespace runtime {
 
-HexagonHostModuleNode::HexagonHostModuleNode(std::string data, std::string fmt,
-                                             std::unordered_map<std::string, FunctionInfo> fmap,
-                                             std::string asm_str, std::string obj_str,
-                                             std::string ir_str, std::string bc_str,
-                                             const std::set<std::string>& packed_c_abi)
-    : data_(data),
-      fmt_(fmt),
-      fmap_(fmap),
-      asm_(asm_str),
-      obj_(obj_str),
-      ir_(ir_str),
-      bc_(bc_str),
-      packed_c_abi_funcs_(packed_c_abi) {}
+HexagonModuleNode::HexagonModuleNode(std::string data, std::string fmt,
+                                     std::unordered_map<std::string, FunctionInfo> fmap,
+                                     std::string asm_str, std::string obj_str, std::string ir_str,
+                                     std::string bc_str)
+    : data_(data), fmt_(fmt), fmap_(fmap), asm_(asm_str), obj_(obj_str), ir_(ir_str), bc_(bc_str) {}
 
-PackedFunc HexagonHostModuleNode::GetFunction(const std::string& name,
-                                              const ObjectPtr<Object>& sptr_to_self) {
-  LOG(FATAL) << "HexagonHostModuleNode::GetFunction is not implemented.";
+PackedFunc HexagonModuleNode::GetFunction(const std::string& name,
+                                          const ObjectPtr<Object>& sptr_to_self) {
+  LOG(FATAL) << "HexagonModuleNode::GetFunction is not implemented.";
   return PackedFunc();
 }
 
-std::string HexagonHostModuleNode::GetSource(const std::string& format) {
+std::string HexagonModuleNode::GetSource(const std::string& format) {
   if (format == "s" || format == "asm") {
     return asm_;
   }
@@ -66,7 +58,7 @@ std::string HexagonHostModuleNode::GetSource(const std::string& format) {
   return "";
 }
 
-void HexagonHostModuleNode::SaveToFile(const std::string& file_name, const std::string& format) {
+void HexagonModuleNode::SaveToFile(const std::string& file_name, const std::string& format) {
   std::string fmt = runtime::GetFileFormat(file_name, format);
   if (fmt == "so" || fmt == "dll" || fmt == "hexagon") {
     std::string meta_file = GetMetaFilePath(file_name);
@@ -88,15 +80,22 @@ void HexagonHostModuleNode::SaveToFile(const std::string& file_name, const std::
     ICHECK(!bc_.empty()) << "LLVM IR bitcode not available";
     SaveBinaryToFile(file_name, bc_);
   } else {
-    LOG(FATAL) << "HexagonHostModuleNode::SaveToFile: unhandled format `" << fmt << "'";
+    LOG(FATAL) << "HexagonModuleNode::SaveToFile: unhandled format `" << fmt << "'";
   }
 }
 
-void HexagonHostModuleNode::SaveToBinary(dmlc::Stream* stream) {
+void HexagonModuleNode::SaveToBinary(dmlc::Stream* stream) {
   stream->Write(fmt_);
   stream->Write(fmap_);
   stream->Write(data_);
 }
 
+Module HexagonModuleCreate(std::string data, std::string fmt,
+                           std::unordered_map<std::string, FunctionInfo> fmap, std::string asm_str,
+                           std::string obj_str, std::string ir_str, std::string bc_str) {
+  auto n = make_object<HexagonModuleNode>(data, fmt, fmap, asm_str, obj_str, ir_str, bc_str);
+  return Module(n);
+}
+
 }  // namespace runtime
 }  // namespace tvm
diff --git a/src/runtime/hexagon/hexagon_module.h b/src/runtime/hexagon/hexagon_module.h
index dd73682a0c..aac75002c2 100644
--- a/src/runtime/hexagon/hexagon_module.h
+++ b/src/runtime/hexagon/hexagon_module.h
@@ -43,26 +43,22 @@ namespace runtime {
  * \param obj_str       String with the object file data.
  * \param ir_str        String with the disassembled LLVM IR source.
  * \param bc_str        String with the bitcode LLVM IR.
- * \param packed_c_abi  Set of names of functions using PackedC calling
- *                      convention.
  */
 Module HexagonModuleCreate(std::string data, std::string fmt,
                            std::unordered_map<std::string, FunctionInfo> fmap, std::string asm_str,
-                           std::string obj_str, std::string ir_str, std::string bc_str,
-                           const std::set<std::string>& packed_c_abi);
+                           std::string obj_str, std::string ir_str, std::string bc_str);
 
 /*!
-  \brief Module implementation for managing cross compiled hexagon
-         binaries on a host machine. Base class for the HexagonModuleNode
-         used in offload mode. See docstring for HexagonModuleCreate for
+  \brief Module implementation for compiled Hexagon binaries. It is suitable
+         for managing cross-compiled Hexagon code on a host machine.
+         See docstring for HexagonModuleCreate for
          construction parameter details.
  */
-class HexagonHostModuleNode : public runtime::ModuleNode {
+class HexagonModuleNode : public runtime::ModuleNode {
  public:
-  HexagonHostModuleNode(std::string data, std::string fmt,
-                        std::unordered_map<std::string, FunctionInfo> fmap, std::string asm_str,
-                        std::string obj_str, std::string ir_str, std::string bc_str,
-                        const std::set<std::string>& packed_c_abi);
+  HexagonModuleNode(std::string data, std::string fmt,
+                    std::unordered_map<std::string, FunctionInfo> fmap, std::string asm_str,
+                    std::string obj_str, std::string ir_str, std::string bc_str);
   PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self) override;
   std::string GetSource(const std::string& format) override;
   const char* type_key() const final { return "hexagon"; }
@@ -77,7 +73,6 @@ class HexagonHostModuleNode : public runtime::ModuleNode {
   std::string obj_;
   std::string ir_;
   std::string bc_;
-  std::set<std::string> packed_c_abi_funcs_;
 };
 
 }  // namespace runtime
diff --git a/src/runtime/hexagon/hexagon/hexagon_user_dma.cc b/src/runtime/hexagon/hexagon_user_dma.cc
similarity index 100%
rename from src/runtime/hexagon/hexagon/hexagon_user_dma.cc
rename to src/runtime/hexagon/hexagon_user_dma.cc
diff --git a/src/runtime/hexagon/hexagon/hexagon_user_dma_descriptors.h b/src/runtime/hexagon/hexagon_user_dma_descriptors.h
similarity index 98%
rename from src/runtime/hexagon/hexagon/hexagon_user_dma_descriptors.h
rename to src/runtime/hexagon/hexagon_user_dma_descriptors.h
index cea91310dd..643dbc5e8b 100644
--- a/src/runtime/hexagon/hexagon/hexagon_user_dma_descriptors.h
+++ b/src/runtime/hexagon/hexagon_user_dma_descriptors.h
@@ -17,8 +17,8 @@
  * under the License.
  */
 
-#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_DESCRIPTORS_H_
-#define TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_DESCRIPTORS_H_
+#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_DESCRIPTORS_H_
+#define TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_DESCRIPTORS_H_
 
 namespace tvm {
 namespace runtime {
@@ -318,4 +318,4 @@ inline void dma_desc_set_dstwidthoffset(void* dma_desc_ptr, unsigned int v) {
 }  // namespace runtime
 }  // namespace tvm
 
-#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_DESCRIPTORS_H_
+#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_DESCRIPTORS_H_
diff --git a/src/runtime/hexagon/hexagon/hexagon_user_dma_instructions.h b/src/runtime/hexagon/hexagon_user_dma_instructions.h
similarity index 90%
rename from src/runtime/hexagon/hexagon/hexagon_user_dma_instructions.h
rename to src/runtime/hexagon/hexagon_user_dma_instructions.h
index 86b4c6a218..c7255bc003 100644
--- a/src/runtime/hexagon/hexagon/hexagon_user_dma_instructions.h
+++ b/src/runtime/hexagon/hexagon_user_dma_instructions.h
@@ -17,8 +17,8 @@
  * under the License.
  */
 
-#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_INSTRUCTIONS_H_
-#define TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_INSTRUCTIONS_H_
+#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_INSTRUCTIONS_H_
+#define TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_INSTRUCTIONS_H_
 
 namespace tvm {
 namespace runtime {
@@ -76,4 +76,4 @@ inline void dmcfgwr(unsigned int dmindex, unsigned int data) {
 }  // namespace runtime
 }  // namespace tvm
 
-#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_INSTRUCTIONS_H_
+#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_INSTRUCTIONS_H_
diff --git a/src/runtime/hexagon/hexagon/hexagon_user_dma_registers.h b/src/runtime/hexagon/hexagon_user_dma_registers.h
similarity index 97%
rename from src/runtime/hexagon/hexagon/hexagon_user_dma_registers.h
rename to src/runtime/hexagon/hexagon_user_dma_registers.h
index 2463e3ba7a..7bb390c2fb 100644
--- a/src/runtime/hexagon/hexagon/hexagon_user_dma_registers.h
+++ b/src/runtime/hexagon/hexagon_user_dma_registers.h
@@ -17,8 +17,8 @@
  * under the License.
  */
 
-#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_REGISTERS_H_
-#define TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_REGISTERS_H_
+#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_REGISTERS_H_
+#define TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_REGISTERS_H_
 
 namespace tvm {
 namespace runtime {
@@ -275,4 +275,4 @@ static inline unsigned int dm5_get_syndrone_addr(unsigned int cfg) {
 }  // namespace runtime
 }  // namespace tvm
 
-#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_HEXAGON_USER_DMA_REGISTERS_H_
+#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_USER_DMA_REGISTERS_H_
diff --git a/src/runtime/hexagon/host/hexagon_module.cc b/src/runtime/hexagon/host/hexagon_module.cc
deleted file mode 100644
index 8ac4fbd5b9..0000000000
--- a/src/runtime/hexagon/host/hexagon_module.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file hexagon_module.cc
- * \brief The HexagonLibraryModuleNode
- */
-#include "../hexagon_module.h"
-
-#include <dmlc/memory_io.h>
-#include <tvm/runtime/module.h>
-#include <tvm/runtime/registry.h>
-
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "../../library_module.h"
-
-namespace tvm {
-namespace runtime {
-
-Module HexagonModuleCreate(std::string data, std::string fmt,
-                           std::unordered_map<std::string, FunctionInfo> fmap, std::string asm_str,
-                           std::string obj_str, std::string ir_str, std::string bc_str,
-                           const std::set<std::string>& packed_c_abi) {
-  auto n = make_object<HexagonHostModuleNode>(data, fmt, fmap, asm_str, obj_str, ir_str, bc_str,
-                                              packed_c_abi);
-  return Module(n);
-}
-
-}  // namespace runtime
-}  // namespace tvm
diff --git a/src/runtime/hexagon/rpc/simulator/rpc_server.cc b/src/runtime/hexagon/rpc/simulator/rpc_server.cc
index dee324ec1c..89cf533cfc 100644
--- a/src/runtime/hexagon/rpc/simulator/rpc_server.cc
+++ b/src/runtime/hexagon/rpc/simulator/rpc_server.cc
@@ -27,7 +27,7 @@
 
 #include "../../../library_module.h"
 #include "../../../minrpc/minrpc_server.h"
-#include "../../hexagon/hexagon_common.h"
+#include "../../hexagon_common.h"
 #include "hexagon_sim_proto.h"
 #include "tvm/runtime/packed_func.h"
 #include "tvm/runtime/registry.h"
diff --git a/src/target/llvm/codegen_hexagon.cc b/src/target/llvm/codegen_hexagon.cc
index 035f772f8d..3e4671a48e 100644
--- a/src/target/llvm/codegen_hexagon.cc
+++ b/src/target/llvm/codegen_hexagon.cc
@@ -46,13 +46,6 @@
 namespace tvm {
 namespace codegen {
 
-static std::string get_name(const PrimFunc& f) {
-  auto global_symbol = f->GetAttr<runtime::String>(tvm::attr::kGlobalSymbol);
-  ICHECK(global_symbol.defined())
-      << "CodeGenLLVM: Expect PrimFunc to have the global_symbol attribute";
-  return std::string(global_symbol.value());
-}
-
 // Hexagon code generation
 class CodeGenHexagon final : public CodeGenCPU {
  public:
@@ -268,16 +261,6 @@ CodeGenLLVM::TypedPointer CodeGenHexagon::CreateStructRefPtr(DataType t, llvm::V
 }
 
 namespace {
-// Check if the function matches the TVMBackendPackedCFunc prototype.
-bool UsesExportABI(const PrimFunc& f) {
-  if (f->attrs.defined()) {
-    auto it = f->attrs->dict.find("calling_conv");
-    return it != f->attrs->dict.end() &&
-           Downcast<Integer>((*it).second) == CallingConv::kCPackedFunc;
-  }
-  return false;
-}
-
 DMLC_ATTRIBUTE_UNUSED std::ostream& operator<<(std::ostream& os, const llvm::Module& m) {
   std::string ms;
   llvm::raw_string_ostream sos(ms);
@@ -297,7 +280,6 @@ void ProcessLLVMOptions(const std::vector<std::string>& llvm_vec) {
 
   llvm::cl::ParseCommandLineOptions(llvm_vec.size(), args);
 }
-
 }  // namespace
 
 runtime::Module BuildHexagon(IRModule mod, Target target) {
@@ -463,14 +445,7 @@ runtime::Module BuildHexagon(IRModule mod, Target target) {
   int rc = (*f)(so_name, o_names, extra_args);
   ICHECK(rc == 0) << "Failed to link " << so_name;
 
-  // Move it to ExtractFuncInfo?
-  std::set<std::string> export_abi;
-  for (auto kv : mod->functions) {
-    auto f = Downcast<PrimFunc>(kv.second);
-    if (UsesExportABI(f)) export_abi.insert(get_name(f));
-  }
-  return HexagonModuleCreate(so_name, "so", ExtractFuncInfo(mod), asm_str, obj_str, ir_str, bc_str,
-                             export_abi);
+  return HexagonModuleCreate(so_name, "so", ExtractFuncInfo(mod), asm_str, obj_str, ir_str, bc_str);
 }
 
 TVM_REGISTER_GLOBAL("target.build.hexagon").set_body_typed(BuildHexagon);
diff --git a/src/target/opt/build_hexagon_off.cc b/src/target/opt/build_hexagon_off.cc
index c734eeceed..2ce5cdb51f 100644
--- a/src/target/opt/build_hexagon_off.cc
+++ b/src/target/opt/build_hexagon_off.cc
@@ -24,8 +24,7 @@ namespace runtime {
 
 Module HexagonModuleCreate(std::string data, std::string fmt,
                            std::unordered_map<std::string, FunctionInfo> fmap, std::string asm_str,
-                           std::string obj_str, std::string ir_str, std::string bc_str,
-                           const std::set<std::string>& packed_c_abi) {
+                           std::string obj_str, std::string ir_str, std::string bc_str) {
   LOG(WARNING) << "Hexagon runtime is not enabled, return a source module...";
   return codegen::DeviceSourceModuleCreate(data, fmt, fmap, "hex");
 }
diff --git a/tests/cpp/runtime/hexagon_buffer.cc b/tests/cpp/runtime/hexagon_buffer.cc
index 0b37b08672..715d9b1b69 100644
--- a/tests/cpp/runtime/hexagon_buffer.cc
+++ b/tests/cpp/runtime/hexagon_buffer.cc
@@ -18,7 +18,7 @@
  */
 
 #include <gtest/gtest.h>
-#include <hexagon/hexagon/hexagon_buffer.h>
+#include <hexagon/hexagon_buffer.h>
 #include <tvm/runtime/container/optional.h>
 
 using namespace tvm::runtime;