You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2020/07/14 02:04:57 UTC
[GitHub] [incubator-tvm] zhiics commented on a change in pull request #5915: [BYOC][Contrib] Arm Compute Library integration

zhiics commented on a change in pull request #5915:
URL: https://github.com/apache/incubator-tvm/pull/5915#discussion_r454043140



##########
File path: src/relay/backend/contrib/arm_compute_lib/codegen_acl.h
##########
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/backend/contrib/arm_compute_lib/codegen_acl.h
+ * \brief The Relay -> ACL JSON schema compiler.
+ */
+
+#ifndef TVM_RELAY_BACKEND_CONTRIB_ARM_COMPUTE_LIB_CODEGEN_ACL_H_
+#define TVM_RELAY_BACKEND_CONTRIB_ARM_COMPUTE_LIB_CODEGEN_ACL_H_
+
+#include <tvm/relay/expr_functor.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "../../../../runtime/contrib/json/json_node.h"
+#include "../codegen_json/codegen_json.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace arm_compute_lib {
+
+/*!
+ * \brief Generates an ACLModule from a relay expression. This "compilation"
+ * does not require ACL since the actual conversion using ACL APIs is
+ * deferred until creation of the runtime. This step simply serializes the
+ * relay program into a JSON string.
+ */
+class ACLJSONSerializer : public backend::contrib::JSONSerializer {
+  using JSONGraphNode = tvm::runtime::json::JSONGraphNode;
+  using JSONGraphNodeEntry = tvm::runtime::json::JSONGraphNodeEntry;
+
+ public:
+  ACLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {}
+
+  std::vector<JSONGraphNodeEntry> VisitExpr_(const CallNode* cn) override;
+  std::vector<JSONGraphNodeEntry> VisitExpr_(const ConstantNode* cn) override;
+
+  /*!
+   * \brief Get the constant data transposed when pre-processing the
+   * input function.
+   *
+   * \return An array of constants
+   */
+  Array<runtime::NDArray> GetParamsData();
+
+ private:
+  /*!
+   * \brief Create a JSON representation of an operator.
+   *
+   * \param call The call to be represented.
+   * \return A JSON representation of a specific operator.
+   */
+  std::shared_ptr<JSONGraphNode> CreateOp(const CallNode* cn);
+  std::shared_ptr<JSONGraphNode> CreateCompositeConvolution(const CallNode* cn);
+
+  /* \brief Transposed constant tensors to serialize. Arm Compute Library expects constant tensors
+   * in OHWI format. */
+  Array<runtime::NDArray> constants_;
+};
+
+/*!
+ * \brief Pre-process a module containing functions ready for ACL codegen.
+ *
+ * For now we enforce OHWI kernel layout and fold the transforms away.
+ *
+ * \param mod The module to be pre-processed.
+ * \return The processed module.
+ */
+IRModule PreProcessModule(const IRModule& mod);
+
+/*!
+ * \brief Create a runtime module for ACL.
+ *
+ * This consists of a series of "serialized functions" which each represent a
+ * sub-graph to be computed by ACL and will each be executed independently from
+ * one another. Each function consists of serialized JSON describing the sub-graph
+ * and serialized constant tensors.
+ *
+ * \note The ACL runtime module only currently supports a single operator per
+ * sub-graph currently.
+ *
+ * \param ref The ext_func Relay expression/module to be executed using extern ops.
+ * \return A runtime module.
+ */
+runtime::Module ACLCompiler(const ObjectRef& ref);
+
+/*!
+ * \brief Get the external symbol of the Relay function name.
+ *
+ * \param func The provided function.
+ *
+ * \return An external symbol.
+ */
+std::string GetExtSymbol(const Function& func) {
+  const auto name_node = func->GetAttr<String>(tvm::attr::kGlobalSymbol);
+  CHECK(name_node.defined()) << "Fail to retrieve external symbol.";
+  return std::string(name_node.value());
+}
+
+TVM_REGISTER_GLOBAL("relay.ext.arm_compute_lib").set_body_typed(ACLCompiler);

Review comment:
       put this in the cc file

##########
File path: src/relay/backend/contrib/arm_compute_lib/codegen_acl.h
##########
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/backend/contrib/arm_compute_lib/codegen_acl.h
+ * \brief The Relay -> ACL JSON schema compiler.
+ */
+
+#ifndef TVM_RELAY_BACKEND_CONTRIB_ARM_COMPUTE_LIB_CODEGEN_ACL_H_
+#define TVM_RELAY_BACKEND_CONTRIB_ARM_COMPUTE_LIB_CODEGEN_ACL_H_
+
+#include <tvm/relay/expr_functor.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "../../../../runtime/contrib/json/json_node.h"
+#include "../codegen_json/codegen_json.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace arm_compute_lib {
+
+/*!
+ * \brief Generates an ACLModule from a relay expression. This "compilation"
+ * does not require ACL since the actual conversion using ACL APIs is
+ * deferred until creation of the runtime. This step simply serializes the
+ * relay program into a JSON string.
+ */
+class ACLJSONSerializer : public backend::contrib::JSONSerializer {
+  using JSONGraphNode = tvm::runtime::json::JSONGraphNode;
+  using JSONGraphNodeEntry = tvm::runtime::json::JSONGraphNodeEntry;
+
+ public:
+  ACLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {}
+
+  std::vector<JSONGraphNodeEntry> VisitExpr_(const CallNode* cn) override;
+  std::vector<JSONGraphNodeEntry> VisitExpr_(const ConstantNode* cn) override;
+
+  /*!
+   * \brief Get the constant data transposed when pre-processing the
+   * input function.
+   *
+   * \return An array of constants
+   */
+  Array<runtime::NDArray> GetParamsData();
+
+ private:
+  /*!
+   * \brief Create a JSON representation of an operator.
+   *
+   * \param call The call to be represented.
+   * \return A JSON representation of a specific operator.
+   */
+  std::shared_ptr<JSONGraphNode> CreateOp(const CallNode* cn);
+  std::shared_ptr<JSONGraphNode> CreateCompositeConvolution(const CallNode* cn);
+
+  /* \brief Transposed constant tensors to serialize. Arm Compute Library expects constant tensors
+   * in OHWI format. */
+  Array<runtime::NDArray> constants_;
+};
+
+/*!
+ * \brief Pre-process a module containing functions ready for ACL codegen.
+ *
+ * For now we enforce OHWI kernel layout and fold the transforms away.
+ *
+ * \param mod The module to be pre-processed.
+ * \return The processed module.
+ */
+IRModule PreProcessModule(const IRModule& mod);
+
+/*!
+ * \brief Create a runtime module for ACL.
+ *
+ * This consists of a series of "serialized functions" which each represent a
+ * sub-graph to be computed by ACL and will each be executed independently from
+ * one another. Each function consists of serialized JSON describing the sub-graph
+ * and serialized constant tensors.
+ *
+ * \note The ACL runtime module only currently supports a single operator per
+ * sub-graph currently.
+ *
+ * \param ref The ext_func Relay expression/module to be executed using extern ops.
+ * \return A runtime module.
+ */
+runtime::Module ACLCompiler(const ObjectRef& ref);
+
+/*!
+ * \brief Get the external symbol of the Relay function name.
+ *
+ * \param func The provided function.
+ *
+ * \return An external symbol.
+ */
+std::string GetExtSymbol(const Function& func) {

Review comment:
       yeah, we should.

##########
File path: src/runtime/contrib/arm_compute_lib/acl_runtime.cc
##########
@@ -0,0 +1,399 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/contrib/arm_compute_lib/acl_runtime.cc
+ * \brief A simple JSON runtime for Arm Compute Library.
+ */
+
+#include <tvm/runtime/ndarray.h>
+#include <tvm/runtime/registry.h>
+
+#include "../../file_util.h"
+#include "../json/json_node.h"
+#include "../json/json_runtime.h"
+
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+
+#include "acl_allocator.h"
+#include "acl_utils.h"
+#endif
+
+namespace tvm {
+namespace runtime {
+namespace contrib {
+
+using namespace tvm::runtime::json;
+
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
+using namespace arm_compute_lib;
+
+/*!
+ * \brief ACL objects we cache in order to avoid needing to construct
+ * a new layer each time.
+ */
+struct CachedLayer {
+  std::shared_ptr<arm_compute::IFunction> function;
+  std::vector<arm_compute::Tensor> inputs;
+  std::vector<arm_compute::Tensor> const_inputs;
+  std::vector<arm_compute::Tensor> outputs;
+};
+#endif
+
+class ACLRuntime : public JSONRuntimeBase {
+ public:
+  /*!
+   * \brief The ACL runtime module. Deserialize the provided functions
+   * on creation and store in the layer cache.
+   *
+   * \param symbol_name The name of the function.
+   * \param graph_json serialized JSON representation of a sub-graph.
+   * \param const_names The names of each constant in the sub-graph.
+   * \params consts An array of constants pre-transposed to the correct layout expected by ACL.
+   */
+  explicit ACLRuntime(const std::string& symbol_name, const std::string& graph_json,
+                      const Array<String>& const_names, const Array<NDArray>& consts)
+      : JSONRuntimeBase(symbol_name, graph_json, const_names) {
+    this->constants_ = consts;
+  }
+
+  /*!
+   * \brief Get a packed function.
+   *
+   * \param name The name/symbol of the function.
+   * \param sptr_to_self The pointer to the module node.
+   * \return The packed function.
+   */
+  PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self) override {
+    if (name == "get_symbol") {
+      return PackedFunc(
+          [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->symbol_name_; });
+    } else if (name == "get_const_vars") {
+      return PackedFunc(
+          [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; });
+    } else if (this->symbol_name_ == name) {
+      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+        CHECK(this->initialized_) << "The module has not been initialized";
+
+        // Bind argument tensors to data entries.
+        this->SetInputOutputBuffers(args);
+        // Execute the subgraph.
+        this->Run();
+      });
+    } else if ("__init_" + this->symbol_name_ == name) {
+      // The function to initialize constant tensors.
+      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+        this->Init();
+        this->initialized_ = true;
+        *rv = 0;
+      });
+    } else {
+      return PackedFunc(nullptr);
+    }
+  }
+
+  /*!
+   * \brief Save a compiled network to a binary stream, which can then be
+   * serialized to disk.
+   *
+   * \param stream The stream to save the binary.
+   */
+  void SaveToBinary(dmlc::Stream* stream) override {
+    // Save the symbol
+    stream->Write(symbol_name_);
+    // Save the graph
+    stream->Write(graph_json_);
+    // Save the required const names
+    std::vector<std::string> const_names;
+    for (const auto& it : const_names_) {
+      const_names.push_back(it);
+    }
+    stream->Write(const_names);
+    // Save the required constant data
+    stream->Write(constants_.size());
+    for (const auto& it : constants_) {
+      it.Save(stream);
+    }
+  }
+
+  /*!
+   * \brief Load a compiled network from stream.
+   *
+   * \param strm The binary stream to load.
+   * \return The created ACL module.
+   */
+  static Module LoadFromBinary(void* strm) {
+    dmlc::Stream* stream = static_cast<dmlc::Stream*>(strm);
+    std::string symbol;
+    std::string graph_json;
+    std::vector<std::string> consts;
+    // Load the symbol
+    CHECK(stream->Read(&symbol)) << "Loading symbol name failed";
+    CHECK(stream->Read(&graph_json)) << "Loading graph json failed";
+    CHECK(stream->Read(&consts)) << "Loading the const name list failed";
+    Array<String> const_names;
+    for (const auto& it : consts) {
+      const_names.push_back(it);
+    }
+    size_t const_data_count;
+    CHECK(stream->Read(&const_data_count));
+    Array<NDArray> const_data;
+    for (size_t i = 0; i < const_data_count; ++i) {
+      runtime::NDArray temp;
+      CHECK(temp.Load(stream)) << "Failed to load constant";
+      const_data.push_back(temp);
+    }
+    auto n = make_object<ACLRuntime>(symbol, graph_json, const_names, const_data);
+    return Module(n);
+  }
+
+  /*!
+   * \brief The type key of the module.
+   *
+   * \return module type key.
+   */
+  const char* type_key() const override { return "arm_compute_lib"; }
+
+  /*!
+   * \brief Initialize runtime. Create ACL layer from JSON
+   * representation.
+   */
+  void Init() {
+    CHECK_EQ(this->constants_.size(), const_idx_.size())
+        << "The number of input constants must match the number expected.";
+    this->SetupConstants(this->constants_);
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
+    BuildEngine();
+#endif
+  }
+
+  // Do not accept constants from MetadataModule as they should be transposed
+  // by the ACL codegen so they have the correct expected layout.
+  void Init(const Array<NDArray>& constants) override { LOG(FATAL) << "Not implemented."; }
+
+  /*!
+   * \brief Unpack inputs and outputs and run inference on a given layer.
+   *
+   * \param args Access inputs and outputs.
+   * \param function The layer to execute inference on.
+   * \return Status of inference.
+   */
+  void Run() override {
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB

Review comment:
       As commented above, I think we don't need this guard. We can simply not compile this file and all other ACL related files when `USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME` is not enabled. Please correct me if I miss something.

##########
File path: src/runtime/contrib/arm_compute_lib/acl_runtime.cc
##########
@@ -0,0 +1,399 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/contrib/arm_compute_lib/acl_runtime.cc
+ * \brief A simple JSON runtime for Arm Compute Library.
+ */
+
+#include <tvm/runtime/ndarray.h>
+#include <tvm/runtime/registry.h>
+
+#include "../../file_util.h"
+#include "../json/json_node.h"
+#include "../json/json_runtime.h"
+
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+
+#include "acl_allocator.h"
+#include "acl_utils.h"
+#endif
+
+namespace tvm {
+namespace runtime {
+namespace contrib {
+
+using namespace tvm::runtime::json;
+
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
+using namespace arm_compute_lib;
+
+/*!
+ * \brief ACL objects we cache in order to avoid needing to construct
+ * a new layer each time.
+ */
+struct CachedLayer {
+  std::shared_ptr<arm_compute::IFunction> function;
+  std::vector<arm_compute::Tensor> inputs;
+  std::vector<arm_compute::Tensor> const_inputs;
+  std::vector<arm_compute::Tensor> outputs;
+};
+#endif
+
+class ACLRuntime : public JSONRuntimeBase {
+ public:
+  /*!
+   * \brief The ACL runtime module. Deserialize the provided functions
+   * on creation and store in the layer cache.
+   *
+   * \param symbol_name The name of the function.
+   * \param graph_json serialized JSON representation of a sub-graph.
+   * \param const_names The names of each constant in the sub-graph.
+   * \params consts An array of constants pre-transposed to the correct layout expected by ACL.
+   */
+  explicit ACLRuntime(const std::string& symbol_name, const std::string& graph_json,
+                      const Array<String>& const_names, const Array<NDArray>& consts)
+      : JSONRuntimeBase(symbol_name, graph_json, const_names) {
+    this->constants_ = consts;
+  }
+
+  /*!
+   * \brief Get a packed function.
+   *
+   * \param name The name/symbol of the function.
+   * \param sptr_to_self The pointer to the module node.
+   * \return The packed function.
+   */
+  PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self) override {
+    if (name == "get_symbol") {
+      return PackedFunc(
+          [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->symbol_name_; });
+    } else if (name == "get_const_vars") {
+      return PackedFunc(
+          [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; });
+    } else if (this->symbol_name_ == name) {
+      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+        CHECK(this->initialized_) << "The module has not been initialized";
+
+        // Bind argument tensors to data entries.
+        this->SetInputOutputBuffers(args);
+        // Execute the subgraph.
+        this->Run();
+      });
+    } else if ("__init_" + this->symbol_name_ == name) {
+      // The function to initialize constant tensors.
+      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+        this->Init();
+        this->initialized_ = true;
+        *rv = 0;
+      });
+    } else {
+      return PackedFunc(nullptr);
+    }
+  }
+
+  /*!
+   * \brief Save a compiled network to a binary stream, which can then be
+   * serialized to disk.
+   *
+   * \param stream The stream to save the binary.
+   */
+  void SaveToBinary(dmlc::Stream* stream) override {
+    // Save the symbol
+    stream->Write(symbol_name_);
+    // Save the graph
+    stream->Write(graph_json_);
+    // Save the required const names
+    std::vector<std::string> const_names;
+    for (const auto& it : const_names_) {
+      const_names.push_back(it);
+    }
+    stream->Write(const_names);
+    // Save the required constant data
+    stream->Write(constants_.size());
+    for (const auto& it : constants_) {
+      it.Save(stream);
+    }
+  }
+
+  /*!
+   * \brief Load a compiled network from stream.
+   *
+   * \param strm The binary stream to load.
+   * \return The created ACL module.
+   */
+  static Module LoadFromBinary(void* strm) {
+    dmlc::Stream* stream = static_cast<dmlc::Stream*>(strm);
+    std::string symbol;
+    std::string graph_json;
+    std::vector<std::string> consts;
+    // Load the symbol
+    CHECK(stream->Read(&symbol)) << "Loading symbol name failed";
+    CHECK(stream->Read(&graph_json)) << "Loading graph json failed";
+    CHECK(stream->Read(&consts)) << "Loading the const name list failed";
+    Array<String> const_names;
+    for (const auto& it : consts) {
+      const_names.push_back(it);
+    }
+    size_t const_data_count;
+    CHECK(stream->Read(&const_data_count));
+    Array<NDArray> const_data;
+    for (size_t i = 0; i < const_data_count; ++i) {
+      runtime::NDArray temp;
+      CHECK(temp.Load(stream)) << "Failed to load constant";
+      const_data.push_back(temp);
+    }
+    auto n = make_object<ACLRuntime>(symbol, graph_json, const_names, const_data);
+    return Module(n);
+  }
+
+  /*!
+   * \brief The type key of the module.
+   *
+   * \return module type key.
+   */
+  const char* type_key() const override { return "arm_compute_lib"; }
+
+  /*!
+   * \brief Initialize runtime. Create ACL layer from JSON
+   * representation.
+   */
+  void Init() {
+    CHECK_EQ(this->constants_.size(), const_idx_.size())
+        << "The number of input constants must match the number expected.";
+    this->SetupConstants(this->constants_);
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
+    BuildEngine();
+#endif
+  }
+
+  // Do not accept constants from MetadataModule as they should be transposed
+  // by the ACL codegen so they have the correct expected layout.
+  void Init(const Array<NDArray>& constants) override { LOG(FATAL) << "Not implemented."; }

Review comment:
       This is because the Function passed to the codegen is updated. Let's think a better way to keep the updated Function so that constants can be correctly collected and all the logic here can follow what dnnl json runtime does exactly. Otherwise, multiple copies of constants are saved which leads to very high memory consumption.

##########
File path: src/runtime/contrib/arm_compute_lib/acl_runtime.cc
##########
@@ -0,0 +1,399 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/contrib/arm_compute_lib/acl_runtime.cc
+ * \brief A simple JSON runtime for Arm Compute Library.
+ */
+
+#include <tvm/runtime/ndarray.h>
+#include <tvm/runtime/registry.h>
+
+#include "../../file_util.h"
+#include "../json/json_node.h"
+#include "../json/json_runtime.h"
+
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+
+#include "acl_allocator.h"
+#include "acl_utils.h"
+#endif
+
+namespace tvm {
+namespace runtime {
+namespace contrib {
+
+using namespace tvm::runtime::json;
+
+#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
+using namespace arm_compute_lib;
+
+/*!
+ * \brief ACL objects we cache in order to avoid needing to construct
+ * a new layer each time.
+ */
+struct CachedLayer {
+  std::shared_ptr<arm_compute::IFunction> function;
+  std::vector<arm_compute::Tensor> inputs;
+  std::vector<arm_compute::Tensor> const_inputs;
+  std::vector<arm_compute::Tensor> outputs;
+};
+#endif
+
+class ACLRuntime : public JSONRuntimeBase {
+ public:
+  /*!
+   * \brief The ACL runtime module. Deserialize the provided functions
+   * on creation and store in the layer cache.
+   *
+   * \param symbol_name The name of the function.
+   * \param graph_json serialized JSON representation of a sub-graph.
+   * \param const_names The names of each constant in the sub-graph.
+   * \params consts An array of constants pre-transposed to the correct layout expected by ACL.
+   */
+  explicit ACLRuntime(const std::string& symbol_name, const std::string& graph_json,
+                      const Array<String>& const_names, const Array<NDArray>& consts)
+      : JSONRuntimeBase(symbol_name, graph_json, const_names) {
+    this->constants_ = consts;
+  }
+
+  /*!
+   * \brief Get a packed function.
+   *
+   * \param name The name/symbol of the function.
+   * \param sptr_to_self The pointer to the module node.
+   * \return The packed function.
+   */
+  PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self) override {

Review comment:
       Do we need this? They are in the base class already?

##########
File path: cmake/modules/contrib/ArmComputeLib.cmake
##########
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# We separate the codegen and runtime build since ACL can only be built
+# for AArch. In the world where we take the cross compilation approach,
+# which is common with arm devices, we need to be able to cross-compile
+# a relay graph on x86 for AArch and then run the graph on AArch.
+if(USE_ARM_COMPUTE_LIB)
+    file(GLOB ACL_RELAY_CONTRIB_SRC src/relay/backend/contrib/arm_compute_lib/*.cc)
+    file(GLOB ACL_RUNTIME_MODULE src/runtime/contrib/arm_compute_lib/acl_runtime.cc)
+    list(APPEND COMPILER_SRCS ${ACL_RELAY_CONTRIB_SRC})
+    list(APPEND COMPILER_SRCS ${ACL_RUNTIME_MODULE})
+    message(STATUS "Build with Arm Compute Library support...")
+endif()
+
+if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME)
+    set(ACL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/acl)
+    # Detect custom ACL path.
+    if (NOT USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME STREQUAL "ON")
+        set(ACL_PATH ${USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME})
+    endif()
+
+    file(GLOB ACL_CONTRIB_SRC src/runtime/contrib/arm_compute_lib/*)
+
+    set(ACL_INCLUDE_DIRS ${ACL_PATH}/include ${ACL_PATH})
+    include_directories(${ACL_INCLUDE_DIRS})
+
+    find_library(EXTERN_ACL_COMPUTE_LIB
+            NAMES arm_compute libarm_compute
+            HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build"
+            )
+    find_library(EXTERN_ACL_COMPUTE_CORE_LIB
+            NAMES arm_compute_core libarm_compute_core
+            HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build"
+            )
+    find_library(EXTERN_ACL_COMPUTE_GRAPH_LIB
+            NAMES arm_compute_graph libarm_compute_graph
+            HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build"
+            )
+
+    list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_LIB})
+    list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_CORE_LIB})
+    list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_GRAPH_LIB})
+    list(APPEND RUNTIME_SRCS ${ACL_CONTRIB_SRC})
+    message(STATUS "Build with Arm Compute Library graph runtime support: "
+            ${EXTERN_ACL_COMPUTE_LIB} ", \n"
+            ${EXTERN_ACL_COMPUTE_CORE_LIB} ", \n"
+            ${EXTERN_ACL_COMPUTE_GRAPH_LIB})
+
+    # Set flag to detect ACL graph runtime support.
+    add_definitions(-DTVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB)

Review comment:
       Do we need this? Shouldn't we enable it directly when `USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME` is already enabled.

##########
File path: src/relay/backend/contrib/arm_compute_lib/codegen_acl.h
##########
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/backend/contrib/arm_compute_lib/codegen_acl.h
+ * \brief The Relay -> ACL JSON schema compiler.
+ */
+
+#ifndef TVM_RELAY_BACKEND_CONTRIB_ARM_COMPUTE_LIB_CODEGEN_ACL_H_
+#define TVM_RELAY_BACKEND_CONTRIB_ARM_COMPUTE_LIB_CODEGEN_ACL_H_
+
+#include <tvm/relay/expr_functor.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "../../../../runtime/contrib/json/json_node.h"
+#include "../codegen_json/codegen_json.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace arm_compute_lib {
+
+/*!
+ * \brief Generates an ACLModule from a relay expression. This "compilation"
+ * does not require ACL since the actual conversion using ACL APIs is
+ * deferred until creation of the runtime. This step simply serializes the
+ * relay program into a JSON string.
+ */
+class ACLJSONSerializer : public backend::contrib::JSONSerializer {
+  using JSONGraphNode = tvm::runtime::json::JSONGraphNode;
+  using JSONGraphNodeEntry = tvm::runtime::json::JSONGraphNodeEntry;
+
+ public:
+  ACLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {}
+
+  std::vector<JSONGraphNodeEntry> VisitExpr_(const CallNode* cn) override;
+  std::vector<JSONGraphNodeEntry> VisitExpr_(const ConstantNode* cn) override;
+
+  /*!
+   * \brief Get the constant data transposed when pre-processing the
+   * input function.
+   *
+   * \return An array of constants
+   */
+  Array<runtime::NDArray> GetParamsData();
+
+ private:
+  /*!
+   * \brief Create a JSON representation of an operator.
+   *
+   * \param call The call to be represented.
+   * \return A JSON representation of a specific operator.
+   */
+  std::shared_ptr<JSONGraphNode> CreateOp(const CallNode* cn);
+  std::shared_ptr<JSONGraphNode> CreateCompositeConvolution(const CallNode* cn);
+
+  /* \brief Transposed constant tensors to serialize. Arm Compute Library expects constant tensors
+   * in OHWI format. */
+  Array<runtime::NDArray> constants_;
+};
+
+/*!
+ * \brief Pre-process a module containing functions ready for ACL codegen.
+ *
+ * For now we enforce OHWI kernel layout and fold the transforms away.
+ *
+ * \param mod The module to be pre-processed.
+ * \return The processed module.
+ */
+IRModule PreProcessModule(const IRModule& mod);
+
+/*!
+ * \brief Create a runtime module for ACL.
+ *
+ * This consists of a series of "serialized functions" which each represent a
+ * sub-graph to be computed by ACL and will each be executed independently from
+ * one another. Each function consists of serialized JSON describing the sub-graph
+ * and serialized constant tensors.
+ *
+ * \note The ACL runtime module only currently supports a single operator per
+ * sub-graph currently.
+ *
+ * \param ref The ext_func Relay expression/module to be executed using extern ops.
+ * \return A runtime module.
+ */
+runtime::Module ACLCompiler(const ObjectRef& ref);
+
+/*!
+ * \brief Get the external symbol of the Relay function name.
+ *
+ * \param func The provided function.
+ *
+ * \return An external symbol.
+ */
+std::string GetExtSymbol(const Function& func) {
+  const auto name_node = func->GetAttr<String>(tvm::attr::kGlobalSymbol);
+  CHECK(name_node.defined()) << "Fail to retrieve external symbol.";
+  return std::string(name_node.value());
+}
+
+TVM_REGISTER_GLOBAL("relay.ext.arm_compute_lib").set_body_typed(ACLCompiler);
+
+/*!
+ * \brief Check whether ACL graph runtime is used.
+ * \return True if ACL graph runtime is enabled, False if not.
+ */
+inline constexpr bool IsACLRuntimeEnabled() {
+#if TVM_GRAPH_RUNTIME_ACL

Review comment:
       should it be `TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB` as this is what you added in the cmake




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org